Module:Citation/CS1/COinS
< Module:Citation | CS1
Jump to navigation
Jump to search
Documentation for this module may be created at Module:Citation/CS1/COinS/doc
1 local coins = {};
2
3
4 --[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
5 ]]
6
7 local is_set, in_array, remove_wiki_link; -- functions in Module:Citation/CS1/Utilities
8
9 local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
10
11
12 --[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------
13
14 Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
15 This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to
16 mark up a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.
17
18 ]]
19
-- argument: value to clean. It is returned untouched when is_set() rejects it or when it holds no '' pair.
-- Returns the value with apostrophe runs removed. On every pass the longest run length that is still
-- present (5, then 4, then 3, then 2) is deleted everywhere, and the scan restarts until no '' remains.
local function strip_apostrophe_markup (argument)
	if not is_set (argument) then
		return argument;
	end
	if not argument:find ("''", 1, true) then									-- no double apostrophe anywhere; nothing to strip
		return argument;
	end

	local removed_something;
	repeat
		removed_something = false;
		for run_length = 5, 2, -1 do											-- bold italic (5), empty italic (4), bold (3), italic (2)
			if argument:find (("'"):rep (run_length), 1, true) then
				argument = argument:gsub (("%'"):rep (run_length), "");		-- delete every instance of this run length
				removed_something = true;
				break;															-- restart from the longest run length
			end
		end
	until not removed_something;

	return argument;
end
42
43
44 --[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------
45
46 Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs)
47
48 Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with strings
49 of %27%27...
50
51 ]]
52
-- title, script: the name / script pair; either may be unset.
-- Returns the cleaned title and the cleaned script value (two-letter language prefix removed) joined by a
-- single space when both are non-empty, otherwise whichever one is non-empty, otherwise ''.
local function make_coins_title (title, script)
	local title_part = '';														-- stays empty when title is not set
	if is_set (title) then
		title_part = strip_apostrophe_markup (title);
	end

	local script_part = '';														-- stays empty when script is not set
	if is_set (script) then
		local without_prefix = script:gsub ('^%l%l%s*:%s*', '');				-- drop a leading "xx:" language prefix; may leave an empty string
		script_part = strip_apostrophe_markup (without_prefix);
	end

	local separator = '';
	if is_set (title_part) and is_set (script_part) then
		separator = ' ';														-- only separate when there is something on both sides
	end

	return title_part .. separator .. script_part;
end
70
71
72 --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
73
74 Returns a string where all of lua's magic characters have been escaped. This is important because functions like
75 string.gsub() treat their pattern and replace strings as patterns, not literal strings.
76 ]]
77
-- argument: string that will later be used as a Lua pattern but must match literally.
-- Returns a single string in which % is doubled and each of ^ $ ( ) . [ ] * + - ? is prefixed with %.
local function escape_lua_magic_chars (argument)
	local percent_doubled = argument:gsub ("%%", "%%%%");						-- must run first so the % added below is not doubled again
	local all_escaped = percent_doubled:gsub ("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");
	return all_escaped;
end
83
84
85 --[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------
86
87 Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.
88
89 ]]
90
-- pages: value of a page parameter; returned untouched when is_set() rejects it.
-- Returns the value with the url part of every external link removed, all square brackets removed, and
-- en dashes and named html entities replaced by hyphens.
local function get_coins_pages (pages)
	if not is_set (pages) then
		return pages;															-- no page numbers so nothing to do
	end

	local url_capture = "%[(%w*:?//[^ ]+%s+)[%w%d].*%]";						-- captures the url and the space(s) after it, without the opening bracket
	local url_text = pages:match (url_capture);
	while url_text do															-- one pass per distinct url
		pages = pages:gsub (escape_lua_magic_chars (url_text), "");			-- url is literal text, so escape it before using it as a pattern
		url_text = pages:match (url_capture);
	end

	local cleaned = pages:gsub ("[%[%]]", "");									-- drop the brackets that are left behind
	cleaned = cleaned:gsub ("–", "-");											-- en dash to hyphen
	cleaned = cleaned:gsub ("&%w+;", "-");										-- named html entities (&ndash; etc.) to hyphen; numeric entities are not handled
	return cleaned;
end
106
107
108 --[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------
109
110 There are three options for math markup rendering that depend on the editor's math preference settings. These
111 settings are at [[Special:Preferences#mw-prefsection-rendering]] and are
112 PNG images
113 TeX source
114 MathML with SVG or PNG fallback
115
116 All three are heavy with html and css which doesn't belong in the metadata.
117
118 Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
119 of the last editor to save the page.
120
121 This function gets the rendered form of an equation according to the editor's preference before the page is saved. It
122 then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
123 that the page is saved without extraneous html/css markup and with a reasonably readable text form of the equation.
124
125 When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
126 value. To replace multiple equations it is necessary to call this function from within a loop.
127
128 ]=]
129
-- value: string that may hold math stripmarkers.
-- Returns exactly two values: true and value with its first math stripmarker replaced by the equation text,
-- or false and the unmodified value when there is no stripmarker or its rendering is not one of the three
-- recognized forms.
local function coins_replace_math_stripmarker (value)
	local stripmarker = '\127UNIQ%-%-math%-[%a%d]+%-QINU\127';					-- math stripmarker pattern
	local rendering = value:match (stripmarker);								-- is there a math stripmarker

	if not rendering then														-- when value doesn't have a math stripmarker, abandon this test
		return false, value;
	end

	rendering = mw.text.unstripNoWiki (rendering);								-- convert stripmarker into rendered value
	if 'string' ~= type (rendering) then										-- nothing usable came back (math render error?); leave the marker for the caller
		return false, value;
	end

	local math_text = rendering:match ('alt="([^"]+)"')							-- PNG math option
		or rendering:match ('%$%s+(.+)%s+%$')									-- TeX math option; both $ are literal dollar signs
		or rendering:match ('<annotation[^>]+>(.+)</annotation>');				-- MathML math option

	if not math_text then
		return false, value;													-- had math stripmarker but not one of the three defined forms
	end

	-- A function replacement inserts math_text literally. A string replacement would treat every % in the
	-- equation as a gsub escape, which drops the % (Lua 5.1) or raises an error (later versions).
	local replaced = value:gsub (stripmarker, function () return math_text; end, 1);
	return true, replaced;														-- the gsub substitution count is deliberately not returned
end
152
153
154 --[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
155
156 Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities.
157
158 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaced math stripmarkers with the appropriate content
159 when it shouldn't. See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29
160
161 TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible
162 characters table?
163
164 ]]
165
-- value: string parameter value destined for the metadata.
-- Returns the value with math stripmarkers replaced by equation text (or "MATH RENDER ERROR"), nowiki
-- stripmarkers replaced by their content, the {{'s}} and {{'}} template renderings reduced to plain
-- apostrophes, and invisible characters / certain html entities removed or replaced by a plain space.
--
-- The html entities are written out as entity text (&#39; &zwj; &nbsp;). Multi-byte characters are matched
-- as whole byte sequences: a Lua [set] holds single bytes, so putting the bytes of several characters into
-- one set also strips those bytes out of unrelated UTF-8 characters (en dash, copyright sign, ...).
local function coins_cleanup (value)
	local replaced = true;														-- default state to get the loop running

	while replaced do															-- loop until all math stripmarkers replaced
		replaced, value = coins_replace_math_stripmarker (value);				-- replace math stripmarker with text representation of the equation
	end

	value = value:gsub ('\127UNIQ%-%-math%-[%a%d]+%-QINU\127', "MATH RENDER ERROR");	-- one or more couldn't be replaced; insert vague error message

	value = mw.text.unstripNoWiki (value);										-- replace nowiki stripmarkers with their content
	value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;s</span>', "'s");	-- replace {{'s}} template with simple apostrophe-s
	value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">\'s</span>', "'s");	-- same, when the apostrophe is not entity-encoded
	value = value:gsub ('&zwj;\226\128\138\039\226\128\139', "'");				-- replace {{'}} (entity, hair space, apostrophe, zero-width space) with simple apostrophe
	value = value:gsub ('\226\128\138\039\226\128\139', "'");					-- replace {{'}} with simple apostrophe (as of 2015-12-11)
	value = value:gsub ('&nbsp;', ' ');											-- replace &nbsp; entity with plain space
	value = value:gsub ('\194\160', ' ');										-- and the no-break space character itself; NOTE(review): confirm this is wanted in addition to the entity
	value = value:gsub ('\226\128\138', ' ');									-- replace hair space with plain space
	value = value:gsub ('&zwj;', '');											-- remove &zwj; entities
	value = value:gsub ('\226\128\141', '');									-- remove zero-width joiner (whole 3-byte sequence)
	value = value:gsub ('\226\128\139', '');									-- remove zero-width space (whole 3-byte sequence)
	value = value:gsub ('\194\173', ' ');										-- replace soft hyphen (whole 2-byte sequence) with plain space
	value = value:gsub ('[\009\010\013]', ' ');									-- replace horizontal tab, line feed, carriage return with plain space
	return value;
end
186
187
188 --[[--------------------------< C O I N S >--------------------------------------------------------------------
189
190 COinS metadata (see <http://ocoins.info/>) allows automated tools to parse the citation information.
191
192 ]]
193
-- data: table of metadata values. Every member except ID_list and Authors is passed through coins_cleanup()
--       and stored back into data; ID_list and Authors are read as tables further down.
-- class: citation class name; together with data.Periodical / data.Encyclopedia it selects journal, book or
--       dissertation metadata and the genre.
-- Returns '' when data is not a table or is empty; otherwise "ctx_ver=Z39.88-2004" followed by the sorted
-- key=value pairs, all joined with '&'.
local function COinS(data, class)
	if type (data) ~= 'table' or next (data) == nil then
		return '';
	end

	for name, raw_value in pairs (data) do										-- clean every metadata value in place
		if name ~= 'ID_list' and name ~= 'Authors' then							-- author names are cleaned when the Authors table is processed
			data[name] = coins_cleanup (raw_value);
		end
	end

	local ctx_ver = "Z39.88-2004";

	-- Assigning OCinSoutput[key] = value appends the encoded "key=value" string to the array part,
	-- but only when value is set; nothing is ever stored under key itself.
	local function append_pair (self, key, value)
		if not is_set (value) then
			return;
		end
		rawset (self, #self + 1, table.concat {key, '=', mw.uri.encode (remove_wiki_link (value))});
	end
	local OCinSoutput = setmetatable ({}, {__newindex = append_pair});

	local use_journal_format = in_array (class, {'arxiv', 'journal', 'news'})
		or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set (data.Periodical))
		or ('citation' == class and is_set (data.Periodical) and not is_set (data.Encyclopedia));

	if use_journal_format then
		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal";				-- journal metadata identifier

		local genre_by_class = {												-- every other class in this branch is an 'article'
			arxiv = "preprint",													-- cite arxiv
			conference = "conference",											-- cite conference (when Periodical set)
			web = "unknown",													-- cite web (when Periodical set)
		};
		OCinSoutput["rft.genre"] = genre_by_class[class] or "article";

		OCinSoutput["rft.jtitle"] = data.Periodical;							-- journal only
		OCinSoutput["rft.atitle"] = data.Title;									-- 'periodical' article titles

		OCinSoutput["rft.ssn"] = data.Season;									-- periodicals only; keywords: winter, spring, summer, fall
		OCinSoutput["rft.chron"] = data.Chron;									-- periodicals only; free-form date components
		OCinSoutput["rft.volume"] = data.Volume;								-- periodicals only
		OCinSoutput["rft.issue"] = data.Issue;									-- periodicals only
		OCinSoutput["rft.pages"] = data.Pages;									-- also used in book metadata

	elseif class ~= 'thesis' then												-- everything else except cite thesis is 'book' metadata; genre distinguishes
		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:book";					-- book metadata identifier

		if class == 'report' or class == 'techreport' then
			OCinSoutput["rft.genre"] = "report";
		elseif class == 'conference' then										-- cite conference when Periodical not set
			OCinSoutput["rft.genre"] = "conference";
			OCinSoutput["rft.atitle"] = data.Chapter;							-- conference paper as chapter in proceedings (book)
		elseif not in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
			OCinSoutput["rft.genre"] = "unknown";								-- all remaining classes
		elseif is_set (data.Chapter) then
			OCinSoutput["rft.genre"] = "bookitem";
			OCinSoutput["rft.atitle"] = data.Chapter;							-- book chapter, encyclopedia article, interview in a book, or map title
		elseif class == 'map' or class == 'interview' then
			OCinSoutput["rft.genre"] = 'unknown';								-- standalone map or interview
		else
			OCinSoutput["rft.genre"] = 'book';									-- book and encyclopedia
		end

		OCinSoutput["rft.btitle"] = data.Title;									-- book only
		OCinSoutput["rft.place"] = data.PublicationPlace;						-- book only
		OCinSoutput["rft.series"] = data.Series;								-- book only
		OCinSoutput["rft.pages"] = data.Pages;									-- book, journal
		OCinSoutput["rft.edition"] = data.Edition;								-- book only
		OCinSoutput["rft.pub"] = data.PublisherName;							-- book and dissertation

	else																		-- cite thesis
		OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation";			-- dissertation metadata identifier
		OCinSoutput["rft.title"] = data.Title;									-- dissertation
		OCinSoutput["rft.degree"] = data.Degree;								-- dissertation only
		OCinSoutput['rft.inst'] = data.PublisherName;							-- book and dissertation
	end

	OCinSoutput["rft.date"] = data.Date;										-- common to book, journal, dissertation

	for id_name, id_value in pairs (data.ID_list) do							-- identifiers are treated as common to all formats
		if 'ISBN' == id_name then
			id_value = id_value:gsub ("[^-0-9X]", "");							-- keep only digits, hyphens and X
		end

		local handler = cfg.id_handlers[id_name];
		local coins_key = handler.COinS;
		local key_text = coins_key or "";

		if 'info' == string.sub (key_text, 1, 4) then							-- ids that are in the info: registry
			OCinSoutput["rft_id"] = table.concat {coins_key, "/", id_value};
		elseif 'rft' == string.sub (key_text, 1, 3) then						-- isbn, issn, eissn, etc. that have defined COinS keywords
			OCinSoutput[coins_key] = id_value;
		elseif coins_key then													-- any other non-nil COinS value: provide a url
			OCinSoutput["rft_id"] = table.concat {handler.prefix, id_value};
		end
	end

	for position, author in ipairs (data.Authors) do
		local last = coins_cleanup (author.last);								-- replace nowiki stripmarkers, non-printing or invisible characters
		local first = coins_cleanup (author.first or '');

		if is_set (last) then													-- an author without a last name contributes nothing
			if not is_set (first) then
				OCinSoutput["rft.au"] = last;
			elseif 1 == position then											-- only the first author gets separate last / first keys
				OCinSoutput["rft.aulast"] = last;
				OCinSoutput["rft.aufirst"] = first;
			else
				OCinSoutput["rft.au"] = table.concat {last, ", ", first};
			end
		end
	end

	OCinSoutput.rft_id = data.URL;
	OCinSoutput.rfr_id = table.concat {"info:sid/", mw.site.server:match ("[^/]*$"), ":", data.RawPage};
	setmetatable (OCinSoutput, nil);											-- from here on it is a plain array of strings

	table.sort (OCinSoutput);													-- sort, then put the version string in front
	table.insert (OCinSoutput, 1, "ctx_ver=" .. ctx_ver);
	return table.concat (OCinSoutput, "&");
end
327
328
329 --[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------
330
331 Sets local cfg table and imported functions table to same (live or sandbox) as that used by the other modules.
332
333 ]]
334
-- cfg_table_ptr: configuration table to store in cfg.
-- utilities_page_ptr: table whose is_set, in_array and remove_wiki_link members are stored in the names
-- of the same spelling. Returns nothing.
local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
	cfg = cfg_table_ptr;

	local utilities = utilities_page_ptr;										-- import functions from the selected utilities module
	is_set, in_array, remove_wiki_link = utilities.is_set, utilities.in_array, utilities.remove_wiki_link;
end
342
343
344
345 return {
346 make_coins_title = make_coins_title,
347 get_coins_pages = get_coins_pages,
348 COinS = COinS,
349 set_selected_modules = set_selected_modules,
350 }