Module:Citation/CS1/Identifiers
< Module:Citation | CS1
Jump to navigation
Jump to search
Documentation for this module may be created at Module:Citation/CS1/Identifiers/doc
1 local identifiers = {};
2
3
4 --[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
5 ]]
6
7 local is_set, in_array, set_error, select_one, add_maint_cat; -- functions in Module:Citation/CS1/Utilities
8
9 local z; -- table of tables defined in Module:Citation/CS1/Utilities
10
11 local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
12
13
--[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------

Builds the wikitext for an identifier that is rendered as an external link: a wikilink made from options.link
and options.label, then the separator, then a bracketed external link whose url is prefix .. id .. suffix and
whose visible text is the nowiki'd identifier.

Fields read from options:
	link, label	- target and display text of the leading wikilink
	separator	- text between the wikilink and the external link; a single space when nil
	prefix, suffix	- url text placed before / after the identifier; suffix is the empty string when nil
	id		- the identifier; used in the url and, through mw.text.nowiki(), as the link text
	encode		- when true or nil the url copy of id is passed through mw.uri.encode(); any other value
			  leaves it untouched

]]

local function external_link_id(options)
	local separator = options.separator or " ";
	local suffix = options.suffix or "";
	local url_id = options.id;

	if nil == options.encode or true == options.encode then			-- encoding is the default
		url_id = mw.uri.encode( url_id );
	end

	return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
		options.link, options.label, separator,
		options.prefix, url_id, suffix,
		mw.text.nowiki(options.id)						-- display text is the unencoded id
	);
end
31
32
--[[--------------------------< I N T E R N A L _ L I N K _ I D >----------------------------------------------

Builds the wikitext for an identifier that is rendered as an internal (wiki) link: a wikilink made from
options.link and options.label, then the separator, then a second wikilink whose target is
prefix .. id .. suffix and whose visible text is the nowiki'd identifier.

Fields read from options: link, label, separator (a single space when nil), prefix, id, suffix (empty string
when nil).

]]

local function internal_link_id(options)
	local separator = options.separator or " ";
	local suffix = options.suffix or "";
	local display = mw.text.nowiki(options.id);

	return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]',
		options.link, options.label, separator,
		options.prefix, options.id, suffix,
		display
	);
end
46
47
--[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------

Checksum test shared by ISBN-10 and ISSN. Every character, check digit included, is weighted by its distance
from the end of the identifier (first character gets weight len, last character gets weight 1); an uppercase
'X' counts as 10. The identifier is good when the weighted sum is a multiple of 11.

The caller must already have checked the length and removed hyphens, spaces and any other characters that do
not belong in the identifier; this function does no such validation.

	isxn_str	- the bare identifier
	len		- its expected length (10 for isbn-10, 8 for issn)

Returns true when the checksum is good, false otherwise. ISBN-13 is handled by is_valid_isxn_13().

]]

local function is_valid_isxn (isxn_str, len)
	local checksum = 0;

	for pos = 1, len do
		local ch = isxn_str:sub (pos, pos);
		if '' == ch then							-- ran off the end of a short string
			break;
		end
		local value = ('X' == ch) and 10 or tonumber (ch);			-- 'X' stands for ten
		checksum = checksum + value * (len + 1 - pos);
	end

	return 0 == checksum % 11;
end
71
72
--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------

Checksum test shared by ISBN-13 and ISMN. The first thirteen characters, check digit included, are summed with
alternating weights: characters at odd positions count once, characters at even positions count three times.
The identifier is good when the sum is a multiple of 10.

The caller must already have checked the length and removed hyphens, spaces and any other non-digit
characters; this function does no such validation.

Returns true when the checksum is good, false otherwise.

]]

local function is_valid_isxn_13 (isxn_str)
	local total = 0;
	local pos = 0;

	for ch in isxn_str:sub (1, 13):gmatch ('.') do
		pos = pos + 1;
		local weight = (0 == pos % 2) and 3 or 1;				-- even positions weigh 3, odd positions weigh 1
		total = total + weight * tonumber (ch);
	end

	return 0 == total % 10;
end
90
91
--[[--------------------------< C H E C K _ I S B N >------------------------------------------------------------

Determines whether an ISBN string is valid.

	isbn_str	- the isbn as written in the citation; may contain hyphens and spaces

Steps:
	1. reject anything that holds a character other than whitespace, hyphen, digit or uppercase X
	2. remove hyphens and space characters
	3. a 10-character result must be digits with an optional X in the last position and must pass
	   is_valid_isxn(); a 13-character result must be all digits, begin with 978 or 979 and must pass
	   is_valid_isxn_13(); any other length is invalid

Returns true when the isbn is valid, false otherwise.

]]

local function check_isbn( isbn_str )
	-- the hyphen is escaped: the Lua manual leaves the meaning of a range next to a class (%s-0) undefined
	if isbn_str:find( "[^%s%-0-9X]" ) then
		return false;
	end

	isbn_str = isbn_str:gsub( "%-", "" ):gsub( " ", "" );				-- remove hyphens and spaces
	local len = isbn_str:len();

	if 10 == len then
		if nil == isbn_str:match( "^%d*X?$" ) then				-- X is only allowed as the check digit
			return false;
		end
		return is_valid_isxn( isbn_str, 10 );
	elseif 13 == len then
		if nil == isbn_str:match( "^97[89]%d*$" ) then				-- isbn-13 begins with 978 or 979
			return false;
		end
		return is_valid_isxn_13( isbn_str );
	end

	return false;									-- wrong length
end
116
117
--[[--------------------------< I S M N >----------------------------------------------------------------------

Validates and formats an ISMN. As checked here, an ismn is 13 digits beginning 9790 that passes the same
check-digit calculation as isbn-13 (is_valid_isxn_13()). See
http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf section 2, pages 9-12.

	id	- the ismn as written in the citation; whitespace, hyphens and endashes are removed before testing

Returns the wikilinked label, the handler's separator and the stripped ismn as plain text (there is no place
to link the ismn to yet), followed by the bad_ismn error message when the ismn is not valid.

]]

local function ismn (id)
	local handler = cfg.id_handlers['ISMN'];
	local valid_ismn = false;

	-- The endash is removed as a whole character. A byte-oriented set such as [%s-–] would instead remove each
	-- of its three bytes wherever they occur, which silently swallows unrelated characters built from the same
	-- bytes and can leave invalid UTF-8 in the displayed identifier.
	id = id:gsub( "[%s%-]", "" ):gsub( "–", "" );

	if 13 == id:len() and nil ~= id:match( "^9790%d*$" ) then			-- ismn must be 13 digits and begin 9790
		valid_ismn = is_valid_isxn_13 (id);					-- validate the check digit
	end

	-- no link target exists yet; switch to internal_link_id() or external_link_id() when one does
	local text = "[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id;

	if not valid_ismn then
		text = text .. ' ' .. set_error( 'bad_ismn' )				-- add an error message if the ismn is invalid
	end

	return text;
end
150
151
--[[--------------------------< I S S N >----------------------------------------------------------------------

Validates and formats an issn or eissn.

Editors sometimes separate the two groups of four digits with a space, which used to produce a broken link:

	|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link

To prevent that, whitespace, hyphens and endashes are removed and, when the result validates, a hyphen is
inserted at the midpoint. A valid issn is exactly 8 characters, digits only except that the last may be an
uppercase X, and its check digit must agree with is_valid_isxn(). A valid issn is always displayed with the
hyphen, even when it was written as one group of 8 digits; an invalid issn is displayed exactly as the editor
wrote it, followed by the bad_issn error message.

	id	- the issn as written in the citation
	e	- truthy when the value came from |eissn=; selects the EISSN handler and the 'e' form of the
		  error message

]]

local function issn(id, e)
	local issn_copy = id;								-- unadulterated copy; displayed when the issn does not validate
	local handler = cfg.id_handlers[e and 'EISSN' or 'ISSN'];
	local valid_issn = false;

	-- The endash is removed as a whole character. A byte-oriented set such as [%s-–] would instead remove each
	-- of its three bytes wherever they occur, so an unrelated character built from those bytes would vanish
	-- and a malformed issn could validate.
	id = id:gsub( "[%s%-]", "" ):gsub( "–", "" );

	if 8 == id:len() and nil ~= id:match( "^%d*X?$" ) then				-- 8 characters: digits, X allowed only in the last position
		valid_issn = is_valid_isxn(id, 8);					-- validate the check digit
	end

	if valid_issn then
		id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 );		-- display the correctly formatted version
	else
		id = issn_copy;								-- show the invalid issn as written, with error message
	end

	local text = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	if not valid_issn then
		text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' )		-- add an error message if the issn is invalid
	end

	return text
end
202
203
--[[--------------------------< A M A Z O N >------------------------------------------------------------------

Formats a link to Amazon and does simple error checking on the asin:
	- the asin must be exactly 10 characters, each a digit or an uppercase letter
	- when it is 10 digits (or 9 digits and a terminal X) it must be a valid isbn-10; when it is, a maintenance
	  category is added so that a bot or awb script can replace |asin= with |isbn=
	- otherwise the first character must be an uppercase letter

Any failure appends the bad_asin error message to the link.

	id	- the asin
	domain	- optional top-level domain; 'com' when not set; jp and uk become co.<domain>; au, br and mx
		  become com.<domain>; anything else is used as given

]]

local function amazon(id, domain)
	local err_cat = "";
	local ten_alnum = "^" .. ("[%d%u]"):rep (10) .. "$";				-- ten digits / uppercase letters
	local isbn10_shape = "^" .. ("%d"):rep (9) .. "[%dX]$";			-- ten digits, or nine digits and X

	if not id:match (ten_alnum) then
		err_cat = ' ' .. set_error ('bad_asin');				-- asin is not a mix of 10 uppercase alpha and numeric characters
	elseif id:match (isbn10_shape) then
		if check_isbn( id ) then						-- see if asin value is isbn10
			add_maint_cat ('ASIN');
		elseif not is_set (err_cat) then
			err_cat = ' ' .. set_error ('bad_asin');			-- asin is not isbn10
		end
	elseif not id:match ("^%u[%d%u]+$") then
		err_cat = ' ' .. set_error ('bad_asin');				-- asin doesn't begin with uppercase alpha
	end

	if not is_set (domain) then
		domain = "com";
	elseif in_array (domain, {'jp', 'uk'}) then					-- Japan, United Kingdom
		domain = "co." .. domain;
	elseif in_array (domain, {'au', 'br', 'mx'}) then				-- Australia, Brazil, Mexico
		domain = "com." .. domain;
	end

	local handler = cfg.id_handlers['ASIN'];
	local link = external_link_id ({
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix .. domain .. "/dp/",
		id = id,
		encode = handler.encode,
		separator = handler.separator,
	});

	return link .. err_cat;
end
241
242
--[[--------------------------< A R X I V >--------------------------------------------------------------------

See: http://arxiv.org/help/arxiv_identifier

Formats and error checks an arXiv identifier. Three forms are recognized; each may carry a trailing version,
which is a lowercase v followed by one or more digits with no spaces:

	old form:	<archive>/<date code><number>
		<archive> starts with a letter and continues with letters, periods or hyphens
		<date code> is YYMM; the first digit of YY can only be 9 or 0
		<number> is three digits
		accepted when YY is 91-99 or 00-07, MM is 01-12, the date code is not earlier than 9107 and not
		later than 0703

	middle form:	<date code>.<number>
		<number> is four digits
		accepted when YY is 07-14, MM is 01-12 and the date code is not earlier than 0704

	new form:	<date code>.<number>
		<number> is five digits
		accepted when YY is 15 or later and MM is 01-12 (future years are not tested)

Anything else gets the bad_arxiv error message.

	id	- the arXiv identifier
	class	- optional arXiv classification; when set, a bracketed external link to the archive page for that
		  class is appended to the result

]]

local function arxiv (id, class)
	local handler = cfg.id_handlers['ARXIV'];
	local err_cat = '';
	local valid = false;								-- stays false when id matches none of the forms
	local year, month;

	local function has_form (core)							-- does id match core, without or with a version?
		return id:match ('^' .. core .. '$') or id:match ('^' .. core .. 'v%d+$');
	end

	local function month_ok ()
		return 1 <= month and 12 >= month;
	end

	if has_form ('%a[%a%.%-]+/[90]%d[01]%d%d%d%d') then				-- old form
		year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
		year, month = tonumber(year), tonumber(month);
		valid = (90 < year or 8 > year) and month_ok ()			-- year and month in range
			and not (91 == year and 7 > month)				-- not before the first accepted month
			and not (7 == year and 3 < month);				-- not after the last accepted month
	elseif has_form ('%d%d[01]%d%.%d%d%d%d') then					-- middle form
		year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
		year, month = tonumber(year), tonumber(month);
		valid = (7 <= year and 14 >= year) and month_ok ()
			and not (7 == year and 4 > month);				-- in year 07 the form starts in April
	elseif has_form ('%d%d[01]%d%.%d%d%d%d%d') then				-- new form
		year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
		year, month = tonumber(year), tonumber(month);
		valid = 15 <= year and month_ok ();					-- doesn't test for future years
	end

	if not valid then
		err_cat = ' ' .. set_error( 'bad_arxiv' );				-- set error message
	end

	local text = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

	if not is_set (class) then
		return text .. '';							-- nothing to append
	end

	-- external link within square brackets, not wikilink
	return text .. ' [[//arxiv.org/archive/' .. class .. ' ' .. class .. ']]';
end
316
317
--[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------

lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
	1. Remove all whitespace.
	2. If there is a forward slash in the string, drop it and everything to its right.
	3. If there is a hyphen in the string, remove it and left-fill the part that followed it with zeroes
	   until that part is six characters long (longer parts are left alone).

The normalization rules also require the part after the hyphen to be six or fewer digits; that is NOT checked
here. This function only produces the normalized string for lccn() to validate.

]]

local function normalize_lccn (lccn)
	local normalized = (lccn:gsub ("%s", ""));					-- 1. strip whitespace; parentheses drop gsub's count

	local slash_at = normalized:find ('/', 1, true);
	if slash_at then
		normalized = normalized:sub (1, slash_at - 1);				-- 2. keep only what precedes the first slash
	end

	local head, tail = normalized:match ("(.+)%-(.+)");				-- 3. split around the hyphen
	if tail then
		local padding = string.rep ("0", 6 - tail:len());			-- empty when tail is already 6 or more characters
		normalized = head .. padding .. tail;
	end

	return normalized;
end
350
351
--[[--------------------------< L C C N >----------------------------------------------------------------------

Formats an LCCN link and does simple error checking. The lccn is first normalized by normalize_lccn(); the
normalized form must be 8-12 characters long. Its length dictates what the leading characters may be; the
rightmost eight are always digits.
http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/

	length = 8 then all digits
	length = 9 then one lower case letter and eight digits
	length = 10 then all digits, or two lower case letters and eight digits
	length = 11 then three lower case letters and eight digits, or one lower case letter and ten digits
	length = 12 then two lower case letters and ten digits

An lccn that passes those tests but was written with whitespace in it is also flagged. The link is always
built from the lccn as written, not from the normalized form.

]]

local function lccn(lccn)
	local handler = cfg.id_handlers['LCCN'];
	local err_cat = '';								-- presume that LCCN is valid
	local id = normalize_lccn (lccn);						-- canonical form (no whitespace, hyphens, forward slashes)
	local len = id:len();
	local d8 = ('%d'):rep (8);							-- pattern for the eight trailing digits
	local well_formed;

	if 8 == len then
		well_formed = not id:match ('%D');					-- dddddddd
	elseif 9 == len then
		well_formed = id:match ('^%l' .. d8 .. '$');				-- adddddddd
	elseif 10 == len then
		well_formed = not id:match ('%D')					-- dddddddddd
			or id:match ('^%l%l' .. d8 .. '$');				-- aadddddddd
	elseif 11 == len then
		well_formed = id:match ('^%l%l%l' .. d8 .. '$')			-- aaadddddddd
			or id:match ('^%l%d%d' .. d8 .. '$');				-- adddddddddd
	elseif 12 == len then
		well_formed = id:match ('^%l%l%d%d' .. d8 .. '$');			-- aadddddddddd
	else
		well_formed = false;							-- wrong length
	end

	if not well_formed then
		err_cat = ' ' .. set_error( 'bad_lccn' );				-- set an error message
	end

	if not is_set (err_cat) and nil ~= lccn:find ('%s') then
		err_cat = ' ' .. set_error( 'bad_lccn' );				-- lccn contains a space, set an error message
	end

	return external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
407
408
--[[--------------------------< P M I D >----------------------------------------------------------------------

Formats a PMID and does simple error checking. PMIDs are sequential numbers beginning at 1 and counting up, so
the id must contain only digits and its value must lie between 1 and test_limit inclusive. The value of local
variable test_limit needs to be raised periodically as more PMIDs are issued.

Returns the external link for the PMID, followed by the bad_pmid error message when the test fails.

]]

local function pmid(id)
	local test_limit = 30000000;							-- update this value as PMIDs approach
	local handler = cfg.id_handlers['PMID'];
	local err_cat = '';								-- presume that PMID is valid
	local plausible = false;

	if not id:match ('%D') then							-- only digits, so safe to compare numerically
		local id_num = tonumber (id);
		plausible = 1 <= id_num and test_limit >= id_num;
	end

	if not plausible then
		err_cat = ' ' .. set_error( 'bad_pmid' );				-- set an error message
	end

	return external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
434
435
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------

Determines whether a PMC identifier's online version is still embargoed by comparing the date in |embargo=
against the current time; both are converted to timestamps with the content language's formatDate().

Returns:
	the content of |embargo= when the embargo timestamp is not earlier than the current timestamp
	the empty string when the embargo has expired (a maintenance category is added in that case), when
	either date could not be converted, or when |embargo= is not set

]]

local function is_embargoed (embargo)
	if not is_set (embargo) then
		return '';								-- |embargo= not set
	end

	local lang = mw.getContentLanguage();
	local embargo_ok, embargo_stamp = pcall( lang.formatDate, lang, 'U', embargo );
	local today_ok, today_stamp = pcall( lang.formatDate, lang, 'U' );

	if not (embargo_ok and today_ok) then
		return '';								-- one of the dates could not be converted
	end

	if tonumber( embargo_stamp ) < tonumber( today_stamp ) then
		add_maint_cat ('embargo')
		return '';								-- unset because embargo has expired
	end

	return embargo;									-- still embargoed
end
462
463
--[[--------------------------< P M C >------------------------------------------------------------------------

Formats a PMC, does simple error checking, and honors embargoes.

	id	- the PMC number
	embargo	- the embargo date when the article is still embargoed, otherwise empty or nil

When embargo is set the PMC identifier is rendered as the wikilinked label, the handler's separator and the
plain number, without a link to the article. Otherwise the identifier is linked through
cfg.id_handlers['PMC'].prefix. Both renderings use the same label-separator-number layout.

Embargo date testing is done earlier by is_embargoed(), because a citation that has |pmc=<value> but no |url=
links |title= with the PMC link. is_embargoed() returns the embargo date while the article is still embargoed
and an empty string otherwise.

PMCs are sequential numbers beginning at 1 and counting up, so the id must contain only digits and its value
must lie between 1 and test_limit inclusive; otherwise the bad_pmc error message is appended. The value of
local variable test_limit needs to be raised periodically as more PMCs are issued.

]]

local function pmc(id, embargo)
	local test_limit = 5000000;							-- update this value as PMCs approach
	local handler = cfg.id_handlers['PMC'];
	local err_cat = '';								-- presume that PMC is valid

	if id:match("[^%d]") then							-- if PMC has anything but digits
		err_cat = ' ' .. set_error( 'bad_pmc' );				-- set an error message
	else										-- PMC is only digits
		local id_num = tonumber(id);						-- convert id to a number for range testing
		if 1 > id_num or test_limit < id_num then				-- if PMC is outside test limit boundaries
			err_cat = ' ' .. set_error( 'bad_pmc' );			-- set an error message
		end
	end

	if is_set (embargo) then
		-- still embargoed so no external link; no literal colon here, the handler's separator is the only
		-- text between label and number, exactly as in the linked rendering
		return "[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id .. err_cat;
	end

	-- no embargo date or embargo has expired, ok to link to article
	return external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
505
506
--[[--------------------------< D O I >------------------------------------------------------------------------

Formats a DOI and checks for DOI errors.

DOI names contain two parts: prefix and suffix separated by a forward slash.
	Prefix: directory indicator '10.' followed by a registrant code
	Suffix: character string of any length chosen by the registrant

The doi name must begin with '10.', must contain a forward slash, must not contain whitespace or endashes, and
must not end with a period or a comma; otherwise the bad_doi error message is appended.

DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces,
endash, and terminal punctuation may not be technically correct but it appears that in practice these
characters are rarely if ever used in doi names.

	id		- the doi name
	inactive	- optional date since which the doi has been inactive; when set the doi is shown as plain
			  text after the wikilinked label, the page is added to an inactive-DOI error category
			  (by year when a four-digit year can be found in the value), and the inactive message
			  is appended

]]

local function doi(id, inactive)
	local handler = cfg.id_handlers['DOI'];
	local text, inactive_note;

	if is_set(inactive) then
		local category = "Pages with inactive DOIs";				-- used when inactive doesn't contain a recognizable year
		local inactive_year = inactive:match("%d%d%d%d") or '';		-- try to get the year portion from the inactive date
		if is_set(inactive_year) then
			category = "Pages with DOIs inactive since " .. inactive_year;
		end

		text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id;	-- inactive, so not linked
		table.insert( z.error_categories, category );
		inactive_note = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
	else
		text = external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
		inactive_note = ""
	end

	local cat = ""
	if not id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then				-- see the rules in the header comment
		cat = ' ' .. set_error( 'bad_doi' );
	end

	return text .. inactive_note .. cat
end
549
550
--[[--------------------------< H D L >------------------------------------------------------------------------

Formats an HDL with minor error checking.

HDL names contain two parts: prefix and suffix separated by a forward slash.
	Prefix: character string using any character in the UCS-2 character set except '/'
	Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant

The HDL name must contain a forward slash, must not contain whitespace or endashes, and must not end with a
period or a comma; otherwise the bad_hdl error message is appended to the link.

HDL names are case-insensitive and can incorporate any printable Unicode characters so the test for endashes
and terminal punctuation may not be technically correct but it appears that in practice these characters are
rarely if ever used in HDLs.

]]

local function hdl(id)
	local handler = cfg.id_handlers['HDL'];
	local link = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	if id:match("^[^%s–]-/[^%s–]-[^%.,]$") then					-- well formed, nothing to add
		return link;
	end

	return link .. ' ' .. set_error( 'bad_hdl' );
end
579
580
--[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------

Formats an OpenLibrary link, and checks for associated errors. A good identifier is one or more digits followed
by a single type letter, which selects the url path:

	A - authors
	M - books
	W - works

Anything else is linked without a type path and gets the bad_ol error message.

]]

local function openlibrary(id)
	local paths = { A = 'authors/OL', M = 'books/OL', W = 'works/OL' };
	local handler = cfg.id_handlers['OL'];
	local code = id:match("^%d+([AMW])$");						-- only digits followed by 'A', 'M', or 'W'
	local path = code and paths[code];						-- nil when the id is malformed

	local link = external_link_id({link=handler.link, label=handler.label,
		prefix=handler.prefix .. (path or 'OL'),
		id=id, separator=handler.separator, encode = handler.encode});

	if not path then
		link = link .. ' ' .. set_error( 'bad_ol' );
	end

	return link;
end
609
610
--[[--------------------------< M E S S A G E _ I D >----------------------------------------------------------

Validates and formats a usenet message id. Simple error checking: the id must have the shape
id-left@id-right (at least one character on each side of an '@'), must not begin with '<' and must not end
with '>'. A bad id is still linked but gets the bad_message_id error message.

]]

local function message_id (id)
	local handler = cfg.id_handlers['USENETID'];
	local link = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	local has_both_sides = id:match('^.+@.+$');					-- something, '@', something
	local no_angle_brackets = id:match('^[^<].*[^>]$');				-- not wrapped in '<' and/or '>'

	if has_both_sides and no_angle_brackets then
		return link
	end

	return link .. ' ' .. set_error( 'bad_message_id' )				-- add an error message if the message id is invalid
end
630
631
--[[--------------------------< O C L C >----------------------------------------------------------------------

Validates and formats an oclc id. https://www.oclc.org/batchload/controlnumber.en.html

Accepted forms (the forms are mutually exclusive, so at most one can match):
	ocm followed by exactly 8 digits		(001 field, 12 characters)
	ocn followed by exactly 9 digits		(001 field, 12 characters)
	on followed by 10 or more digits		(001 field, 12 characters)
	(OCoLC) followed by 1 to 9 digits, no leading zero	(035 field)
	1 to 10 digits with no prefix

When the id is well formed only its digits are used in the external link; otherwise the id is linked as
written and the bad_oclc error message is appended.

]]

local function oclc (id)
	local handler = cfg.id_handlers['OCLC'];
	local err_msg = '';								-- empty string for concatenation
	local number;

	local forms = {									-- { pattern capturing the digits, optional maximum digit count }
		{ '^ocm(' .. ('%d'):rep (8) .. ')$' },
		{ '^ocn(' .. ('%d'):rep (9) .. ')$' },
		{ '^on(' .. ('%d'):rep (9) .. '%d+)$' },
		{ '^%(OCoLC%)([1-9]%d*)$', 9 },					-- raise the limit when oclc issues 10-digit numbers
		{ '^(%d+)$', 10 },							-- raise the limit when oclc issues 11-digit numbers
	};

	for _, form in ipairs (forms) do
		local digits = id:match (form[1]);
		if digits then
			local limit = form[2];
			if not limit or limit >= digits:len() then
				number = digits;
			end
			break;								-- only one form can match
		end
	end

	if number then									-- proper format
		id = number;								-- exclude prefix, if any, from external link
	else
		err_msg = ' ' .. set_error( 'bad_oclc' )				-- add an error message if the id is malformed
	end

	return external_link_id({link=handler.link, label=handler.label,
		prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;
end
672
673
--[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------

Takes a table of IDs created by extract_ids() and turns it into a sequence of formatted ID outputs, sorted by
identifier label.

inputs:
	id_list – table of identifiers built by extract_ids(); keys are the upper case identifier names used to
		index cfg.id_handlers, values are the identifier values
	options – table of various template parameter values used to modify some manually handled identifiers
		(DoiBroken, Class, ASINTLD, Embargo, IgnoreISBN)

How an identifier is rendered depends on the mode in its cfg.id_handlers entry: 'external' and 'internal' go
straight to external_link_id() / internal_link_id(); 'manual' identifiers are rendered by their dedicated
function. An unrecognized mode, or a manual identifier without a dedicated function, raises an error.

]]

local function build_id_list( id_list, options )
	local new_list = {};

	-- renderers for 'manual' identifiers; each gets the identifier value and the handler proxy
	local manual = {
		DOI = function (value) return doi( value, options.DoiBroken ); end,
		HDL = function (value) return hdl( value ); end,
		ARXIV = function (value) return arxiv( value, options.Class ); end,
		ASIN = function (value) return amazon( value, options.ASINTLD ); end,
		LCCN = function (value) return lccn( value ); end,
		OL = function (value) return openlibrary( value ); end,
		OLA = function (value) return openlibrary( value ); end,
		PMC = function (value) return pmc( value, options.Embargo ); end,
		PMID = function (value) return pmid( value ); end,
		OCLC = function (value) return oclc( value ); end,
		ISMN = function (value) return ismn( value ); end,
		ISSN = function (value) return issn( value ); end,
		EISSN = function (value) return issn( value, true ); end,		-- true distinguishes eissn from issn
		USENETID = function (value) return message_id( value ); end,
		ISBN = function (value, handler)
			local ISBN = internal_link_id( handler );
			if not check_isbn( value ) and not is_set(options.IgnoreISBN) then
				ISBN = ISBN .. set_error( 'bad_isbn', {}, false, " ", "" );
			end
			return ISBN;
		end,
	};

	for name, value in pairs( id_list ) do
		-- proxy that holds the id itself and falls back to the read-only cfg entry for everything else
		local handler = setmetatable( { ['id'] = value }, {
			__index = function (_, field) return cfg.id_handlers[name][field] end
		} );
		local mode = handler.mode;
		local text;

		if 'external' == mode then
			text = external_link_id( handler );
		elseif 'internal' == mode then
			text = internal_link_id( handler );
		elseif 'manual' ~= mode then
			error( cfg.messages['unknown_ID_mode'] );
		else
			local render = manual[name];
			if not render then
				error( cfg.messages['unknown_manual_ID'] );
			end
			text = render( value, handler );
		end

		table.insert( new_list, {handler.label, text} );
	end

	table.sort( new_list, function (a, b) return a[1] < b[1]; end );		-- order by label

	for index, pair in ipairs( new_list ) do					-- keep only the formatted text
		new_list[index] = pair[2];
	end

	return new_list;
end
747
748
--[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------

Populates the ID table from the template arguments using the configuration settings. For every entry in
cfg.id_handlers, select_one() is asked to pick a value from args using that entry's list of parameter aliases
(it is passed 'redundant_parameters' as the error to use when more than one alias is present). Identifiers for
which a value was found are added to the result, keyed by the upper case identifier name.

]]

local function extract_ids( args )
	local found = {};								-- identifiers found in args

	for name, handler in pairs( cfg.id_handlers ) do				-- name is e.g. 'ISBN'; handler is its cfg table
		local value = select_one( args, handler.parameters, 'redundant_parameters' );
		if is_set( value ) then
			found[name] = value;
		end
	end

	return found;
end
765
766
--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Points this module's cfg table, imported utility functions and z table at the same (live or sandbox) copies
that the other modules use.

	cfg_table_ptr		- the configuration table to use as cfg
	utilities_page_ptr	- the utilities module table; is_set, in_array, set_error, select_one,
				  add_maint_cat and z are taken from it

]]

local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
	local utilities = utilities_page_ptr;

	cfg = cfg_table_ptr;
	z = utilities.z;								-- table of tables in Module:Citation/CS1/Utilities

	-- functions imported from the selected Module:Citation/CS1/Utilities module
	is_set, in_array, set_error, select_one, add_maint_cat =
		utilities.is_set, utilities.in_array, utilities.set_error, utilities.select_one, utilities.add_maint_cat;
end
784
785
786
-- functions made available to the other citation modules
return {
	build_id_list = build_id_list,
	extract_ids = extract_ids,
	is_embargoed = is_embargoed,
	set_selected_modules = set_selected_modules,
};