Module:Citation/CS1/Identifiers

The educational technology and digital learning wiki
Jump to navigation Jump to search

Documentation for this module may be created at Module:Citation/CS1/Identifiers/doc

  1 local identifiers = {};
  2 
  3 
  4 --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
  5 ]]
  6 
  7 local is_set, in_array, set_error, select_one, add_maint_cat;					-- functions in Module:Citation/CS1/Utilities
  8 
  9 local z;																		-- table of tables defined in Module:Citation/CS1/Utilities
 10 
 11 local cfg;																		-- table of configuration tables that are defined in Module:Citation/CS1/Configuration
 12 
 13 
 14 --[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
 15 
 16 Formats a wiki style external link
 17 
 18 ]]
 19 
 20 local function external_link_id(options)
 21 	local url_string = options.id;
 22 	if options.encode == true or options.encode == nil then
 23 		url_string = mw.uri.encode( url_string );
 24 	end
 25 	return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
 26 		options.link, options.label, options.separator or "&nbsp;",
 27 		options.prefix, url_string, options.suffix or "",
 28 		mw.text.nowiki(options.id)
 29 	);
 30 end
 31 
 32 
 33 --[[--------------------------< I N T E R N A L _ L I N K _ I D >----------------------------------------------
 34 
 35 Formats a wiki style internal link
 36 
 37 ]]
 38 
 39 local function internal_link_id(options)
 40 	return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]',
 41 		options.link, options.label, options.separator or "&nbsp;",
 42 		options.prefix, options.id, options.suffix or "",
 43 		mw.text.nowiki(options.id)
 44 	);
 45 end
 46 
 47 
 48 --[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------
 49 
 50 ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit.
 51 ISBN-13 is checked in check_isbn().
 52 
 53 If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length
 54 and stripped of dashes, spaces and other non-isxn characters.
 55 
 56 ]]
 57 
 58 local function is_valid_isxn (isxn_str, len)
 59 	local temp = 0;
 60 	isxn_str = { isxn_str:byte(1, len) };	-- make a table of byte values '0' → 0x30 .. '9'  → 0x39, 'X' → 0x58
 61 	len = len+1;							-- adjust to be a loop counter
 62 	for i, v in ipairs( isxn_str ) do		-- loop through all of the bytes and calculate the checksum
 63 		if v == string.byte( "X" ) then		-- if checkdigit is X (compares the byte value of 'X' which is 0x58)
 64 			temp = temp + 10*( len - i );	-- it represents 10 decimal
 65 		else
 66 			temp = temp + tonumber( string.char(v) )*(len-i);
 67 		end
 68 	end
 69 	return temp % 11 == 0;					-- returns true if calculation result is zero
 70 end
 71 
 72 
 73 --[[--------------------------< IS _ V A L I D _ I S X N  _ 1 3 >----------------------------------------------
 74 
 75 ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit.
 76 If the number is valid, the result will be 0. Before calling this function, isbn-13/ismn must be checked for length
 77 and stripped of dashes, spaces and other non-isxn-13 characters.
 78 
 79 ]]
 80 
 81 local function is_valid_isxn_13 (isxn_str)
 82 	local temp=0;
 83 	
 84 	isxn_str = { isxn_str:byte(1, 13) };										-- make a table of byte values '0' → 0x30 .. '9'  → 0x39
 85 	for i, v in ipairs( isxn_str ) do
 86 		temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) );				-- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
 87 	end
 88 	return temp % 10 == 0;														-- sum modulo 10 is zero when isbn-13/ismn is correct
 89 end
 90 
 91 
 92 --[[--------------------------< C H E C K _ I S B N >------------------------------------------------------------
 93 
 94 Determines whether an ISBN string is valid
 95 
 96 ]]
 97 
 98 local function check_isbn( isbn_str )
 99 	if nil ~= isbn_str:match("[^%s-0-9X]") then return false; end		-- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
100 	isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" );	-- remove hyphens and spaces
101 	local len = isbn_str:len();
102  
103 	if len ~= 10 and len ~= 13 then
104 		return false;
105 	end
106 
107 	if len == 10 then
108 		if isbn_str:match( "^%d*X?$" ) == nil then return false; end
109 		return is_valid_isxn(isbn_str, 10);
110 	else
111 		local temp = 0;
112 		if isbn_str:match( "^97[89]%d*$" ) == nil then return false; end		-- isbn13 begins with 978 or 979; ismn begins with 979
113 		return is_valid_isxn_13 (isbn_str);
114 	end
115 end
116 
117 
118 --[[--------------------------< I S M N >----------------------------------------------------------------------
119 
 120 Determines whether an ISMN string is valid.  Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the
121 same check digit calculations.  See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
122 section 2, pages 9–12.
123 
124 ]]
125 
local function ismn (id)
	-- Renders an ISMN as '[[link|label]]<separator><stripped id>', with a bad_ismn error appended
	-- when the id is not 13 digits beginning 9790 or when its checksum fails.
	local handler = cfg.id_handlers['ISMN'];

	id = id:gsub( "[%s-–]", "" );												-- strip spaces, hyphens, and endashes from the ismn

	local valid_ismn = false;
	if 13 == id:len() and nil ~= id:match( "^9790%d*$" ) then					-- ismn must be 13 digits and begin 9790
		valid_ismn = is_valid_isxn_13 (id);										-- then the check digit decides
	end

	-- plain text rather than internal_link_id()/external_link_id() because there is no place to link to yet
	local text = "[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id;

	if false == valid_ismn then
		return text .. ' ' .. set_error( 'bad_ismn' );							-- add an error message if the ismn is invalid
	end

	return text;
end
150 
151 
152 --[[--------------------------< I S S N >----------------------------------------------------------------------
153 
154 Validate and format an issn.  This code fixes the case where an editor has included an ISSN in the citation but
155 has separated the two groups of four digits with a space.  When that condition occurred, the resulting link looked
156 like this:
157 
158 	|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327]  -- can't have spaces in an external link
159 	
160 This code now prevents that by inserting a hyphen at the issn midpoint.  It also validates the issn for length
161 and makes sure that the checkdigit agrees with the calculated value.  Incorrect length (8 digits), characters
162 other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message.  The
163 issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
164 
165 ]]
166 
local function issn(id, e)
	-- Renders an ISSN (or an EISSN when e is truthy) as an external link.
	-- A valid id is displayed in the hyphenated dddd-dddd form; an invalid id is displayed
	-- exactly as supplied, followed by a bad_issn error message.
	local handler = cfg.id_handlers[e and 'EISSN' or 'ISSN'];
	local as_given = id;														-- unadulterated copy, shown when validation fails

	local compact = id:gsub( "[%s-–]", "" );									-- strip spaces, hyphens, and endashes from the issn

	local valid_issn = false;
	if 8 == compact:len() and nil ~= compact:match( "^%d*X?$" ) then			-- 8 characters, digits only except an optional X in the last position
		valid_issn = is_valid_isxn(compact, 8);									-- then the check digit decides
	end

	local shown;
	if true == valid_issn then
		shown = string.sub( compact, 1, 4 ) .. "-" .. string.sub( compact, 5 );	-- correctly formatted version
	else
		shown = as_given;														-- show the invalid issn as it was written
	end

	local text = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=shown,separator=handler.separator, encode=handler.encode})

	if false == valid_issn then
		text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' )			-- add an error message if the issn is invalid
	end

	return text
end
202 
203 
204 --[[--------------------------< A M A Z O N >------------------------------------------------------------------
205 
206 Formats a link to Amazon.  Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
207 characters.  If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
208 isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
209 Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
210 
211 ]]
212 
local function amazon(id, domain)
	-- Renders an ASIN as an external link to the Amazon site selected by domain and appends a
	-- bad_asin error when the id fails the simple checks below.  An all-numeric id that is a
	-- valid isbn10 gets the 'ASIN' maintenance category instead of an error.
	local err_cat = ""
	local ten_chars = id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$");

	if not ten_chars then
		err_cat =  ' ' .. set_error ('bad_asin');								-- asin is not a mix of 10 uppercase alpha and numeric characters
	elseif id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then							-- 10-digit numeric (or 9 digits with terminal X)
		if check_isbn( id ) then												-- see if asin value is isbn10
			add_maint_cat ('ASIN');
		elseif not is_set (err_cat) then
			err_cat =  ' ' .. set_error ('bad_asin');							-- asin is not isbn10
		end
	elseif not id:match("^%u[%d%u]+$") then
		err_cat =  ' ' .. set_error ('bad_asin');								-- asin doesn't begin with uppercase alpha
	end

	-- expand the short domain code into the form used in the url
	if not is_set(domain) then
		domain = "com";
	elseif in_array (domain, {'jp', 'uk'}) then									-- Japan, United Kingdom
		domain = "co." .. domain;
	elseif in_array (domain, {'au', 'br', 'mx'}) then							-- Australia, Brazil, Mexico
		domain = "com." .. domain;
	end

	local handler = cfg.id_handlers['ASIN'];
	local link = external_link_id({link=handler.link,
		label=handler.label, prefix=handler.prefix .. domain .. "/dp/",
		id=id, encode=handler.encode, separator = handler.separator});

	return link .. err_cat;
end
241 
242 
243 --[[--------------------------< A R X I V >--------------------------------------------------------------------
244 
245 See: http://arxiv.org/help/arxiv_identifier
246 
247 format and error check arXiv identifier.  There are three valid forms of the identifier:
248 the first form, valid only between date codes 9108 and 0703 is:
249 	arXiv:<archive>.<class>/<date code><number><version>
250 where:
251 	<archive> is a string of alpha characters - may be hyphenated; no other punctuation
252 	<class> is a string of alpha characters - may be hyphenated; no other punctuation
253 	<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
 254 		first digit of YY for this form can only be 9 or 0
255 	<number> is a three-digit number
256 	<version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)
257 	
258 the second form, valid from April 2007 through December 2014 is:
259 	arXiv:<date code>.<number><version>
260 where:
261 	<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
262 	<number> is a four-digit number
263 	<version> is a 1 or more digit number preceded with a lowercase v; no spaces
264 
265 the third form, valid from January 2015 is:
266 	arXiv:<date code>.<number><version>
267 where:
268 	<date code> and <version> are as defined for 0704-1412
269 	<number> is a five-digit number
270 ]]
271 
local function arxiv (id, class)
	-- Renders an arXiv identifier as an external link, optionally followed by a bracketed link
	-- to the archive named by class.  A bad_arxiv error is appended when the id matches none of
	-- the three layouts or when its YYMM date code is outside the range tested for that layout.
	local handler = cfg.id_handlers['ARXIV'];
	local year, month;
	local malformed;

	local function month_out_of_range ()
		return month < 1 or month > 12;
	end

	if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then	-- <archive>/<date code><number>, w/ & w/o version
		year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
		year, month = tonumber(year), tonumber(month);
		malformed = (year <= 90 and year >= 8)									-- year neither 91..99 nor 00..07
			or month_out_of_range ()
			or (91 == year and month < 7)										-- too early in the first year
			or (7 == year and month > 3);										-- too late in the last year
	elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then	-- <date code>.<4-digit number>, w/ & w/o version
		year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
		year, month = tonumber(year), tonumber(month);
		malformed = year < 7 or year > 14										-- this layout covers years 07..14 only
			or month_out_of_range ()
			or (7 == year and month < 4);										-- in 07 the layout starts in April
	elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then	-- <date code>.<5-digit number>, w/ & w/o version
		year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
		year, month = tonumber(year), tonumber(month);
		malformed = year < 15 or month_out_of_range ();							-- no test for future years
	else
		malformed = true;														-- arXiv id doesn't match any format
	end

	local err_cat = '';
	if malformed then
		err_cat = ' ' .. set_error( 'bad_arxiv' );
	end

	local text = external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;

	local class_link = '';														-- empty string for concatenation
	if is_set (class) then
		class_link = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]';	-- external link within square brackets, not wikilink
	end

	return text .. class_link;
end
316 
317 
318 --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
319 
320 lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
321 1. Remove all blanks.
322 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
323 3. If there is a hyphen in the string:
324 	a. Remove it.
325 	b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
326 		1. All these characters should be digits, and there should be six or less. (not done in this function)
327 		2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
328 
329 Returns a normalized lccn for lccn() to validate.  There is no error checking (step 3.b.1) performed in this function.
330 ]]
331 
local function normalize_lccn (lccn)
	-- Returns the lccn with whitespace removed, everything from the first forward slash onward
	-- dropped, and - when a hyphen is present - the hyphen removed and the part after it
	-- left-filled with zeros to six characters.  No validation is done here.
	local compact = lccn:gsub ("%s", "");										-- 1. strip whitespace

	if string.find (compact, '/') then
		compact = compact:match ("(.-)/");										-- 2. keep only what precedes the first forward slash
	end

	local head, tail = compact:match ("(.+)%-(.+)");							-- 3.a split around the hyphen
	if nil == tail then
		return compact;															-- no hyphen, nothing more to do
	end

	local padding = string.rep ("0", 6 - string.len (tail));					-- 3.b.2 empty when tail already has six or more characters
	return head .. padding .. tail;
end
350 
351 
352 --[[--------------------------< L C C N >----------------------------------------------------------------------
353 
354 Format LCCN link and do simple error checking.  LCCN is a character string 8-12 characters long. The length of
355 the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits.
356 http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
357 
358 length = 8 then all digits
359 length = 9 then lccn[1] is lower case alpha
360 length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
361 length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
362 length = 12 then lccn[1] and lccn[2] are both lower case alpha
363 
364 ]]
365 
local function lccn(lccn)
	-- Renders an LCCN as an external link (using the value as supplied) and appends a bad_lccn
	-- error when the normalized form has the wrong length or character layout, or when an
	-- otherwise acceptable value contains whitespace.
	local handler = cfg.id_handlers['LCCN'];
	local id = normalize_lccn (lccn);											-- canonical form (no whitespace, hyphens, forward slashes)
	local len = id:len();
	local well_formed;

	if 8 == len then															-- dddddddd
		well_formed = nil == id:match("[^%d]");
	elseif 9 == len then														-- adddddddd
		well_formed = nil ~= id:match("%l%d%d%d%d%d%d%d%d");
	elseif 10 == len then														-- aadddddddd or dddddddddd
		well_formed = nil == id:match("[^%d]")
			or nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d");
	elseif 11 == len then														-- aaadddddddd or adddddddddd
		well_formed = nil ~= id:match("^%l%l%l%d%d%d%d%d%d%d%d")
			or nil ~= id:match("^%l%d%d%d%d%d%d%d%d%d%d");
	elseif 12 == len then														-- aadddddddddd
		well_formed = nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d%d%d");
	else
		well_formed = false;													-- wrong length
	end

	local err_cat = '';															-- stays empty while the LCCN looks valid
	if not well_formed then
		err_cat = ' ' .. set_error( 'bad_lccn' );
	end

	if not is_set (err_cat) and nil ~= lccn:find ('%s') then
		err_cat = ' ' .. set_error( 'bad_lccn' );								-- lccn as supplied contains whitespace
	end

	return external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
407 
408 
409 --[[--------------------------< P M I D >----------------------------------------------------------------------
410 
411 Format PMID and do simple error checking.  PMIDs are sequential numbers beginning at 1 and counting up.  This
412 code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable
413 test_limit will need to be updated periodically as more PMIDs are issued.
414 
415 ]]
416 
local function pmid(id)
	-- Renders a PMID as an external link; appends a bad_pmid error when the id contains a
	-- non-digit or lies outside 1..test_limit.
	local test_limit = 30000000;												-- update this value as PMIDs approach
	local handler = cfg.id_handlers['PMID'];
	local acceptable = false;

	if not id:match("[^%d]") then												-- digits only, so a range test is possible
		local id_num = tonumber(id);
		acceptable = not (1 > id_num or test_limit < id_num);
	end

	local err_cat = '';
	if not acceptable then
		err_cat = ' ' .. set_error( 'bad_pmid' );
	end

	return external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
434 
435 
436 --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------
437 
438 Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date.  If embargo date is
 439 in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because
440 |embargo= was not set in this cite.
441 
442 ]]
443 
local function is_embargoed (embargo)
	-- Returns embargo unchanged while its date is today or later; otherwise returns ''.
	-- An expired embargo also adds the 'embargo' maintenance category.  A date that cannot be
	-- formatted is treated like an unset |embargo= (empty string, no category).
	if not is_set (embargo) then
		return '';																-- |embargo= not set
	end

	local lang = mw.getContentLanguage();
	local embargo_ok, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
	local today_ok, todays_date = pcall( lang.formatDate, lang, 'U' );

	if not (embargo_ok and today_ok) then
		return '';																-- one of the two dates could not be produced
	end

	if tonumber( embargo_date ) >= tonumber( todays_date ) then
		return embargo;															-- still embargoed
	end

	add_maint_cat ('embargo')
	return '';																	-- unset because embargo has expired
end
462 
463 
464 --[[--------------------------< P M C >------------------------------------------------------------------------
465 
466 Format a PMC, do simple error checking, and check for embargoed articles.
467 
468 The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
469 be linked to the article.  If the embargo date is today or in the past, or if it is empty or omitted, then the
470 PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.
471 
472 PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
473 has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link.  Function is_embargoed ()
474 returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.
475 
476 PMCs are sequential numbers beginning at 1 and counting up.  This code checks the PMC to see that it contains only digits and is less
477 than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.
478 
479 ]]
480 
local function pmc(id, embargo)
	-- Renders a PMC identifier.  While embargo is set the id is shown as plain text after the
	-- label link; otherwise it is an external link.  A bad_pmc error is appended when the id
	-- contains a non-digit or lies outside 1..test_limit.
	local test_limit = 5000000;													-- update this value as PMCs approach
	local handler = cfg.id_handlers['PMC'];
	local acceptable = false;

	if not id:match("[^%d]") then												-- digits only, so a range test is possible
		local id_num = tonumber(id);
		acceptable = not (1 > id_num or test_limit < id_num);
	end

	local err_cat = '';
	if not acceptable then
		err_cat = ' ' .. set_error( 'bad_pmc' );
	end

	if is_set (embargo) then													-- still embargoed so no external link
		return "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. handler.separator .. id .. err_cat;
	end

	-- no embargo date or embargo has expired, ok to link to article
	return external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
505 
506 
507 --[[--------------------------< D O I >------------------------------------------------------------------------
508 
509 Formats a DOI and checks for DOI errors.
510 
511 DOI names contain two parts: prefix and suffix separated by a forward slash.
512 	Prefix: directory indicator '10.' followed by a registrant code
513 	Suffix: character string of any length chosen by the registrant
514 
515 This function checks a DOI name for: prefix/suffix.  If the doi name contains spaces or endashes, or, if it ends
516 with a period or a comma, this function will emit a bad_doi error message.
517 
518 DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
519 and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
520 if ever used in doi names.
521 
522 ]]
523 
local function doi(id, inactive)
	-- Renders a DOI.  An active DOI becomes an external link.  When inactive is set the DOI is
	-- shown as plain text after the label link, a tracking category is recorded in
	-- z.error_categories, and a parenthetical inactive note is appended.  In both cases a
	-- bad_doi error follows when the name fails the shape test below.
	local handler = cfg.id_handlers['DOI'];
	local text, status;

	if not is_set(inactive) then
		text = external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
		status = ""
	else
		local inactive_year = inactive:match("%d%d%d%d") or '';					-- try to get the year portion from the inactive date
		text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id;

		local category;
		if is_set(inactive_year) then
			category = "Pages with DOIs inactive since " .. inactive_year;
		else
			category = "Pages with inactive DOIs";								-- when inactive doesn't contain a recognizable year
		end
		table.insert( z.error_categories, category );

		status = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
	end

	-- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
	local cat = ""
	if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then
		cat = ' ' .. set_error( 'bad_doi' );
	end

	return text .. status .. cat
end
549 
550 
551 --[[--------------------------< H D L >------------------------------------------------------------------------
552 
553 Formats an HDL with minor error checking.
554 
555 HDL names contain two parts: prefix and suffix separated by a forward slash.
556 	Prefix: character string using any character in the UCS-2 character set except '/'
557 	Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant
558 
559 This function checks a HDL name for: prefix/suffix.  If the HDL name contains spaces, endashes, or, if it ends
560 with a period or a comma, this function will emit a bad_hdl error message.
561 
562 HDL names are case-insensitive and can incorporate any printable Unicode characters so the test for endashes and
563 terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
564 if ever used in HDLs.
565 
566 ]]
567 
local function hdl(id)
	-- Renders an HDL as an external link and appends a bad_hdl error when the name fails the
	-- shape test below.
	local handler = cfg.id_handlers['HDL'];

	local text = external_link_id({link = handler.link, label = handler.label,
			prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	-- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma
	if id:match("^[^%s–]-/[^%s–]-[^%.,]$") then
		return text;
	end

	return text .. ' ' .. set_error( 'bad_hdl' );
end
579 
580 
581 --[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------
582 
583 Formats an OpenLibrary link, and checks for associated errors.
584 
585 ]]
586 
local function openlibrary(id)
	-- Renders an OpenLibrary id as an external link.  The trailing letter selects the url path:
	-- A → authors, M → books, W → works.  Any other shape links under the bare 'OL' path and
	-- gets a bad_ol error appended.
	local code = id:match("^%d+([AMW])$");										-- only digits followed by 'A', 'M', or 'W'
	local handler = cfg.id_handlers['OL'];

	local paths = {
		A = 'authors/OL',
		M = 'books/OL',
		W = 'works/OL',
	};
	local path = paths[code];													-- nil when the id is malformed

	local text = external_link_id({link=handler.link, label=handler.label,
		prefix=handler.prefix .. (path or 'OL'),
		id=id, separator=handler.separator,	encode = handler.encode});

	if not path then
		text = text .. ' ' .. set_error( 'bad_ol' );
	end

	return text;
end
609 
610 
611 --[[--------------------------< M E S S A G E _ I D >----------------------------------------------------------
612 
613 Validate and format a usenet message id.  Simple error checking, looks for 'id-left@id-right' not enclosed in
614 '<' and/or '>' angle brackets.
615 
616 ]]
617 
local function message_id (id)
	-- Renders a usenet message id as an external link and appends a bad_message_id error unless
	-- the id has text on both sides of an '@', does not start with '<', and does not end with '>'.
	local handler = cfg.id_handlers['USENETID'];

	local text = external_link_id({link = handler.link, label = handler.label,
		prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

	local has_both_sides = id:match('^.+@.+$');									-- something@something
	local unbracketed = id:match('^[^<].*[^>]$');								-- first character not '<', last character not '>'

	if not (has_both_sides and unbracketed) then
		text = text .. ' ' .. set_error( 'bad_message_id' )						-- add an error message if the message id is invalid
	end

	return text
end
630 
631 
632 --[[--------------------------< O C L C >----------------------------------------------------------------------
633 
634 Validate and format an oclc id.  https://www.oclc.org/batchload/controlnumber.en.html
635 
636 ]]
637 
local function oclc (id)
	-- Renders an OCLC number as an external link.  Recognized prefixes (ocm, ocn, on, (OCoLC))
	-- are dropped from the linked id.  An id in none of the accepted shapes is linked as
	-- supplied and gets a bad_oclc error appended.
	local handler = cfg.id_handlers['OCLC'];

	local function at_most (digits, limit)										-- digits when no longer than limit, else nil
		if limit < digits:len() then
			return nil;
		end
		return digits;
	end

	local number;
	if id:match('^ocm%d%d%d%d%d%d%d%d$') then									-- ocm prefix and 8 digits; 001 field (12 characters)
		number = id:match('ocm(%d+)');
	elseif id:match('^ocn%d%d%d%d%d%d%d%d%d$') then								-- ocn prefix and 9 digits; 001 field (12 characters)
		number = id:match('ocn(%d+)');
	elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then							-- on prefix and 10 or more digits; 001 field (12 characters)
		number = id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$');
	elseif id:match('^%(OCoLC%)[1-9]%d*$') then									-- (OCoLC) prefix and variable number digits; no leading zeros; 035 field
		number = at_most (id:match('%(OCoLC%)([1-9]%d*)'), 9);					-- constrain to 1 to 9 digits; change this when oclc issues 10-digit numbers
	elseif id:match('^%d+$') then												-- no prefix
		number = at_most (id, 10);												-- constrain to 1 to 10 digits; change this when oclc issues 11-digit numbers
	end

	local err_msg = '';															-- empty string for concatenation
	if number then
		id = number;															-- exclude prefix, if any, from external link
	else
		err_msg = ' ' .. set_error( 'bad_oclc' )								-- the id is malformed
	end

	return external_link_id({link=handler.link, label=handler.label,
		prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg;
end
672 
673 
674 --[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------
675 
676 Takes a table of IDs created by extract_ids() and turns it into a table of formatted ID outputs.
677 
678 inputs:
679 	id_list – table of identifiers built by extract_ids()
680 	options – table of various template parameter values used to modify some manually handled identifiers
681 
682 ]]
683 
local function build_id_list( id_list, options )
	-- renderers for identifiers whose handler mode is 'manual', indexed by uc identifier name; each one receives
	-- the identifier value and the handler proxy and returns the rendered text for that identifier
	local manual_renderers = {
		DOI = function( value ) return doi( value, options.DoiBroken ) end,
		HDL = function( value ) return hdl( value ) end,
		ARXIV = function( value ) return arxiv( value, options.Class ) end,
		ASIN = function( value ) return amazon( value, options.ASINTLD ) end,
		LCCN = function( value ) return lccn( value ) end,
		OL = function( value ) return openlibrary( value ) end,
		OLA = function( value ) return openlibrary( value ) end,			-- OLA is rendered the same way as OL
		PMC = function( value ) return pmc( value, options.Embargo ) end,
		PMID = function( value ) return pmid( value ) end,
		OCLC = function( value ) return oclc( value ) end,
		ISMN = function( value ) return ismn( value ) end,
		ISSN = function( value ) return issn( value ) end,
		EISSN = function( value ) return issn( value, true ) end,			-- true distinguishes eissn from issn
		ISBN = function( value, handler )
			local rendered = internal_link_id( handler );
			if not check_isbn( value ) and not is_set( options.IgnoreISBN ) then
				rendered = rendered .. set_error( 'bad_isbn', {}, false, " ", "" );	-- flag the bad isbn unless told to ignore it
			end
			return rendered;
		end,
		USENETID = function( value ) return message_id( value ) end,
	};

	local entries = {};															-- sequence of {label, rendered text} pairs

	for id_name, value in pairs( id_list ) do									-- id_name is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN']
		-- proxy that holds the identifier value and reads every other field from the read-only cfg handler
		local handler = setmetatable( { id = value }, {
			__index = function( _, field ) return cfg.id_handlers[id_name][field] end
		} );
		local mode = handler.mode;
		local rendered;

		if 'external' == mode then
			rendered = external_link_id( handler );
		elseif 'internal' == mode then
			rendered = internal_link_id( handler );
		elseif 'manual' == mode then
			local render = manual_renderers[id_name];
			if not render then
				error( cfg.messages['unknown_manual_ID'] );					-- manual mode but no renderer for this identifier
			end
			rendered = render( value, handler );
		else
			error( cfg.messages['unknown_ID_mode'] );							-- mode is not external, internal, or manual
		end

		table.insert( entries, { handler.label, rendered } );
	end

	table.sort( entries, function( left, right )								-- order the pairs by label
		return left[1] < right[1];
	end );

	local rendered_list = {};													-- keep only the rendered text, in label order
	for index, entry in ipairs( entries ) do
		rendered_list[index] = entry[2];
	end

	return rendered_list;
end
747 
748 
749 --[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------
750 
751 Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for
752 any of the parameters listed in each cfg.id_handlers['...'].parameters.  If found, adds the parameter and value to
753 the identifier list.  Emits redundant error message if more than one alias exists in args
754 
755 ]]
756 
local function extract_ids( args )
	local found = {};															-- identifiers present in args, indexed by uc identifier name

	for id_name, handler in pairs( cfg.id_handlers ) do						-- id_name is index to cfg.id_handlers; e.g. 'ISBN'; handler is its configuration table
		-- handler.parameters is a table of aliases for id_name; pick the one that is present in args, if any
		local value = select_one( args, handler.parameters, 'redundant_parameters' );

		if is_set( value ) then													-- only identifiers that have a value are listed
			found[id_name] = value;
		end
	end

	return found;
end
765 
766 
767 --[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------
768 
769 Sets local cfg table and imported functions table to same (live or sandbox) as that used by the other modules.
770 
771 ]]
772 
local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
	local utilities = utilities_page_ptr;										-- the selected Module:Citation/CS1/Utilities module

	cfg = cfg_table_ptr;														-- use the configuration table handed in by the caller

	-- import the utility functions and z (table of tables in Module:Citation/CS1/Utilities) in one assignment
	is_set, in_array, set_error, select_one, add_maint_cat, z =
		utilities.is_set,
		utilities.in_array,
		utilities.set_error,
		utilities.select_one,
		utilities.add_maint_cat,
		utilities.z;
end
784 
785 
786 
-- export table: the functions this module returns to whatever requires it
return {
	build_id_list = build_id_list,
	extract_ids = extract_ids,
	is_embargoed = is_embargoed,
	set_selected_modules = set_selected_modules,
}