置顶公告:【置顶】关于临时开启评论区所有功能的公告(2022.10.22) | 【置顶】关于本站Widget恢复使用的公告
  • 你好~!欢迎来到萌娘百科镜像站!如需查看或编辑,请联系本站管理员注册账号。
  • 本镜像站和其他萌娘百科的镜像站无关,请注意分别。

Module:Citation/CS1/Identifiers

猛汉♂百科,万男皆可猛的百科全书!转载请标注来源页面的网页链接,并声明引自猛汉百科。内容不可商用。
跳到导航 跳到搜索
Template-info.svg 模块文档  [创建] [刷新]
  --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------

  Module-level locals used by the functions below.  They are declared here without values; nothing in this
  section assigns them (presumably they are filled in elsewhere in the file -- TODO confirm).

  ]]

  -- functions in Module:Citation/CS1/Utilities
  local is_set;
  local in_array;
  local wrap_style;

  -- functions in Module:Citation/CS1/Error
  local append_error;
  local set_error;
  local throw_error;
  local select_one;
  local add_maint_cat;

  -- functions in Module:Citation/CS1/Links
  local make_internal_link;

  -- functions in Module:Citation/CS1/Date_validation
  local is_valid_date_from_a_point;

  -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
  local cfg;

  -- the expanded {{int:lang}} message when the server name contains 'wikidata'; empty string otherwise
  local wd_int_lang = '';
  if mw.site.server:match ('wikidata') then
      wd_int_lang = mw.getCurrentFrame():preprocess('{{int:lang}}') or '';
  end
  9. --============================<< H E L P E R F U N C T I O N S >>============================================
  --[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------

  Builds a wiki-style external link for an identifier.

  WARNING: this function is not compatible with the function of the same name in the English-wiki CS1 module;
  do not blindly replace one with the other.

  Fields read from options:
      prefix, suffix   text placed before / after the identifier in the url (suffix defaults to "")
      id               identifier; shown (nowiki'd) as the link text
      link_id          optional; when present it is used in the url instead of id
      encode           true or nil: url-encode the url part; any other value: leave it alone
      link, label      tooltip text and label text
      separator        text between label and identifier (defaults to "&nbsp;")
      access           optional; when set, the link is wrapped with the matching access-signal presentation

  Returns the rendered link as a single value.

  ]]

  local function external_link_id(options)
      local target = options.link_id or options.id;                      -- link_id only overrides the url part

      if options.encode == nil or options.encode == true then
          target = mw.uri.encode( target );
      end

      local rendered = mw.ustring.format( '[%s%s%s \<span title\=\"%s\"\>%s%s%s\<\/span\>]',
          options.prefix, target, options.suffix or "",
          options.link, options.label, options.separator or "&nbsp;",
          mw.text.nowiki(options.id)
      );

      if not is_set (options.access) then
          return rendered;
      end

      -- add the free-to-read / paywall lock; parentheses keep the result to one value
      return (wrap_style ('access-signal', {rendered, cfg.presentation[options.access]}));
  end
  --[[--------------------------< I N T E R N A L _ L I N K _ I D >----------------------------------------------

  Builds a wiki-style internal link (double square brackets) for an identifier.

  Fields read from options: prefix, id, suffix (defaults to ""), link (tooltip), label,
  separator (defaults to "&nbsp;").  The identifier is shown nowiki'd after the label.

  ]]

  local function internal_link_id(options)
      local link_template = '[[%s%s%s|\<span title\=\"%s\"\>%s\<\/span\>%s%s]]';
      local tail = options.suffix or "";
      local gap = options.separator or "&nbsp;";

      return mw.ustring.format( link_template,
          options.prefix, options.id, tail,
          options.link, options.label, gap,
          mw.text.nowiki(options.id)
      );
  end
  --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------

  Decides whether a PMC identifier's online version is still embargoed by comparing the date in |embargo=
  against today's date (both formatted with the content language's formatDate using the 'U' format code and
  compared as numbers).

  Returns the content of |embargo= when the embargo date is today or later.  Returns an empty string when
  |embargo= is not set, when either date could not be formatted, or when the embargo has expired; in the
  expired case the 'embargo' maintenance category is added as well.

  ]]

  local function is_embargoed (embargo)
      if not is_set (embargo) then
          return '';                                                      -- |embargo= not set
      end

      local content_lang = mw.getContentLanguage();
      local embargo_ok, embargo_stamp = pcall( content_lang.formatDate, content_lang, 'U', embargo );
      local today_ok, today_stamp = pcall( content_lang.formatDate, content_lang, 'U' );

      if not (embargo_ok and today_ok) then
          return '';                                                      -- one of the dates could not be formatted
      end

      if tonumber( embargo_stamp ) < tonumber( today_stamp ) then        -- embargo date is in the past
          add_maint_cat ('embargo')
          return '';                                                      -- unset because embargo has expired
      end

      return embargo;                                                     -- still embargoed
  end
  --[[--------------------------< IS _ V A L I D _ C H I N E S E _ B O O K _ C A T E G O R Y >----------------------

  Checks whether str is one of the book classification codes listed in Table B.1 of GB/T 9999.1-2018.
  The result is whatever in_array() returns for str against the list below.

  ]]

  local function is_valid_Chinese_book_category (str)
      local categories = {
          "A",
          "B", "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B80", "B82", "B83", "B84", "B9",
          "C", "C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C91", "C92", "C93", "C94", "C95", "C96", "C97",
          "D", "D0", "D1", "D2", "D33", "D4", "D5", "D6", "D73", "D8", "D9", "DF",
          "E", "E0", "E1", "E2", "E3", "E8", "E9", "E99",
          "F", "F0", "F1", "F2", "F3", "F4", "F49", "F5", "F59", "F6", "F7", "F8",
          "G", "G0", "G1", "G2", "G3", "G4", "G8",
          "H", "H0", "H1", "H2", "H3", "H4", "H5", "H61", "H62", "H63", "H64", "H65", "H66", "H67", "H7", "H81", "H83", "H84", "H9",
          "I", "I0", "I1", "I2", "I3", "I7",
          "J", "J0", "J1", "J19", "J2", "J29", "J3", "J4", "J5", "J59", "J6", "J7", "J8", "J9",
          "K", "K0", "K1", "K2", "K3", "K4", "K5", "K6", "K7", "K81", "K85", "K89", "K9",
          "N", "N0", "N1", "N2", "N3", "N4", "N5", "N6", "N7", "N79", "N8", "N91", "N93", "N94", "N99",
          "O", "O1", "O3", "O4", "O6", "O7",
          "P", "P1", "P2", "P3", "P4", "P5", "P6", "P7", "P9",
          "Q", "Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q81", "Q89", "Q91", "Q93", "Q94", "Q95", "Q96", "Q98",
          "R", "R1", "R2", "R3", "R4", "R5", "R6", "R71", "R72", "R73", "R74", "R75", "R76", "R77", "R78", "R79", "R8", "R9",
          "S", "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9",
          "T", "TB", "TD", "TE", "TF", "TG", "TH", "TJ", "TK", "TL", "TM", "TN", "TP", "TQ", "TS", "TU", "TV",
          "U", "U1", "U2", "U4", "U6", "U8",
          "V", "V1", "V2", "V4", "V7",
          "X", "X1", "X2", "X3", "X4", "X5", "X7", "X8", "X9",
          "Z"
      };

      return in_array (str, categories);
  end
  --[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------

  ISBN-10 / ISSN checksum test.  Every character, check digit included, is multiplied by a weight that starts
  at len for the first character and drops by one per position; 'X' counts as 10.  The number is valid when
  the weighted sum is a multiple of 11.  ISBN-13 is checked in isbn().

  Before calling, the isbn/issn must already be checked for length and stripped of dashes, spaces and other
  non-isxn characters.

  Returns true when the checksum is zero, false otherwise.

  ]]

  local function is_valid_isxn (isxn_str, len)
      local byte_X = string.byte( "X" );                                 -- 0x58
      local bytes = { isxn_str:byte(1, len) };                           -- '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
      local weighted_sum = 0;

      for position = 1, #bytes do
          local weight = (len + 1) - position;
          local digit_value;

          if bytes[position] == byte_X then
              digit_value = 10;                                           -- check digit X represents 10 decimal
          else
              digit_value = tonumber( string.char(bytes[position]) );
          end

          weighted_sum = weighted_sum + digit_value * weight;
      end

      return weighted_sum % 11 == 0;
  end
  --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >-----------------------------------------------

  ISBN-13 / ISMN checksum test.  The first 13 characters, check digit included, are summed with weight 1 at
  odd positions and weight 3 at even positions; the number is valid when the sum is a multiple of 10.

  Before calling, the isbn-13/ismn must already be checked for length and stripped of dashes, spaces and
  other non-isxn-13 characters.

  ]]

  local function is_valid_isxn_13 (isxn_str)
      local bytes = { isxn_str:byte(1, 13) };                            -- '0' → 0x30 .. '9' → 0x39
      local total = 0;

      for position = 1, #bytes do
          local weight = (position % 2 == 0) and 3 or 1;                  -- odd positions ×1, even positions ×3
          total = total + weight * tonumber( string.char(bytes[position]) );
      end

      return total % 10 == 0;
  end
  --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------

  lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
      1. Remove all blanks.
      2. If there is a forward slash (/) in the string, remove it and everything to its right.
      3. If there is a hyphen in the string:
          a. Remove it.
          b. Look at the substring to the right of the removed hyphen:
              1. it should be six or fewer digits (NOT checked here)
              2. if it is shorter than six characters, left-fill it with zeroes to length six.

  Returns the normalized lccn for lccn() to validate.  No error checking is done in this function.

  ]]

  local function normalize_lccn (lccn)
      local cleaned = string.gsub (lccn, "%s", "");                      -- 1. strip whitespace

      local slash_at = string.find (cleaned, '/');
      if slash_at ~= nil then
          cleaned = string.sub (cleaned, 1, slash_at - 1);                -- 2. drop the first slash and all that follows
      end

      local head, tail = cleaned:match ("(.+)%-(.+)");                   -- 3.a split around the hyphen
      if tail == nil then
          return cleaned;                                                 -- no hyphen: nothing more to do
      end

      return head .. string.rep ("0", 6 - #tail) .. tail;                -- 3.b.2 zero-fill the suffix to six characters
  end
  148. --============================<< I D E N T I F I E R F U N C T I O N S >>====================================
  --[[--------------------------< A R X I V >--------------------------------------------------------------------

  See: http://arxiv.org/help/arxiv_identifier

  Formats an arXiv identifier and checks it for errors.  Three forms are recognized:

  first form (described as valid between date codes 9108 and 0703):
      arXiv:<archive>.<class>/<date code><number><version>
          <archive>    alpha characters, may be hyphenated; no other punctuation
          <class>      alpha characters, may be hyphenated; no other punctuation; not the same as the |class=
                       parameter, which is not supported for this form
          <date code>  YYMM; the first digit of YY can only be 9 or 0
          <number>     three digits
          <version>    lowercase v followed by one or more digits; no spaces (undocumented)

  second form (April 2007 through December 2014):
      arXiv:<date code>.<number><version>
          <date code>  YYMM
          <number>     four digits
          <version>    lowercase v followed by one or more digits; no spaces

  third form (from January 2015):
      arXiv:<date code>.<number><version>
          <date code> and <version> as in the second form
          <number>     five digits

  NOTE(review): the first-form description gives 9108 as the earliest date code, but the month test below
  only rejects year 91 months earlier than 07, so 9107 is accepted -- confirm which is intended.

  Parameters:
      id      the arXiv identifier
      class   optional |class= value; appended as a bracketed arxiv.org/archive link when id begins with a
              digit, otherwise a 'class_ignored' error is appended

  Returns the formatted link text, followed by a 'bad_arxiv' error message when the id is not acceptable.

  ]]

  local function arxiv (id, class)
      local handler = cfg.id_handlers['ARXIV'];
      local flagged = false;                                              -- assume no error message
      local yy, mm;

      -- truthy when id matches either the version-less or the versioned pattern
      local function has_form (plain, versioned)
          return id:match (plain) or id:match (versioned);
      end

      if has_form ("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$", "^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then   -- 9108-0703 format
          yy, mm = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
          yy, mm = tonumber(yy), tonumber(mm);

          local year_in_range = (yy > 90) or (yy < 8);
          local month_in_range = (mm >= 1) and (mm <= 12);
          flagged = (not year_in_range) or (not month_in_range)           -- invalid year or invalid month
              or (yy == 91 and mm < 7)                                    -- year ok; is the starting month ok?
              or (yy == 7 and mm > 3);                                    -- year ok; is the ending month ok?

      elseif has_form ("^%d%d[01]%d%.%d%d%d%d$", "^%d%d[01]%d%.%d%d%d%dv%d+$") then                   -- 0704-1412 format
          yy, mm = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
          yy, mm = tonumber(yy), tonumber(mm);

          flagged = (yy < 7) or (yy > 14)                                 -- year out of range
              or (mm < 1) or (mm > 12)                                    -- month out of range
              or (yy == 7 and mm < 4);                                    -- year 07 before April

      elseif has_form ("^%d%d[01]%d%.%d%d%d%d%d$", "^%d%d[01]%d%.%d%d%d%d%dv%d+$") then               -- 1501- format
          yy, mm = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
          yy, mm = tonumber(yy), tonumber(mm);

          flagged = (yy < 15) or (mm < 1) or (mm > 12);                   -- does not test for future years

      else
          flagged = true;                                                 -- not a recognized format
      end

      local err_msg = '';
      if flagged then
          err_msg = table.concat ({' ', set_error ('bad_arxiv')});
      end

      local text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
          prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_msg;

      if is_set (class) then
          if id:match ('^%d+') then
              -- external link within square brackets, not wikilink
              text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'});
          else
              text = table.concat ({text, ' ', set_error ('class_ignored')});
          end
      end

      return text;
  end
  --[[--------------------------< B I B C O D E >--------------------------------------------------------------------

  Validates (loosely) and formats a bibcode id.

  Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes

  But 2015arXiv151206696F is apparently valid, so the only things that really matter are the length and that
  the first four characters are a year.  Tests made here:
      length must be 19 characters
      characters in position
          1–4    digits, representing a year in the range 1000 – next year
          5      letter
          6      letter, ampersand, or dot (ampersand cannot directly precede a dot; &. )
          7–8    letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
          9–18   letter, digit, or dot
          19     letter or dot

  The error type handed to the 'bad_bibcode' message is one of 'length', 'value', 'year', 'journal'; when
  both the year and the journal test fail, 'journal' is reported.

  ]]

  local function bibcode (id, access)
      local handler = cfg.id_handlers['BIBCODE'];
      local text = external_link_id({link=handler.link, label=handler.label, q = handler.q,
          prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
          access=access});
      local err_type;

      if id:len() == 19 then
          local year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$");

          if year then
              local next_year = tonumber(os.date ('%Y')) + 1;             -- current year plus one
              year = tonumber (year);

              if year < 1000 or year > next_year then
                  err_type = 'year';                                      -- year out of bounds
              end
              if id:find('&%.') then
                  err_type = 'journal';                                   -- '&.' in the journal abbreviation means a letter is missing
              end
          else
              err_type = 'value';                                         -- no pattern match
          end
      else
          err_type = 'length';
      end

      if is_set (err_type) then
          text = text .. ' ' .. set_error( 'bad_bibcode', {err_type});
      end

      return text;
  end
  --[[--------------------------< B I O R X I V >-----------------------------------------------------------------

  Formats a bioRxiv id and does simple error checking.

  The bioRxiv id is the part following the last slash in the bioRxiv-issued DOI:
      https://doi.org/10.1101/078733 -> 078733

  bioRxiv switched to a new id format at the end of 2019, so both formats are accepted here:
      old format   exactly six digits
      new format   YYYY.MM.DD.nnnnnn, optionally followed by v and one or more digits; the date part is
                   additionally handed to is_valid_date_from_a_point() together with 1576022400
                   (the Unix timestamp of 2019-12-11 00:00:00 UTC)

  A leading DOI prefix '10.1101/' is optional and is removed before validation; the link is always built
  from '10.1101/' followed by the remaining id.

  Returns the formatted link, followed by a 'bad_biorxiv' error message when the id is in neither format or
  its date is rejected.

  ]]

  local function biorxiv (id)
      local handler = cfg.id_handlers['BIORXIV'];
      local err_cat = '';                                                 -- presume that bioRxiv id is valid
      local invalid = false;

      -- the doi prefix may be given or omitted; the dot is escaped so that only a literal '10.1101/' is removed
      id = id:gsub ("^10%.1101/", "");

      if nil == id:match ("^%d%d%d%d%d%d$") then                         -- not the old format
          local date_str = id:match ("^(%d%d%d%d%.[01]%d%.[0-3]%d)%.%d%d%d%d%d%d$")
              or id:match ("^(%d%d%d%d%.[01]%d%.[0-3]%d)%.%d%d%d%d%d%dv%d+$");

          if nil == date_str then
              invalid = true;                                             -- not the new format either
          else
              date_str = date_str:gsub ("%.", "-");                       -- YYYY.MM.DD -> YYYY-MM-DD
              if not is_valid_date_from_a_point (date_str, 1576022400) then
                  invalid = true;
              end
          end
      end

      if invalid then
          err_cat = ' ' .. set_error( 'bad_biorxiv');                     -- set an error message
      end

      return external_link_id({link = handler.link, label = handler.label, q = handler.q,
          prefix = handler.prefix, id = "10.1101/" .. id, separator = handler.separator,
          encode = handler.encode, access = handler.access}) .. err_cat;
  end
  --[[--------------------------< C I T E S E E R X >------------------------------------------------------------

  CiteSeerX use their own notion of "doi" (not to be confused with the identifiers resolved via doi.org).

  The description of the structure of this identifier can be found at
  Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure

  The id must be '10.1.1.' followed by two dot-separated groups of one to four digits, neither of which
  begins with a zero; otherwise a 'bad_citeseerx' error message is appended to the link.

  ]]

  local function citeseerx (id)
      local handler = cfg.id_handlers['CITESEERX'];
      local link_text = external_link_id({link=handler.link, label=handler.label, q = handler.q,
          prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode,
          access=handler.access});

      if id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$") then
          return link_text;
      end

      return link_text .. ' ' .. set_error( 'bad_citeseerx' );
  end
  --[[--------------------------< C N I D >----------------------------------------------------------------------

  Validates a China domestic unified serial number (CN number) and produces its output text.

  Accepted forms, after an optional leading 'CN' (with or without one space) is removed:
      AA-BBBB          two digits, hyphen, four digits
      AA-BBBB/CC       the same followed by a slash and a classification code of uppercase letters / digits

  Checks:
      AA must be one of the region codes listed below
      BBBB 0001–0999 (kind 1): no classification code allowed
      BBBB 1000–5999 (kind 2): classification code required and must pass is_valid_Chinese_book_category()
      any other BBBB (kind 3): classification code optional, but when present it must pass the same check

  Output:
      valid kind 1   external link with suffix "&typeNum=1"; the hyphen is removed from the id in the url
      valid kind 2   external link with suffix "&typeNum=2"
      anything else  unlinked label and id inside a span
  A 'bad_cnid' error message is appended when the id is invalid.

  ]]

  local function cnid (id)
      local handler = cfg.id_handlers['CNID'];
      local text;
      local kind = 0;                                                     -- named 'kind' so the builtin type() is not shadowed
      local invalid = false;
      local AA, BBBB, CC;

      id = id:gsub ("^CN ?", "");

      AA, BBBB = id:match ("^(%d%d)%-(%d%d%d%d)$");
      if nil == AA then
          AA, BBBB, CC = id:match ("^(%d%d)%-(%d%d%d%d)/([A-Z0-9]+)$");
          if nil == AA then
              invalid = true;                                             -- neither accepted form
          end
      end

      if not invalid then
          if not in_array (AA, {"09", "10", "11",                        -- Beijing
                  "12", "13", "14", "15",                                 -- North China
                  "21", "22", "23",                                       -- Northeast China
                  "30", "31", "32", "33", "34", "35", "36", "37",         -- East China
                  "41", "42", "43", "44", "45", "46",                     -- Central and South China
                  "50", "51", "52", "53", "54",                           -- Southwest China
                  "61", "62", "63", "64", "65"                            -- Northwest China
                  }) then
              invalid = true;
          else
              local BBBB_num = tonumber (BBBB);
              if BBBB_num >= 1 and BBBB_num <= 999 then
                  kind = 1;
                  if nil ~= CC then
                      invalid = true;
                  end
              elseif BBBB_num >= 1000 and BBBB_num <= 5999 then
                  kind = 2;
                  if not is_valid_Chinese_book_category (CC) then
                      invalid = true;
                  end
              else
                  kind = 3;
                  if nil ~= CC and not is_valid_Chinese_book_category (CC) then
                      invalid = true;
                  end
              end
          end
      end

      if (not invalid) and (1 == kind) then
          local link_id = id:gsub ("%-", "");                             -- url uses the id without its hyphen
          text = external_link_id({link = handler.link, label = handler.label,
              q = handler.q, prefix = handler.prefix, suffix = "&typeNum=1",
              link_id = link_id, id = id,
              separator = handler.separator, encode = handler.encode});
      elseif (not invalid) and (2 == kind) then
          text = external_link_id({link = handler.link, label = handler.label,
              q = handler.q, prefix = handler.prefix, suffix = "&typeNum=2",
              id = id, separator = handler.separator, encode = handler.encode});
      else
          -- separator falls back to "&nbsp;" exactly as external_link_id() does, so a handler without a
          -- separator does not make format() raise on a nil argument
          text = mw.ustring.format( "<span title\=\"%s\"\>%s%s%s\<\/span\>",
              handler.link, handler.label, handler.separator or "&nbsp;",
              mw.text.nowiki (id)
          );
      end

      if invalid then
          text = text .. ' ' .. set_error( 'bad_cnid');
      end

      return text;
  end
  --[[--------------------------< C S B N >----------------------------------------------------------------------

  Validates a CSBN (China national unified book number) and produces a link to Douban.

  For the CSBN format see the article "谈谈国家统一书号与国际标准书号" (on the national unified book number
  and the international standard book number).

  A CSBN has three parts:
      1. the Renmin University of China library classification code, value 1-17
      2. the publisher code, three digits
      3. the sequence number, any number of digits
  Parts 2 and 3 are separated by a middle dot.  Using "-" in place of the dot is not legal, but it is a
  common mistake on this site, so hyphens are converted to middle dots before validation.

  The link is always produced; a 'bad_csbn' error message is appended when the id is invalid.

  ]]

  local function csbn (id)
      local handler = cfg.id_handlers['CSBN'];
      local invalid = false;

      id = id:gsub ("%-","·");                                           -- tolerate a hyphen used as the separator

      local category = id:match ("^([1-9]%d?)%d%d%d·%d+$");
      if nil == id:match ("^[1-9]%d?%d%d%d·%d+$") then
          invalid = true;                                                 -- does not have the three-part shape
      else
          local cat_num = tonumber (category);
          if cat_num > 17 or cat_num <= 0 then
              invalid = true;                                             -- classification code outside 1-17
          end
      end

      -- Douban uses a hyphen where the CSBN has the middle dot, so swap it back for the url
      local link_id = id:gsub ("·","%-");
      local text = external_link_id({link = handler.link, label = handler.label,
          q = handler.q, prefix = handler.prefix,
          link_id = link_id, id = id,
          separator = handler.separator, encode = handler.encode});

      if invalid then
          text = text .. ' ' .. set_error( 'bad_csbn');
      end

      return text;
  end
  --[[--------------------------< D O I >------------------------------------------------------------------------

  Formats a DOI and checks for DOI errors.

  DOI names contain two parts: prefix and suffix separated by a forward slash.
      Prefix: directory indicator '10.' followed by a registrant code
      Suffix: character string of any length chosen by the registrant

  This function checks a DOI name for: prefix/suffix.  If the doi name contains spaces or endashes, or if it
  ends with a period or a comma, a bad_doi error message is emitted.

  DOI names are case-insensitive and can incorporate any printable Unicode characters, so the test for
  spaces, endashes and terminal punctuation may not be technically correct, but in practice these characters
  appear to be rarely if ever used in doi names.

  Parameters:
      id         the doi
      inactive   optional inactive date; when set, a 'doi_inactive_dated' error (with the first four-digit
                 run found in it) or a 'doi_inactive' error is registered, and the inactive note is
                 appended after the link
      access     access level passed through to the link

  ]]

  local function doi(id, inactive, access)
      local handler = cfg.id_handlers['DOI'];
      local inactive_note = inactive;
      local cat = ""

      if is_set(inactive) then
          local inactive_year = inactive:match("%d%d%d%d") or '';        -- try to get the year portion from the inactive date
          if not is_set(inactive_year) then
              set_error('doi_inactive');                                  -- inactive doesn't contain a recognizable year
          else
              set_error('doi_inactive_dated', {inactive_year});
          end
          inactive_note = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
      end

      local text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
          prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive_note or '')

      -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes,
      -- and must not end with period or comma
      local well_formed = id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$");
      if well_formed == nil then
          cat = ' ' .. set_error( 'bad_doi' );
      end

      return text .. cat
  end
  --[[--------------------------< H D L >------------------------------------------------------------------------

  Formats an HDL with minor error checking.

  HDL names contain two parts: prefix and suffix separated by a forward slash.
      Prefix: character string using any character in the UCS-2 character set except '/'
      Suffix: character string of any length using any character in the UCS-2 character set chosen by the
              registrant

  This function checks a HDL name for: prefix/suffix.  If the HDL name contains spaces or endashes, or if it
  ends with a period or a comma, a bad_hdl error message is emitted.

  HDL names are case-insensitive and can incorporate any printable Unicode characters, so the test for
  endashes and terminal punctuation may not be technically correct, but in practice these characters appear
  to be rarely if ever used in HDLs.

  ]]

  local function hdl(id, access)
      local handler = cfg.id_handlers['HDL'];
      local link_text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
          prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access})

      -- hdl must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
      if id:match("^[^%s–]-/[^%s–]-[^%.,]$") ~= nil then
          return link_text;
      end

      return link_text .. ' ' .. set_error( 'bad_hdl' );
  end
  --[[--------------------------< I S B N >----------------------------------------------------------------------

  Determines whether an ISBN string is valid.

  Returns two values: a validity flag and a reason string.  The reason names the test that decided the
  result: 'invalid character', 'length', 'invalid form', 'invalid prefix' or 'checksum' (the last one is
  returned together with the checksum result, whether that is true or false).

  ]]

  local function isbn( isbn_str )
      -- anything other than whitespace, hyphens, digits and the uppercase X is rejected outright
      if isbn_str:match("[^%s-0-9X]") ~= nil then
          return false, 'invalid character';
      end

      local compact = isbn_str:gsub( "-", "" ):gsub( " ", "" );          -- remove hyphens and spaces
      local len = #compact;

      if len == 10 then
          if not compact:match( "^%d*X?$" ) then                         -- 'X' anywhere but last position
              return false, 'invalid form';
          end
          return is_valid_isxn(compact, 10), 'checksum';
      end

      if len == 13 then
          if not compact:match( "^%d+$" ) then                           -- isbn13 must be all digits
              return false, 'invalid character';
          end
          if not compact:match( "^97[89]%d*$" ) then                     -- isbn13 must begin with 978 or 979
              return false, 'invalid prefix';
          end
          return is_valid_isxn_13 (compact), 'checksum';
      end

      return false, 'length';                                             -- neither 10 nor 13 characters
  end
  --[[--------------------------< A M A Z O N >------------------------------------------------------------------

  Formats a link to Amazon and does simple error checking: an asin must be a mix of 10 numeric or uppercase
  alpha characters.  If a mix, the first character must be uppercase alpha; if all numeric (or nine digits
  with a terminal X), the asin must be a 10-digit isbn.  When it is a 10-digit isbn, the 'ASIN' maintenance
  category is added so a bot or awb script can replace |asin= with |isbn=.

  A 'bad_asin' error message is appended when the id is not 10 characters, is isbn-shaped but not a valid
  isbn10, or is mixed and begins with a digit.

  domain selects the Amazon site: unset → "com"; 'jp', 'uk' → "co.<domain>"; 'au', 'br', 'mx' →
  "com.<domain>"; any other value is used as given.

  This function is positioned here because it calls isbn().

  ]]

  local function asin(id, domain)
      local err_cat = ""

      -- builds the error text; registering the error is a side effect of set_error()
      local function bad_asin ()
          return ' ' .. set_error ('bad_asin');
      end

      if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
          err_cat = bad_asin ();                                          -- not a mix of 10 uppercase alpha and numeric characters
      elseif id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then                  -- 10-digit numeric (or 9 digits with terminal X)
          if isbn( id ) then                                              -- asin value is isbn10
              add_maint_cat ('ASIN');
          elseif not is_set (err_cat) then
              err_cat = bad_asin ();                                      -- asin is not isbn10
          end
      elseif not id:match("^%u[%d%u]+$") then
          err_cat = bad_asin ();                                          -- asin doesn't begin with uppercase alpha
      end

      if not is_set(domain) then
          domain = "com";
      elseif in_array (domain, {'jp', 'uk'}) then                        -- Japan, United Kingdom
          domain = "co." .. domain;
      elseif in_array (domain, {'au', 'br', 'mx'}) then                  -- Australia, Brazil, Mexico
          domain = "com." .. domain;
      end

      local handler = cfg.id_handlers['ASIN'];
      local url_prefix = handler.prefix .. domain .. "/dp/";

      return external_link_id({link=handler.link,
          label=handler.label, q = handler.q, prefix=url_prefix,
          id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
  end
  --[[--------------------------< I S M N >----------------------------------------------------------------------

  Determines whether an ISMN string is valid.  Similar to isbn-13, an ismn is 13 digits beginning 979-0-...
  and uses the same check digit calculation.  See
  http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf section 2, pages 9–12.

  The test works on a stripped copy of the id (spaces, hyphens and endashes removed).  The text that is
  displayed is the id exactly as it was supplied, so an invalid value is shown to the editor unaltered next
  to the error message rather than in its stripped form.

  Returns the label (wikilinked to handler.link), the separator and the id, followed by a 'bad_ismn' error
  message when the ismn is not valid.  There is no external target to link the id itself to yet.

  ]]

  local function ismn (id)
      local handler = cfg.id_handlers['ISMN'];
      local text;
      local valid_ismn = true;
      local id_copy = id;                                                 -- save a copy because this testing is destructive

      id = id:gsub( "[%s-–]", "" );                                      -- strip spaces, hyphens, and endashes from the ismn

      if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then           -- ismn must be 13 digits and begin 9790
          valid_ismn = false;
      else
          valid_ismn = is_valid_isxn_13 (id);                             -- validate ismn
      end

      -- when there is some place to link to, build the text with internal_link_id() (or the external
      -- version) instead of the plain concatenation below

      -- display the unaltered copy, not the stripped working value
      text = "[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id_copy;

      if false == valid_ismn then
          text = text .. ' ' .. set_error( 'bad_ismn' )                   -- add an error message if the ismn is invalid
      end

      return text;
  end
  --[[--------------------------< I S S N >----------------------------------------------------------------------

  Validates and formats an issn.  This code fixes the case where an editor has included an ISSN in the
  citation but has separated the two groups of four digits with a space.  When that happened, the resulting
  link looked like this:

      |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327]  -- can't have spaces in an external link

  That is prevented by inserting a hyphen at the issn midpoint.  The issn is also validated for length and
  for agreement between the check digit and the calculated value.  Incorrect length (not 8 characters),
  characters other than 0-9 and X, or a check digit mismatch all cause a check issn error message.  A valid
  issn is always displayed with a hyphen, even if it was given as a single group of 8 digits; an invalid one
  is displayed as it was supplied.

  Parameters:
      id   the issn
      e    truthy to use the EISSN handler and error variant; otherwise the ISSN handler

  ]]

  local function issn(id, e)
      local as_given = id;                                                -- unadulterated issn; displayed when validation fails
      local handler = cfg.id_handlers[e and 'EISSN' or 'ISSN'];
      local valid_issn;

      id = id:gsub( "[%s-–]", "" );                                      -- strip spaces, hyphens, and endashes from the issn

      if 8 == id:len() and nil ~= id:match( "^%d*X?$" ) then             -- 8 characters: digits, optionally X in the last position
          valid_issn = is_valid_isxn(id, 8);                              -- validate issn
      else
          valid_issn = false;                                             -- wrong length or improper character
      end

      if true == valid_issn then
          id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 );      -- display correctly formatted version
      else
          id = as_given;                                                  -- show the invalid issn with error message
      end

      local text = external_link_id({link = handler.link, label = handler.label, q = handler.q,
          prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})

      if false == valid_issn then
          text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' )  -- add an error message if the issn is invalid
      end

      return text
  end
  594. --[[--------------------------< J F M >-----------------------------------------------------------------------
  595. A numerical identifier in the form nn.nnnn.nn
  596. ]]
  local function jfm (id)
      -- Render a JFM identifier (form nn.nnnn.nn) as an external link.
      -- A leading 'JFM' prefix (any letter case) is tolerated: it is stripped and a
      -- 'jfm_format' maintenance category is added.
      -- Returns the link text followed by a 'bad_jfm' error message when the form is wrong.
      local handler = cfg.id_handlers['JFM'];
      local err_cat = '';

      local candidate = id;                                   -- presume a plain identifier without jfm prefix
      local stripped = id:match ('^[Jj][Ff][Mm](.*)$');       -- text that follows a jfm prefix, if there is one
      if is_set (stripped) then
          add_maint_cat ('jfm_format');
          candidate = stripped;
      end

      if candidate:match ('^%d%d%.%d%d%d%d%.%d%d$') then
          id = candidate;                                     -- well formed: link with the prefix-free identifier
      else
          err_cat = ' ' .. set_error( 'bad_jfm' );            -- malformed: id is left as supplied
      end

      local link_text = external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
      });
      return link_text .. err_cat;
  end
  615. --[[--------------------------< L C C N >----------------------------------------------------------------------
  616. Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of
  617. the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits.
  618. http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
  619. length = 8 then all digits
  620. length = 9 then lccn[1] is lower case alpha
  621. length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
  622. length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
  623. length = 12 then lccn[1] and lccn[2] are both lower case alpha
  624. ]]
  local function lccn(lccn)
      -- Render an LCCN as an external link after a simple shape check.
      -- The check is made on the normalized form returned by normalize_lccn(); the link
      -- itself is built from the value as supplied.
      -- Returns the link text followed by a 'bad_lccn' error message when the check fails.
      local handler = cfg.id_handlers['LCCN'];
      local id = normalize_lccn (lccn);                       -- canonical form (no whitespace, hyphens, forward slashes)
      local len = id:len();

      local well_formed;
      if 8 == len then                                        -- dddddddd
          well_formed = not id:match("[^%d]");
      elseif 9 == len then                                    -- adddddddd
          well_formed = nil ~= id:match("%l%d%d%d%d%d%d%d%d");
      elseif 10 == len then                                   -- aadddddddd or dddddddddd
          well_formed = not id:match("[^%d]") or nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d");
      elseif 11 == len then                                   -- aaadddddddd or adddddddddd
          well_formed = nil ~= (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d"));
      elseif 12 == len then                                   -- aadddddddddd
          well_formed = nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d%d%d");
      else
          well_formed = false;                                -- wrong length
      end

      if well_formed and nil ~= lccn:find ('%s') then
          well_formed = false;                                -- the value as supplied contains whitespace
      end

      local err_cat = '';
      if not well_formed then
          err_cat = ' ' .. set_error( 'bad_lccn' );
      end

      local link_text = external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = lccn,
          separator = handler.separator,
          encode = handler.encode,
      });
      return link_text .. err_cat;
  end
  662. --[[--------------------------< M R >--------------------------------------------------------------------------
  663. A seven digit number; if not seven digits, zero-fill leading digits to make seven digits.
  664. ]]
  local function mr (id)
      -- Render an MR identifier as an external link.
      -- Accepts one to seven digits, optionally preceded by 'MR' (any letter case); shorter
      -- numbers are left-padded with zeros to seven digits. A prefixed identifier adds the
      -- 'mr_format' maintenance category.
      -- Returns the link text followed by a 'bad_mr' error message when the identifier is malformed.
      local handler = cfg.id_handlers['MR'];
      local err_cat = '';

      local digits = id:match ('^[Mm][Rr](%d+)$');            -- digits that follow an mr prefix
      if is_set (digits) then
          add_maint_cat ('mr_format');
      else
          digits = id:match ('^%d+$');                        -- plain number: nil unless id is all digits
      end

      if digits and 7 >= #digits then                         -- both patterns guarantee at least one digit
          id = ('0'):rep (7 - #digits) .. digits;             -- zero-fill leading digits
      else
          err_cat = ' ' .. set_error( 'bad_mr' );
      end

      local link_text = external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
      });
      return link_text .. err_cat;
  end
  685. --[[--------------------------< O C L C >----------------------------------------------------------------------
  686. Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}
  687. archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html
  688. ]]
  local function oclc (id)
      -- Render an OCLC identifier as an external link.
      -- Recognized forms: 'ocm' + 8 digits, 'ocn' + 9 digits, 'on' + 10 or more digits,
      -- '(OCoLC)' + 1 to 9 digits without a leading zero, or 1 to 10 bare digits.
      -- Any prefix is excluded from the link; a malformed id is linked as supplied with a
      -- 'bad_oclc' error message appended.
      local handler = cfg.id_handlers['OCLC'];

      -- returns the numeric portion of id, or nil when id fits none of the recognized forms
      local function numeric_part ()
          if id:match('^ocm%d%d%d%d%d%d%d%d$') then           -- 001 field (12 characters)
              return (id:match('ocm(%d+)'));
          end
          if id:match('^ocn%d%d%d%d%d%d%d%d%d$') then         -- 001 field (12 characters)
              return (id:match('ocn(%d+)'));
          end
          if id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then       -- 001 field (12 characters)
              return (id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$'));
          end
          if id:match('^%(OCoLC%)[1-9]%d*$') then             -- 035 field
              local digits = id:match('%(OCoLC%)([1-9]%d*)');
              if 9 < digits:len() then
                  return nil;                                 -- constrain to 1 to 9 digits
              end
              return digits;
          end
          if id:match('^%d+$') then                           -- no prefix
              if 10 < id:len() then
                  return nil;                                 -- constrain to 1 to 10 digits
              end
              return id;
          end
          return nil;
      end

      local err_msg = '';                                     -- empty string for concatenation
      local number = numeric_part ();
      if number then
          id = number;                                        -- exclude prefix, if any, from external link
      else
          err_msg = ' ' .. set_error( 'bad_oclc' );
      end

      return external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
      }) .. err_msg;
  end
  719. --[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------
  720. Formats an OpenLibrary link, and checks for associated errors.
  721. ]]
  local function openlibrary(id, access)
      -- Render an OpenLibrary identifier as an external link.
      --   id     : digits followed by 'A', 'M', or 'W', optionally preceded by 'OL'
      --   access : access-level value handed through to external_link_id()
      -- The trailing letter selects the URL section: A -> authors, M -> books, W -> works.
      -- A malformed id is linked as supplied and a 'bad_ol' error message is appended.
      local handler = cfg.id_handlers['OL'];
      local sections = { A = 'authors/OL', M = 'books/OL', W = 'works/OL' };

      -- Remove the optional OL prefix before matching. Without this step an id such as
      -- 'OL123M' was rejected even though the prefix is documented as optional.
      local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$");

      if not is_set (ident) then                              -- malformed: link the raw id and flag the error
          return external_link_id({link=handler.link, label=handler.label, q = handler.q,
              prefix=handler.prefix .. 'OL',
              id=id, separator=handler.separator, encode = handler.encode,
              access = access}) .. ' ' .. set_error( 'bad_ol' );
      end

      -- code is guaranteed by the pattern to be one of the keys of sections
      return external_link_id({link=handler.link, label=handler.label, q = handler.q,
          prefix=handler.prefix .. sections[code],
          id=ident, separator=handler.separator, encode = handler.encode,
          access = access});
  end
  753. --[[--------------------------< P M C >------------------------------------------------------------------------
  754. Format a PMC, do simple error checking, and check for embargoed articles.
  755. The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
  756. be linked to the article. If the embargo date is today or in the past, or if it is empty or omitted, then the
  757. PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.
  758. PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
  759. has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed ()
  760. returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.
  761. PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits and is less
  762. than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.
  763. ]]
  local function pmc(id, embargo)
      -- Render a PMC identifier.
      --   id      : digits, optionally preceded by 'PMC' (any letter case); a prefixed id adds
      --             the 'pmc_format' maintenance category
      --   embargo : when set, the identifier is rendered without an external link
      -- The number must lie between 1 and test_limit inclusive, otherwise a 'bad_pmc' error
      -- message is appended and the id is shown as supplied.
      local test_limit = 10000000;                            -- update this value as PMCs approach
      local handler = cfg.id_handlers['PMC'];

      local digits = id:match ('^[Pp][Mm][Cc](%d+)$');        -- digits that follow a pmc prefix
      if is_set (digits) then
          add_maint_cat ('pmc_format');
      else
          digits = id:match ('^%d+$');                        -- plain number: nil unless id is all digits
      end

      local acceptable = false;
      if is_set (digits) then
          local value = tonumber (digits);                    -- numeric form for range testing
          if 1 <= value and test_limit >= value then
              id = tostring (value);                          -- make sure id is a string
              acceptable = true;
          end
      end

      local err_cat = '';
      if not acceptable then                                  -- bad format or outside test limit boundaries
          err_cat = ' ' .. set_error( 'bad_pmc' );
      end

      if is_set (embargo) then                                -- still embargoed so no external link
          local parts = {
              make_internal_link (handler.link, handler.label),
              handler.separator,
              id,
              err_cat
          };
          return table.concat (parts);
      end

      -- no embargo date or embargo has expired, ok to link to article
      return external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
          access = handler.access,
      }) .. err_cat;
  end
  800. --[[--------------------------< P M I D >----------------------------------------------------------------------
  801. Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This
  802. code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable
  803. test_limit will need to be updated periodically as more PMIDs are issued.
  804. ]]
  local function pmid(id)
      -- Render a PMID as an external link.
      -- The id must consist of digits only and lie between 1 and test_limit inclusive;
      -- otherwise a 'bad_pmid' error message is appended to the link.
      local test_limit = 40000000;                            -- update this value as PMIDs approach
      local handler = cfg.id_handlers['PMID'];

      local acceptable = false;
      if not id:match("[^%d]") then                           -- nothing but digits
          local value = tonumber(id);                         -- numeric form for range testing
          acceptable = not (1 > value or test_limit < value);
      end

      local err_cat = '';                                     -- presume that PMID is valid
      if not acceptable then
          err_cat = ' ' .. set_error( 'bad_pmid' );
      end

      local link_text = external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
      });
      return link_text .. err_cat;
  end
  820. --[[--------------------------< S 2 C I D >--------------------------------------------------------------------
  821. Format an S2CID, do simple error checking
  822. S2CIDs are sequential numbers beginning at 1 and counting up. This code checks the S2CID to see that it is only
  823. digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically
  824. as more S2CIDs are issued.
  825. ]]
  local function s2cid (id, access)
      -- Render an S2CID as an external link.
      --   id     : digits only, not beginning with 0, and no greater than test_limit
      --   access : access-level value handed through to external_link_id()
      -- A 'bad_s2cid' error message is appended when the id fails either check.
      local test_limit = 250000000;
      local handler = cfg.id_handlers['S2CID'];

      local digits = id:match ('^[1-9]%d*$');                 -- nil when the format is wrong
      local acceptable = false;
      if is_set (digits) then
          acceptable = not (test_limit < tonumber (digits));  -- range test on the numeric value
      end

      local err_cat = '';                                     -- presume that S2CID is valid
      if not acceptable then
          err_cat = ' ' .. set_error ('bad_s2cid');
      end

      local link_text = external_link_id ({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          redirect = handler.redirect,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
          access = access,
      });
      return link_text .. err_cat;
  end
  842. --[[--------------------------< S S R N >----------------------------------------------------------------------
  843. Format an ssrn, do simple error checking
  844. SSRNs are sequential numbers beginning at 100? and counting up. This code checks the ssrn to see that it is
  845. only digits and is greater than 99 and less than test_limit; the value in local variable test_limit will need
  846. to be updated periodically as more SSRNs are issued.
  847. ]]
  local function ssrn (id)
      -- Render an SSRN identifier as an external link.
      -- The id must consist of digits only and lie between 100 and test_limit inclusive;
      -- otherwise a 'bad_ssrn' error message is appended to the link.
      local test_limit = 3500000;                             -- update this value as SSRNs approach
      local handler = cfg.id_handlers['SSRN'];

      local digits = id:match ('^%d+$');                      -- nil unless id is all digits
      local acceptable = false;
      if is_set (digits) then
          local value = tonumber (digits);                    -- numeric form for range testing
          acceptable = not (100 > value or test_limit < value);
      end

      local err_cat = '';                                     -- presume that SSRN is valid
      if not acceptable then
          err_cat = ' ' .. set_error( 'bad_ssrn' );
      end

      return external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
          access = handler.access,
      }) .. err_cat;
  end
  867. --[[--------------------------< U S E N E T _ I D >------------------------------------------------------------
  868. Validate and format a usenet message id. Simple error checking, looks for 'id-left@id-right' not enclosed in
  869. '<' and/or '>' angle brackets.
  870. ]]
  local function usenet_id (id)
      -- Render a usenet message id as an external link.
      -- The id is accepted when it has text on both sides of an '@', does not start with '<',
      -- and does not end with '>'; otherwise a 'bad_message_id' error message is appended.
      local handler = cfg.id_handlers['USENETID'];

      local link_text = external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
      });

      local acceptable = id:match('^.+@.+$') and id:match('^[^<].*[^>]$');
      if not acceptable then
          link_text = link_text .. ' ' .. set_error( 'bad_message_id' );
      end
      return link_text;
  end
  880. --[[--------------------------< Z B L >-----------------------------------------------------------------------
  881. A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
  882. ]]
  local function zbl (id)
      -- Render a Zbl identifier (form nnnn.nnnnn, leading zeros of the first group optional)
      -- as an external link. A leading 'Zbl' prefix (any letter case) is removed first.
      -- An id of exactly eight digits adds the 'zbl_format' maintenance category; any other
      -- id that does not fit the form gets a 'bad_zbl' error message appended.
      local handler = cfg.id_handlers['ZBL'];
      local err_cat = '';

      id = (id:gsub ('^[Zz][Bb][Ll]', ""));                   -- keep only the substituted string, not the count

      if id:match ("^%d%d%d%d%d%d%d%d$") then
          add_maint_cat ('zbl_format');                       -- temporary
      elseif not id:match ('^%d?%d?%d?%d%.%d%d%d%d%d$') then
          err_cat = ' ' .. set_error( 'bad_zbl' );
      end

      local link_text = external_link_id({
          link = handler.link,
          label = handler.label,
          q = handler.q,
          prefix = handler.prefix,
          id = id,
          separator = handler.separator,
          encode = handler.encode,
      });
      return link_text .. err_cat;
  end
  897. --============================<< I N T E R F A C E F U N C T I O N S >>==========================================
  898. --[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------
  899. Takes a table of IDs created by extract_ids() and turns it into a table of formatted ID outputs.
  900. inputs:
  901. id_list – table of identifiers built by extract_ids()
  902. options – table of various template parameter values used to modify some manually handled identifiers
  903. ]]
  local function build_id_list( id_list, options )
      -- Turn the table of identifiers built by extract_ids() into a sequence of rendered
      -- identifier strings, ordered by handler label.
      --   id_list : table mapping upper-case identifier names (keys of cfg.id_handlers) to values
      --   options : table of template parameter values consulted by some renderers
      --             (IdAccessLevels, Class, ASINTLD, DoiBroken, IgnoreISBN, Embargo)
      -- Calls throw_error() for a handler mode that is not 'external', 'internal', or 'manual',
      -- and for a 'manual' identifier that has no renderer here.

      -- ISBN is the only manual identifier whose error handling lives here rather than in its renderer
      local function render_isbn (v, handler)
          local text = internal_link_id( handler );
          local check, err_type = isbn( v );
          if not check then
              if is_set(options.IgnoreISBN) then              -- ISBN is invalid; if |ignore-isbn-error= set
                  add_maint_cat ('ignore_isbn_err');          -- add a maint category
              else
                  text = text .. set_error( 'bad_isbn', {err_type}, false, " ", "" );   -- else display an error message
              end
          elseif is_set(options.IgnoreISBN) then              -- ISBN is OK; if |ignore-isbn-error= set
              add_maint_cat ('ignore_isbn_err');              -- because |ignore-isbn-error= unnecessary
          end
          return text;
      end

      -- renderers for identifiers whose handler mode is 'manual', keyed by identifier name;
      -- each receives the identifier value and its handler
      local manual = {
          ARXIV     = function (v) return arxiv( v, options.Class ) end,
          ASIN      = function (v) return asin( v, options.ASINTLD ) end,
          BIBCODE   = function (v, handler) return bibcode( v, handler.access ) end,
          BIORXIV   = function (v) return biorxiv( v ) end,
          CITESEERX = function (v) return citeseerx( v ) end,
          CNID      = function (v) return cnid( v ) end,
          CSBN      = function (v) return csbn( v ) end,
          DOI       = function (v, handler) return doi( v, options.DoiBroken, handler.access ) end,
          EISSN     = function (v) return issn( v, true ) end,   -- true distinguishes eissn from issn
          HDL       = function (v, handler) return hdl( v, handler.access ) end,
          ISBN      = render_isbn,
          ISMN      = function (v) return ismn( v ) end,
          ISSN      = function (v) return issn( v ) end,
          JFM       = function (v) return jfm( v ) end,
          LCCN      = function (v) return lccn( v ) end,
          MR        = function (v) return mr( v ) end,
          OCLC      = function (v) return oclc( v ) end,
          OL        = function (v, handler) return openlibrary( v, handler.access ) end,
          PMC       = function (v) return pmc( v, options.Embargo ) end,
          PMID      = function (v) return pmid( v ) end,
          S2CID     = function (v, handler) return s2cid( v, handler.access ) end,
          SSRN      = function (v) return ssrn( v ) end,
          USENETID  = function (v) return usenet_id( v ) end,
          ZBL       = function (v) return zbl( v ) end,
      };
      manual.OLA = manual.OL;                                 -- both names use the same renderer

      local rendered = {};                                    -- {label, text} pairs until the final pass

      for k, v in pairs( id_list ) do                         -- k is uc identifier name as index to cfg.id_handlers; v is the identifier value
          -- per-identifier view: own id and access fields, everything else read from cfg
          local handler = setmetatable(
              { ['id'] = v, ['access'] = options.IdAccessLevels[k] },
              { __index = function (_, field) return cfg.id_handlers[k][field] end }
          );
          local mode = handler.mode;

          if mode == 'external' then
              table.insert( rendered, {handler.label, external_link_id( handler ) } );
          elseif mode == 'internal' then
              table.insert( rendered, {handler.label, internal_link_id( handler ) } );
          elseif mode ~= 'manual' then
              throw_error( 'unknown_ID_mode' );
          else
              local render = manual[k];
              if render then
                  local text = render( v, handler );
                  table.insert( rendered, {handler.label, text } );
              else
                  throw_error( 'unknown_manual_ID' );
              end
          end
      end

      table.sort( rendered, function ( a, b ) return a[1] < b[1] end );   -- order by label

      for i, pair in ipairs( rendered ) do                    -- keep only the rendered text
          rendered[i] = pair[2];
      end

      return rendered;
  end
  990. --[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------
  991. Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for
  992. any of the parameters listed in each cfg.id_handlers['...'].parameters. If found, adds the parameter and value to
  993. the identifier list. Emits a redundant-parameter error message if more than one alias exists in args
  994. ]]
  local function extract_ids( args )
      -- Collect the identifiers present in args.
      -- For every entry of cfg.id_handlers, select_one() picks a value from args using that
      -- entry's list of parameter aliases; a value that is set is stored under the handler's key.
      -- Returns a table mapping identifier names (e.g. 'ISBN') to the values found.
      local found = {};

      for name, handler in pairs( cfg.id_handlers ) do
          local value = select_one( args, handler.parameters, 'redundant_parameters' );
          if is_set( value ) then
              found[name] = value;
          end
      end

      return found;
  end
  1003. --[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >--------------------------------------
  1004. Fetches custom id access levels from arguments using configuration settings.
  1005. Parameters which have a predefined access level (e.g. arxiv) do not use this
  1006. function as they are directly rendered as free without using an additional parameter.
  1007. ]]
  local function extract_id_access_levels( args, id_list )
      -- Collect custom access levels for identifiers.
      --   args    : template arguments
      --   id_list : identifiers found by extract_ids()
      -- Only handlers that define custom_access are considered. A value that is not one of
      -- cfg.keywords['id-access'] emits 'invalid_param_val' and is discarded; an access
      -- parameter whose identifier is absent emits 'param_access_requires_param'.
      -- Returns a table mapping identifier names to lower-cased access levels.
      local levels = {};

      for name, handler in pairs( cfg.id_handlers ) do
          local access_param = handler.custom_access;
          if is_set( access_param ) then
              local supplied = args[access_param];
              if is_set( supplied ) then
                  local lowered = supplied:lower();
                  local recognized = in_array( lowered, cfg.keywords['id-access'] );

                  if not recognized then
                      append_error( 'invalid_param_val', {access_param, supplied} );
                  end
                  if not is_set( id_list[name] ) then
                      append_error( 'param_access_requires_param', {string.lower( name )} );
                  end

                  if recognized then
                      levels[name] = lowered;
                  else
                      levels[name] = nil;                     -- unrecognized values are not recorded
                  end
              end
          end
      end

      return levels;
  end
  1032. --[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------
  1033. Sets local cfg table and imported functions table to same (live or sandbox) as that used by the other modules.
  1034. ]]
  local function set_selected_modules (cfg_table_ptr, utilities_page_ptr, error_page_ptr, links_page_ptr, validation_page_ptr)
      -- Bind this module's forward-declared names to the configuration table and the
      -- functions of the module tables handed in by the caller.
      cfg = cfg_table_ptr;

      -- from the utilities module table
      is_set, in_array, wrap_style =
          utilities_page_ptr.is_set, utilities_page_ptr.in_array, utilities_page_ptr.wrap_style;

      -- from the error module table
      append_error, set_error, throw_error =
          error_page_ptr.append_error, error_page_ptr.set_error, error_page_ptr.throw_error;
      select_one, add_maint_cat =
          error_page_ptr.select_one, error_page_ptr.add_maint_cat;

      -- from the links and date-validation module tables
      make_internal_link = links_page_ptr.make_internal_link;
      is_valid_date_from_a_point = validation_page_ptr.is_valid_date_from_a_point;
  end
  1048. --[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------
  1049. ]]
  -- functions made available to the modules that require this one
  return {
      build_id_list            = build_id_list,
      extract_ids              = extract_ids,
      extract_id_access_levels = extract_id_access_levels,
      is_embargoed             = is_embargoed,
      set_selected_modules     = set_selected_modules,
  }