置顶公告:【置顶】关于临时开启评论区所有功能的公告(2022.10.22) | 【置顶】关于本站Widget恢复使用的公告
  • 你好~!欢迎来到萌娘百科镜像站!如需查看或编辑,请联系本站管理员注册账号。
  • 本镜像站和其他萌娘百科的镜像站无关,请注意分别。

Module:Citation/CS1/COinS

猛汉♂百科,万男皆可猛的百科全书!转载请标注来源页面的网页链接,并声明引自猛汉百科。内容不可商用。
跳到导航 跳到搜索
Template-info.svg 模块文档  [创建] [刷新]
  1. --[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
  2. ]]
  3. local is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities and Module:Citation/CS1/Links
  4. local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
  --[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------

  Builds the COinS title string from a title and its companion script title (Title / ScriptTitle, Chapter /
  ScriptChapter, or any other name-script pair).

  A part that is set has its apostrophe markup (bold, italics) stripped so that the metadata is not polluted with
  runs of %27%27...; a leading two-lowercase-letter language prefix ('ja:' and the like) is removed from the script
  part before stripping.  A part that is not set contributes the empty string.  When both parts are still set after
  stripping they are joined with a single space.

  ]]

  local function make_coins_title (title, script)
    local plain_title = '';                                       -- stays empty when title is not set
    local plain_script = '';                                      -- stays empty when script is not set

    if is_set (title) then
      plain_title = strip_apostrophe_markup (title);
    end

    if is_set (script) then
      local unprefixed = script:gsub ('^%l%l%s*:%s*', '');      -- may be the empty string when script was only a prefix
      plain_script = strip_apostrophe_markup (unprefixed);
    end

    if is_set (plain_title) and is_set (plain_script) then
      return plain_title .. ' ' .. plain_script;                  -- both present: separate them with a space
    end

    return plain_title .. plain_script;                           -- at most one of the two is non-empty here
  end
  --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

  Returns a copy of argument in which every Lua pattern magic character ( ^ $ ( ) % . [ ] * + - ? ) is preceded by
  '%', so that the result can be handed to string.gsub() and friends as a pattern that matches argument literally.

  ]]

  local function escape_lua_magic_chars (argument)
    -- a single pass is enough: '%' is a member of the set, and %0 in the replacement is the matched character
    local escaped = argument:gsub ("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0");
    return escaped;                                               -- only the string, not gsub's match count
  end
  --[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------

  Reduces a |page=, |pages=, or |at= value to plain page numbers for use in COinS.

  For every external link of the form [url label] the url and the white space that follows it are removed, then the
  square brackets are removed, endashes become hyphens, and named HTML entities (&ndash; etc.) become hyphens.
  A value that is not set is returned unchanged.

  ]]

  local function get_coins_pages (pages)
    if not is_set (pages) then
      return pages;                                               -- no page numbers; nothing to do
    end

    local link_pattern = "%[(%w*:?//[^ ]+%s+)[%w%d].*%]";         -- captures the url and following space(s) of "[url label]"
    local url_and_space = pages:match (link_pattern);

    while nil ~= url_and_space do                                 -- until no external link with a url remains
      -- the captured text is literal; escape it before using it as a pattern, then remove every occurrence
      pages = pages:gsub (escape_lua_magic_chars (url_and_space), "");
      url_and_space = pages:match (link_pattern);
    end

    local cleaned = pages
      :gsub ("[%[%]]", "")                                        -- remove the brackets
      :gsub ("–", "-" )                                           -- replace endashes with hyphens
      :gsub ("&%w+;", "-" );                                      -- replace named HTML entities with hyphens; numeric entities (&#32;) are not touched

    return cleaned;
  end
  --[=[-------------------------< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >------------------

  Replaces the first math stripmarker in value with a plain-text form of the equation.

  There are three options for math markup rendering that depend on the editor's math preference settings.  These
  settings are at [[Special:Preferences#mw-prefsection-rendering]] and are
    PNG images
    TeX source
    MathML with SVG or PNG fallback

  All three are heavy with HTML and CSS which doesn't belong in the metadata.  Without this function, the metadata
  saved in the raw wikitext contained the rendering determined by the settings of the last editor to save the page.

  This function gets the rendered form of an equation according to the editor's preference before the page is saved.
  It then searches the rendering for the text equivalent of the rendered equation and replaces the stripmarker with
  that text, so that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form
  of the equation.

  Returns exactly two values:
    true, value with the first math stripmarker replaced   when the rendering is one of the three known forms
    false, value unchanged                                  when value has no math stripmarker, or the rendering
                                                            is not one of the three known forms

  To replace multiple equations it is necessary to call this function from within a loop.

  The equation text is inserted through a replacement function, not a replacement string.  string.gsub() gives '%'
  a special meaning in replacement strings, and equation text can contain '%' (\% for example); as a replacement
  string such text would be corrupted or would raise an error.  A string returned from a replacement function is
  inserted literally.

  ]=]

  local function coins_replace_math_stripmarker (value)
    local stripmarker = cfg.stripmarkers['math'];
    local marker = value:match (stripmarker);                     -- is there a math stripmarker

    if not marker then                                            -- no math stripmarker; abandon this test
      return false, value;
    end

    local rendering = mw.text.unstripNoWiki (marker);             -- convert stripmarker into rendered value

    local math_text = rendering:match ('alt="([^"]+)"')           -- PNG math option: text is in the alt attribute
      or rendering:match ('$%s+(.+)%s+%$')                        -- TeX math option: text is between $ ... $
      or rendering:match ('<annotation[^>]+>(.+)</annotation>');  -- MathML math option: text is in <annotation>

    if not math_text then
      return false, value;                                        -- had math stripmarker but not one of the three defined forms
    end

    local replaced = value:gsub (stripmarker, function () return math_text; end, 1);
    return true, replaced;                                        -- gsub's match count is deliberately not returned
  end
  --[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------

  Cleans up one parameter value for the metadata: math stripmarkers are replaced with text, nowiki stripmarkers
  are replaced with their content, and invisible characters and certain HTML entities are removed or replaced.

  2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate
  content when it shouldn't.  See https://phabricator.wikimedia.org/T121085 and
  Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29

  TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the
  invisible characters table?

  ]]

  local function coins_cleanup (value)
    local math_replaced;

    repeat                                                        -- one math stripmarker per pass until none can be replaced
      math_replaced, value = coins_replace_math_stripmarker (value);
    until not math_replaced

    -- any math stripmarker still present could not be rendered as text; insert vague error message
    value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR");
    value = mw.text.unstripNoWiki (value);                        -- replace nowiki stripmarkers with their content

    value = value
      :gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;(s?)</span>', "'%1")   -- {{'}} or {{'s}} become apostrophe or apostrophe-s
      :gsub ('&nbsp;', ' ')                                       -- &nbsp; entity becomes plain space
      :gsub ('\226\128\138', ' ');                                -- hair space becomes plain space

    local has_indic_script = mw.ustring.find (value, cfg.indic_script);
    if not has_indic_script then                                  -- zero-width joiners are kept in indic script
      value = value:gsub ('&zwj;', '');                           -- remove &zwj; entities
      value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', '');   -- remove zero-width joiner, zero-width space, soft hyphen
    end

    local collapsed = value:gsub ('[\009\010\013 ]+', ' ');      -- runs of tab, line feed, carriage return, space become one space
    return collapsed;
  end
  --[[--------------------------< C O I N S >--------------------------------------------------------------------

  COinS metadata (see <http://ocoins.info/>) allows automated tools to parse the citation information.

  data is the table of metadata values for one citation; class names the kind of citation being rendered ('book',
  'journal', 'web', 'thesis', ...).

  Returns the empty string when data is not a table or is an empty table.  Otherwise returns the key=value pairs
  joined with '&': ctx_ver first, the remaining pairs sorted.  Only values that are set are emitted; each emitted
  value has wikilink markup removed and is uri-encoded.

  Every value in data except ID_list and Authors is passed through coins_cleanup () and written back into data, so
  the caller's table is modified.

  ]]

  local function COinS (data, class)
    if 'table' ~= type (data) or nil == next (data) then
      return '';
    end

    for param, raw_value in pairs (data) do                       -- spin through all of the metadata parameter values
      if 'ID_list' ~= param and 'Authors' ~= param then           -- these two are tables; author names are cleaned when Authors is processed
        data[param] = coins_cleanup (raw_value);
      end
    end

    local version = "Z39.88-2004";

    -- OCinSoutput is used strictly as an array of 'key=value' strings: assigning to any key appends one entry,
    -- and only when the value is set.  Assigning to the same key twice therefore yields two entries.
    local function append_when_set (self, key, value)
      if is_set (value) then
        rawset (self, #self+1, table.concat {key, '=', mw.uri.encode (remove_wiki_link (value))});
      end
    end

    local OCinSoutput = setmetatable ({}, {__newindex = append_when_set});

    local genre;
    local periodical_metadata =
      in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'})
      or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical))
      or ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia));

    if periodical_metadata then
      OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal";   -- journal metadata identifier

      if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then
        genre = "preprint";                                       -- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
      elseif 'conference' == class then
        genre = "conference";                                     -- cite conference (when Periodical set)
      elseif 'web' == class then
        genre = "unknown";                                        -- cite web (when Periodical set)
      else
        genre = "article";                                        -- journal and other 'periodical' articles
      end
      OCinSoutput["rft.genre"] = genre;

      OCinSoutput["rft.jtitle"] = data.Periodical;                -- journal only

      local article_title;
      if is_set (data.Map) then
        article_title = data.Map;                                 -- for a map in a periodical
      else
        article_title = make_coins_title (data.Title, data.ScriptTitle);   -- all other 'periodical' article titles
      end
      OCinSoutput["rft.atitle"] = article_title;

      -- these are used only for periodicals
      OCinSoutput["rft.ssn"] = data.Season;                       -- keywords: winter, spring, summer, fall
      OCinSoutput["rft.chron"] = data.Chron;                      -- free-form date components
      OCinSoutput["rft.volume"] = data.Volume;                    -- does not apply to books
      OCinSoutput["rft.issue"] = data.Issue;
      OCinSoutput["rft.pages"] = get_coins_pages (data.Pages);    -- also used in book metadata

    elseif 'thesis' ~= class then                                 -- all others except cite thesis are 'book' metadata; genre distinguishes
      OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:book";      -- book metadata identifier

      if 'report' == class or 'techreport' == class then          -- cite report and cite techreport
        OCinSoutput["rft.genre"] = "report";
      elseif 'conference' == class then                           -- cite conference when Periodical not set
        OCinSoutput["rft.genre"] = "conference";
      elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
        if is_set (data.Chapter) or is_set (data.ScriptChapter) then
          -- book chapter, encyclopedia article, interview in a book, or map title
          OCinSoutput["rft.genre"] = "bookitem";
          OCinSoutput["rft.atitle"] = make_coins_title (data.Chapter, data.ScriptChapter);
        elseif 'map' == class or 'interview' == class then
          OCinSoutput["rft.genre"] = 'unknown';                   -- standalone map or interview
        else
          OCinSoutput["rft.genre"] = 'book';                      -- book and encyclopedia
        end
      else  -- {'AV media', 'AV media notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
        OCinSoutput["rft.genre"] = "unknown";
      end

      OCinSoutput["rft.btitle"] = make_coins_title (data.Title, data.ScriptTitle);   -- book only
      OCinSoutput["rft.place"] = data.PublicationPlace;           -- book only
      OCinSoutput["rft.series"] = data.Series;                    -- book only
      OCinSoutput["rft.pages"] = get_coins_pages (data.Pages);    -- book, journal
      OCinSoutput["rft.edition"] = data.Edition;                  -- book only
      OCinSoutput["rft.pub"] = data.PublisherName;                -- book and dissertation

    else                                                          -- cite thesis
      OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation";   -- dissertation metadata identifier
      OCinSoutput["rft.title"] = make_coins_title (data.Title, data.ScriptTitle);   -- dissertation (also patent but that is not yet supported)
      OCinSoutput["rft.degree"] = data.Degree;                    -- dissertation only
      OCinSoutput['rft.inst'] = data.PublisherName;               -- book and dissertation
    end

    -- and now common parameters (as much as possible)
    OCinSoutput["rft.date"] = data.Date;                          -- book, journal, dissertation

    for id_name, id_value in pairs (data.ID_list) do              -- identifiers are assumed to be common to all formats
      if id_name == 'ISBN' then
        id_value = id_value:gsub ("[^-0-9X]", "");                -- keep only digits, hyphens and X
      end

      local handler = cfg.id_handlers[id_name];
      local keyword = handler.COinS;
      local keyword_text = keyword or "";

      if string.sub (keyword_text, 1, 4) == 'info' then           -- for ids that are in the info:registry
        OCinSoutput["rft_id"] = table.concat {keyword, "/", id_value};
      elseif string.sub (keyword_text, 1, 3) == 'rft' then        -- for isbn, issn, eissn, etc that have defined COinS keywords
        OCinSoutput[keyword] = id_value;
      elseif keyword then                                         -- any other keyword that is not nil: provide a url
        OCinSoutput["rft_id"] = table.concat {handler.prefix, id_value};
      end
    end

    for position, author in ipairs (data.Authors) do
      -- replace any nowiki stripmarkers, non-printing or invisible characters
      local surname = coins_cleanup (author.last);
      local given = coins_cleanup (author.first or '');

      if is_set (surname) then                                    -- an author without a last name contributes nothing
        if not is_set (given) then
          OCinSoutput["rft.au"] = surname;                        -- book, journal, dissertation
        elseif 1 == position then                                 -- first author with both names gets the split form
          OCinSoutput["rft.aulast"] = surname;                    -- book, journal, dissertation
          OCinSoutput["rft.aufirst"] = given;                     -- book, journal, dissertation
        else                                                      -- all other authors with both names
          OCinSoutput["rft.au"] = table.concat {surname, ", ", given};   -- book, journal, dissertation
        end
      end
    end

    OCinSoutput.rft_id = data.URL;
    OCinSoutput.rfr_id = table.concat {"info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage};

    setmetatable (OCinSoutput, nil);                              -- from here on OCinSoutput is a plain array

    -- sort, then put the version string first, and combine
    table.sort (OCinSoutput);
    table.insert (OCinSoutput, 1, "ctx_ver=" .. version);         -- such as "Z39.88-2004"
    return table.concat (OCinSoutput, "&");
  end
  --[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

  Points this module's cfg table and its imported functions at the same (live or sandbox) modules as those used
  by the other modules.

    cfg_table_ptr       becomes cfg
    utilities_page_ptr  supplies is_set, in_array, and strip_apostrophe_markup
    links_page_ptr      supplies remove_wiki_link

  ]]

  local function set_selected_modules (cfg_table_ptr, utilities_page_ptr, links_page_ptr)
    local utilities = utilities_page_ptr;                         -- short alias only

    cfg = cfg_table_ptr;

    is_set, in_array, strip_apostrophe_markup =
      utilities.is_set, utilities.in_array, utilities.strip_apostrophe_markup;

    remove_wiki_link = links_page_ptr.remove_wiki_link;
  end
  --[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

  The module's public interface: the metadata renderer and the function that selects which companion modules
  this module uses.

  ]]

  local exports = {};

  exports.COinS = COinS;
  exports.set_selected_modules = set_selected_modules;

  return exports;