置顶公告:【置顶】关于临时开启评论区所有功能的公告(2022.10.22) | 【置顶】关于本站Widget恢复使用的公告
  • 你好~!欢迎来到萌娘百科镜像站!如需查看或编辑,请联系本站管理员注册账号。
  • 本镜像站和其他萌娘百科的镜像站无关,请注意分别。

Module:Citation/CS1/Utilities

猛汉♂百科,万男皆可猛的百科全书!转载请标注来源页面的网页链接,并声明引自猛汉百科。内容不可商用。
跳到导航 跳到搜索
Template-info.svg 模块文档  [创建] [刷新]
  --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
  File-level state declared ahead of the functions that use it.
  ]]
  local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration; assigned by set_selected_modules() and read by wrap_style(), make_sep_list(), has_invisible_chars(), and wrap_msg()
  --[[--------------------------< H Y P H E N _ T O _ D A S H >-------------------------------------------------

  Converts every ASCII hyphen in <str> to an en dash.

  <str> is returned unchanged when it is not set (nil or empty string) or when it contains any of the
  characters [ ] { } < > (presumably so that wiki / html-like markup is left untouched -- confirm with callers).

  Always returns exactly one value. string.gsub() returns both the new string and the number of substitutions;
  the count is discarded here so that it cannot leak into a caller's argument list or multiple assignment.

  ]]

  local function hyphen_to_dash( str )
      if not is_set(str) or str:match( "[%[%]{}<>]" ) ~= nil then
          return str;
      end

      local dashed = str:gsub( '%-', '–' ); -- '%-' is an escaped literal hyphen; single-target assignment drops gsub's count
      return dashed;
  end
  --[[--------------------------< I S _ S E T >------------------------------------------------------------------
  Reports whether <var> holds a usable value.

  Returns false when <var> is nil or the empty string; returns true for every other value (including false, 0,
  and tables).

  This function is deliberately global (not local): per the original note it is called both from this module and
  from Date validation.
  ]]
  function is_set( var )
      return var ~= nil and var ~= '';
  end
  --[[--------------------------< F I R S T _ S E T >------------------------------------------------------------
  Returns the first member of <list> that is set (see is_set()), examining positions 1 through <count> in order.
  Returns nothing when none of those positions holds a set value.

  <count> is supplied by the caller rather than derived from <list> because <list> may contain nil holes:
  pairs() gives no ordering guarantee and ipairs() stops at the first nil, so neither can be used here.
  ]]
  local function first_set (list, count)
      for index = 1, count do -- visit every position, including nil holes
          local candidate = list[index];
          if is_set( candidate ) then
              return candidate; -- first set member wins
          end
      end
  end
  --[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------
  Searches the sequence <haystack> for a member equal (==) to <needle>.

  Returns the index of the first matching member, or false when there is no match. A nil <needle> always yields
  false and <haystack> is not examined in that case.
  ]]
  local function in_array( needle, haystack )
      if needle ~= nil then
          for index, candidate in ipairs( haystack ) do
              if candidate == needle then
                  return index;
              end
          end
      end
      return false;
  end
  --[[--------------------------< S U B S T I T U T E >----------------------------------------------------------
  Populates numbered arguments in the message string <msg> using <args>.

  When <args> is nil or false, <msg> is returned as is. Otherwise <msg> and <args> are handed to
  mw.message.newRawMessage() and the plain-text result is returned, falling back to <msg> should that result
  be nil or false.
  ]]
  local function substitute( msg, args )
      if not args then
          return msg; -- nothing to substitute
      end
      return mw.message.newRawMessage( msg, args ):plain() or msg;
  end
  --[[--------------------------< H A S _ A C C E P T _ A S _ W R I T T E N >------------------------------------
  Detects accept-as-written markup, i.e. a value wholly wrapped in doubled parentheses: ((value)).

  Returns two values: <str> with the markup removed and true when <str> is wholly wrapped; the unmodified <str>
  and false otherwise.

  <allow_empty> selects how an empty frame is treated; only the boolean true enables the permissive form:
      allow_empty == true: the frame may be empty, so '(())' yields '' and true. This distinguishes an empty
          template parameter from the specific condition "has no applicable value" in citation context.
      anything else: at least one character must sit inside the markup, so '(())' yields '(())' and false.

  The two cases might be merged at a later stage but are kept separate for now.
  ]]
  local function has_accept_as_written (str, allow_empty)
      local inner = (true == allow_empty) and '(.*)' or '(.+)'; -- '(.*)' lets (()) stand for an empty set
      local stripped, hits = str:gsub ('^%(%(' .. inner .. '%)%)$', '%1');
      return stripped, hits ~= 0;
  end
  --[[--------------------------< S A F E _ F O R _ I T A L I C S >----------------------------------------------
  Protects a string that will be wrapped in wiki italic markup '' ... ''

  An unset <str> (nil or empty string) is returned unchanged. Otherwise:
      a leading apostrophe gets an empty <span></span> placed in front of it,
      a trailing apostrophe gets an empty <span></span> placed after it,
      every newline is replaced with a space because newlines break italics.

  Always returns exactly one value: string.gsub() also returns the number of substitutions made and that count
  is discarded here so it cannot leak into a caller's argument list or multiple assignment.

  Note: <i> cannot be used for italics, as the expected behavior for italics specified by ''...'' in the title is
  that they will be inverted (i.e. unitalicized) in the resulting references. In addition, <i> and '' tend to
  interact poorly under Mediawiki's HTML tidy.
  ]]
  local function safe_for_italics (str)
      if not is_set (str) then
          return str;
      end

      if str:sub (1, 1) == "'" then
          str = "<span></span>" .. str; -- keep the leading apostrophe away from the opening ''
      end
      if str:sub (-1, -1) == "'" then
          str = str .. "<span></span>"; -- keep the trailing apostrophe away from the closing ''
      end

      local single_line = str:gsub ('\n', ' '); -- single-target assignment drops gsub's substitution count
      return single_line;
  end
  --[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------
  Applies styling to a parameter value: <str> is substituted into the one-argument message stored at
  cfg.presentation[<key>].

  Returns the empty string when <str> is not set. When <key> is listed in cfg.presentation['_safe_for_italics'],
  <str> is first passed through safe_for_italics().

  Similar to wrap_msg() but kept separate because the message text comes from cfg.presentation, not cfg.messages.
  ]]
  local function wrap_style (key, str)
      if not is_set (str) then
          return '';
      end

      local presentation = cfg.presentation;
      if in_array (key, presentation['_safe_for_italics']) then
          str = safe_for_italics (str);
      end

      return substitute (presentation[key], str);
  end
  --[[--------------------------< M A K E _ S E P _ L I S T >------------------------------------------------------------
  Makes a separated list from the sequence <list_seq> using the provided separators.

      <count>         - number of items to join
      <list_seq>      - sequence of strings
      <sep_list>      - separator between items other than the last two; typically '<comma><space>'
      <sep_list_pair> - separator used when there are two (or fewer) items; typically '<space>and<space>'
      <sep_list_end>  - separator before the last of three or more items; typically '<comma><space>and<space>'
                        or '<comma><space>&<space>'

  When <sep_list> is omitted, all three separators are taken from cfg.presentation['sep_list'],
  cfg.presentation['sep_list_pair'], and cfg.presentation['sep_list_end']; so if <sep_list_end> is specified,
  <sep_list> and <sep_list_pair> must also be supplied.

  With <count> of 2 or less the whole of <list_seq> is joined with <sep_list_pair> (a single item is returned as
  is). With <count> above 2, items 1 .. <count>-1 are joined with <sep_list> and item <count> is appended with
  <sep_list_end>.
  ]]
  local function make_sep_list (count, list_seq, sep_list, sep_list_pair, sep_list_end)
      if not sep_list then -- no separators supplied: use the configured defaults
          local presentation = cfg.presentation;
          sep_list = presentation['sep_list'];
          sep_list_pair = presentation['sep_list_pair'];
          sep_list_end = presentation['sep_list_end'];
      end

      if count <= 2 then
          return table.concat (list_seq, sep_list_pair); -- one item, or two items joined by the pair separator
      end

      if count > 2 then
          local head = table.concat (list_seq, sep_list, 1, count - 1); -- all but the last item
          return table.concat ({head, list_seq[count]}, sep_list_end); -- attach the last item with the final separator
      end

      return '';
  end
  --[[--------------------------< S A F E _ J O I N >------------------------------------------------------------
  Joins the sequence of strings <tbl> into one string while avoiding a doubled separator character
  (<duplicate_char>, typically sepc) where two segments meet.

  Segments usually begin with the terminal punctuation for the preceding segment, so only a segment whose first
  visible character (after html-like <...> markup is removed) is <duplicate_char> is examined. Then:
      output ends with <duplicate_char>             -> that character is dropped from the output
      output ends with <duplicate_char>''           -> <duplicate_char> is dropped from the output, '' is kept
      output ends with <duplicate_char><space>      -> both characters are dropped from the output
      output ends with <duplicate_char> followed by ]]'' or ]'' or ]] or ] or '']
                                                    -> the output is kept and the leading <duplicate_char> is
                                                       removed from the incoming segment instead
  Empty segments are skipped.

  Note: plain string functions are used here rather than ustring functions. This is considerably faster and
  works correctly as long as <duplicate_char> is strict ASCII; the strings in <tbl> may be ASCII or UTF8.
  ]]
  local function safe_join( tbl, duplicate_char )
      local joined = ''; -- the output string

      for _, piece in ipairs( tbl ) do
          if joined == '' then
              joined = piece; -- first non-empty segment is taken as is
          elseif piece ~= '' then
              local bare = piece; -- the segment as compared: without html-like markup
              if piece:sub(1,1) == '<' then
                  bare = piece:gsub( "%b<>", "" ); -- <span>string</span> -> string
              end

              if bare:sub(1,1) == duplicate_char then
                  local trim = false; -- true when the duplicate must be removed from <piece> rather than from <joined>
                  local tail = joined:sub(-1); -- last character of the output so far

                  if tail == duplicate_char then
                      joined = joined:sub(1, -2);
                  elseif tail == "'" then -- might be wiki italic markup
                      if joined:sub(-3) == duplicate_char .. "''" then
                          joined = joined:sub(1, -4) .. "''"; -- drop the separator, put the '' back
                      else
                          trim = joined:sub(-5) == duplicate_char .. "]]''"
                              or joined:sub(-4) == duplicate_char .. "]''";
                      end
                  elseif tail == "]" then -- might be a wikilink or an external link
                      trim = joined:sub(-3) == duplicate_char .. "]]"
                          or joined:sub(-2) == duplicate_char .. "]"
                          or joined:sub(-4) == duplicate_char .. "'']"; -- normal case when |url=something & |title=Title.
                  elseif tail == " " and joined:sub(-2) == duplicate_char .. " " then
                      joined = joined:sub(1, -3); -- drop <duplicate_char><space>
                  end

                  if trim then
                      if piece ~= bare then -- the segment contains html-like markup
                          local escaped = duplicate_char;
                          if escaped:match( "%A" ) then escaped = "%" .. escaped; end -- escape a non-letter for use in a pattern
                          piece = piece:gsub( "(%b<>)" .. escaped, "%1", 1 ); -- remove duplicate_char where it follows markup
                      else
                          piece = piece:sub( 2 ); -- remove the leading duplicate_char
                      end
                  end
              end

              joined = joined .. piece;
          end
      end

      return joined;
  end
  --[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------
  Strips wiki italic and bold markup from <argument> so that it doesn't contaminate COinS metadata.

  Works in passes. Each pass looks for the longest run of apostrophes still present, trying 5 (bold italic),
  then 4 (italic start and end without content), then 3 (bold), then 2 (italic), and removes every instance of
  that run. Passes repeat until no run of two or more apostrophes remains.

  Editors who took the time to mark up a title are presumed to have provided valid markup; when they have not,
  some single apostrophes are left behind. An unset <argument> (nil or empty string) is returned unchanged.
  ]]
  local function strip_apostrophe_markup (argument)
      if not is_set (argument) then
          return argument;
      end

      local runs = {"'''''", "''''", "'''", "''"}; -- longest first; the apostrophe is not a pattern magic character

      local changed = true;
      while changed do
          changed = false;
          for _, run in ipairs (runs) do
              if argument:find (run, 1, true) then
                  argument = argument:gsub (run, ""); -- remove all instances of this run
                  changed = true;
                  break; -- start over from the longest run
              end
          end
      end

      return argument;
  end
  --[[--------------------------< H A S _ I N V I S I B L E _ C H A R S >----------------------------------------
  Searches the parameter value <v> for nonprintable or invisible characters using the entries of
  cfg.invisible_chars, in order. Each entry is read as:
      [1] the character or group name
      [2] the pattern handed to mw.ustring.find()
      [3] optional message text; when absent the name in [1] is used instead
  The search stops at the first entry that produces an error.

  Matches that are detected but not reported:
      nowiki, math, and templatestyles stripmarkers (identified by the pattern's capture);
      the 'delete' character once such a stripmarker has been seen, because stripmarkers begin and end with the
          delete character;
      the 'apostrophe' entry, and after it the 'zero width joiner', 'zero width space', and 'hair space' entries,
          which is the character pattern produced by transclusion of {{'}} templates.
  NOTE(review): the two exemptions above rely on the order of the entries in cfg.invisible_chars -- confirm
  against Module:Citation/CS1/Configuration.

  Other named stripmarkers are reported with the stripmarker name (the capture) prefixed to the message.
  See also coins_cleanup().

  Returns nothing when no error is found; otherwise returns a sequence:
      {<error message>, <param> styled by wrap_style ('parameter', ...), <position reported by mw.ustring.find()>}
  ]]
  local function has_invisible_chars (param, v)
      local seen_stripmarker = false; -- set once an allowed stripmarker has been found
      local seen_apostrophe = false; -- set once the {{'}} apostrophe pattern has been found

      for _, entry in ipairs (cfg.invisible_chars) do
          local char = entry[1]; -- the character or group name
          local pattern = entry[2]; -- the pattern used to find it
          local position, _, capture = mw.ustring.find (v, pattern); -- second return (end of match) is not needed

          if position then
              if 'nowiki' == capture or 'math' == capture or 'templatestyles' == capture then
                  seen_stripmarker = true; -- allowed stripmarker; not an error condition
              elseif seen_stripmarker and 'delete' == char then
                  -- assume this delete char is one end of the stripmarker found earlier; ignore it
              elseif 'apostrophe' == char then -- apostrophe template uses &zwj;, hair space and zero-width space
                  seen_apostrophe = true;
              elseif seen_apostrophe and in_array (char, {'zero width joiner', 'zero width space', 'hair space'}) then
                  -- part of the {{'}} pattern; ignore it
              else
                  local err_msg;
                  if capture then
                      -- parentheses are required: '..' binds tighter than 'or', so without them a missing
                      -- entry[3] raises a concatenation error instead of falling back to <char>
                      err_msg = capture .. ' ' .. (entry[3] or char);
                  else
                      err_msg = entry[3] or (char .. ' character');
                  end
                  return {err_msg, wrap_style ('parameter', param), position}; -- and done with this parameter
              end
          end
      end
  end
  --[[--------------------------< W R A P _ M S G >--------------------------------------------------------------
  Applies additional message text to a parameter value: <str> is substituted into the one-argument message stored
  at cfg.messages[<key>].

  Returns the empty string when <str> is not set. When <key> is listed in cfg.messages['_safe_for_italics'], <str>
  is first passed through safe_for_italics(). When <lower> is exactly true, the message text is converted to lower
  case before <str> is substituted into it (lower case text for {{citation}} templates), so <str> itself is not
  lower-cased.

  Similar to wrap_style() but kept separate because the message text comes from cfg.messages, not
  cfg.presentation.
  ]]
  local function wrap_msg (key, str, lower)
      if not is_set (str) then
          return '';
      end

      if in_array (key, cfg.messages['_safe_for_italics']) then
          str = safe_for_italics (str);
      end

      local template = cfg.messages[key];
      if true == lower then
          template = template:lower(); -- lower-case the message before including the template text
      end

      return substitute (template, str);
  end
  --[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------
  Applies kerning to open the space between the quote mark provided by the Module and a leading or trailing quote
  mark contained in a |title= or |chapter= parameter's value.

  Either single or double quotes are positive kerned:
      "'Unkerned title with leading and trailing single quote marks'"
      " 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)

  Double single quotes (italic or bold wikimarkup) are not kerned.

  A leading quote mark is handled first, through wrap_style ('kern-left', ...); a trailing quote mark is then
  looked for in that result and handled through wrap_style ('kern-right', ...). <str> is returned unchanged when
  neither pattern matches.

  Call this function for chapter titles, for website titles, etc; not for book titles.
  ]]
  local function kern_quotes (str)
      -- leading double or single quote, but not double single quotes
      local opening, remainder = str:match ("^([\"\'])([^\'].+)");
      if is_set (opening) then
          str = wrap_style ('kern-left', {opening, remainder});
      end

      -- trailing double or single quote, but not double single quotes
      local body, closing = str:match ("^(.+[^\'])([\"\'])$");
      if is_set (body) then
          str = wrap_style ('kern-right', {body, closing});
      end

      return str;
  end
  --[[--------------------------< P E N D _ S E P A R A T O R >--------------------------------------------------
  Attaches the separator <sepc> and a space to <item>.

  Returns the empty string when <item> is not set. Otherwise:
      <prepend> truthy        -> <sepc><space><item>
      <prepend> false or nil  -> <item><sepc><space>
  ]]
  local function pend_separator (item, sepc, prepend)
      if not is_set (item) then
          return '';
      end

      if prepend then
          return sepc .. ' ' .. item;
      end

      return item .. sepc .. ' ';
  end
  --[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------
  Sets the file-level cfg table to the same one (live or sandbox) as that used by the other modules.

  <cfg_table_ptr> - the configuration table to use; it is stored as is, without being copied or validated.

  The functions in this module that read cfg (wrap_style(), make_sep_list(), has_invisible_chars(), wrap_msg())
  index it directly, so this function has to be called before any of them.
  ]]
  local function set_selected_modules (cfg_table_ptr)
      cfg = cfg_table_ptr; -- assigns the file-level local, not a global
  end
  --[[--------------------------< E X P O R T S >----------------------------------------------------------------
  The table returned to modules that require() this one. Every entry maps a public name to the function of the
  same name defined above, with one exception: 'substitude' is a misspelled alias of substitute().
  ]]
  return {
      first_set = first_set, -- exported functions
      has_accept_as_written = has_accept_as_written,
      has_invisible_chars = has_invisible_chars,
      hyphen_to_dash = hyphen_to_dash,
      in_array = in_array,
      is_set = is_set,
      kern_quotes = kern_quotes,
      make_sep_list = make_sep_list,
      pend_separator = pend_separator,
      safe_join = safe_join,
      -- The value used to be the undefined global 'substitude', which evaluates to nil (so the key was silently
      -- dropped) and raises an error where undefined globals are forbidden. The misspelled key is kept, now
      -- pointing at the real function, so a caller using this spelling gets substitute().
      substitude = substitute,
      strip_apostrophe_markup = strip_apostrophe_markup,
      substitute = substitute,
      wrap_style = wrap_style,
      wrap_msg = wrap_msg,
      set_selected_modules = set_selected_modules
  }