-- Module:Citation/CS1/Utilities
-- < Module:Citation | CS1
-- (wiki page chrome, not part of the module: "Jump to navigation" / "Jump to search")
--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
]]

local cfg;																		-- table of tables imported from selected Module:Citation/CS1/Configuration; assigned by set_selected_modules()


--[[--------------------------< H Y P H E N _ T O _ D A S H >-------------------------------------------------

Converts every hyphen in <str> to an en dash.

<str> is returned unchanged when it is not set or when it contains any of the characters [ ] { } < >.

Otherwise the results of string.gsub() are returned as-is: the converted string followed by the number of
substitutions made.  is_set() is a global that is defined further down; it is resolved when this function is called.

]]

local function hyphen_to_dash( str )
	if not is_set(str) or str:match( "[%[%]{}<>]" ) ~= nil then
		return str;
	end
	return str:gsub( '-', '–' );
end


--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) and is not an
empty string.

This function is global because it is called from both this module and from Date validation

]]

function is_set( var )
	return not (var == nil or var == '');
end


--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------

Locates and returns the first set value in a table of values where the order established in the table,
left-to-right (or top-to-bottom), is the order in which the values are evaluated.  Returns nil if none are set.

This version replaces the original 'for _, val in pairs do' and a similar version that used ipairs.  With the pairs
version the order of evaluation could not be guaranteed.  With the ipairs version, a nil value would terminate
the for-loop before it reached the actual end of the list.

<list> - table of candidate values, indexed 1 .. <count>
<count> - number of positions in <list> to examine (needed because <list> may contain nil holes)

]]

local function first_set (list, count)
	local i = 1;
	while i <= count do															-- loop through all items in list
		if is_set( list[i] ) then
			return list[i];														-- return the first set list member
		end
		i = i + 1;																-- point to next
	end
end


--[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------

Whether needle is in haystack.

Returns the (truthy) index of the first element of <haystack> equal to <needle>; returns false when <needle>
is nil or is not found.

]]

local function in_array( needle, haystack )
	if needle == nil then
		return false;
	end
	for n,v in ipairs( haystack ) do
		if v == needle then
			return n;
		end
	end
	return false;
end


--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------

Populates numbered arguments in a message string using an argument table.

When <args> is nil or false, <msg> is returned unchanged.

]]

local function substitute( msg, args )
	return args and mw.message.newRawMessage( msg, args ):plain() or msg;
end


--[[--------------------------< H A S _ A C C E P T _ A S _ W R I T T E N >------------------------------------

When <str> is wholly wrapped in accept-as-written markup, return <str> without markup and true; return <str> and
false else.

with allow_empty = false, <str> must have at least one character inside the markup
with allow_empty = true, the markup frame can be empty like (()) to distinguish an empty template parameter from
the specific condition "has no applicable value" in citation-context.

After further evaluation the two cases might be merged at a later stage, but should be kept separated for now.

]]

local function has_accept_as_written (str, allow_empty)
	local count;

	if true == allow_empty then
		str, count = str:gsub ('^%(%((.*)%)%)$', '%1'); 						-- allows (()) to be an empty set
	else
		str, count = str:gsub ('^%(%((.+)%)%)$', '%1');
	end
	return str, 0 ~= count;
end


--[[--------------------------< S A F E _ F O R _ I T A L I C S >----------------------------------------------

Protects a string that will be wrapped in wiki italic markup '' ... ''

Note: We cannot use <i> for italics, as the expected behavior for italics specified by ''...'' in the title is that
they will be inverted (i.e. unitalicized) in the resulting references.  In addition, <i> and '' tend to interact
poorly under Mediawiki's HTML tidy.

Unset input is returned unchanged.  Otherwise the results of string.gsub() are returned as-is: the protected string
followed by the number of newlines that were replaced.

]]

local function safe_for_italics (str)
	if not is_set (str) then return str end

	if str:sub (1, 1) == "'" then str = "<span></span>" .. str; end			-- keep a leading apostrophe from merging with the italic markup
	if str:sub (-1, -1) == "'" then str = str .. "<span></span>"; end			-- same for a trailing apostrophe

	return str:gsub ('\n', ' ');												-- Remove newlines as they break italics.
end


--[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------

Applies styling to various parameters.  Supplied string is wrapped using a message_list configuration taking one
argument; protects italic styled parameters.  Additional text taken from citation_config.presentation - the reason
this function is similar to but separate from wrap_msg().

Returns an empty string when <str> is not set.

]]

local function wrap_style (key, str)
	if not is_set (str) then
		return '';
	elseif in_array (key, cfg.presentation['_safe_for_italics']) then
		str = safe_for_italics (str);											-- assignment keeps only the string, not the gsub count
	end

	return substitute (cfg.presentation[key], str);
end


--[[--------------------------< M A K E _ S E P _ L I S T >----------------------------------------------------

make a separated list of items using provided separators.
	<sep_list> - typically '<comma><space>'
	<sep_list_pair> - typically '<space>and<space>'
	<sep_list_end> - typically '<comma><space>and<space>' or '<comma><space>&<space>'

defaults to cfg.presentation['sep_list'], cfg.presentation['sep_list_pair'], and cfg.presentation['sep_list_end']
if <sep_list_end> is specified, <sep_list> and <sep_list_pair> must also be supplied

	<count> - number of items in <list_seq>
	<list_seq> - sequence of strings to be joined

]]

local function make_sep_list (count, list_seq, sep_list, sep_list_pair, sep_list_end)
	local list = '';

	if not sep_list then														-- set the defaults
		sep_list = cfg.presentation['sep_list'];
		sep_list_pair = cfg.presentation['sep_list_pair'];
		sep_list_end = cfg.presentation['sep_list_end'];
	end

	if 2 >= count then
		list = table.concat (list_seq, sep_list_pair);							-- insert separator between two items; returns list_seq[1] when only one item
	elseif 2 < count then
		list = table.concat (list_seq, sep_list, 1, count - 1);					-- concatenate all but last item with plain list separator
		list = table.concat ({list, list_seq[count]}, sep_list_end);			-- concatenate last item onto end of <list> with final separator
	end

	return list;
end


--[[--------------------------< S A F E _ J O I N >------------------------------------------------------------

Joins a sequence of strings together while checking for duplicate separation characters.

	<tbl> - sequence of strings to join; empty strings are skipped
	<duplicate_char> - single separator character (typically sepc) that must not be doubled at a join

]]

local function safe_join( tbl, duplicate_char )
	--[[
	Note: we use string functions here, rather than ustring functions.

	This has considerably faster performance and should work correctly as
	long as the duplicate_char is strict ASCII.  The strings
	in tbl may be ASCII or UTF8.
	]]

	local str = '';																-- the output string
	local comp = '';															-- <value> with html markup removed; used for the comparisons
	local end_chr = '';
	local trim;
	for _, value in ipairs( tbl ) do											-- ipairs never yields nil so no nil check is needed here
		if str == '' then														-- if output string is empty
			str = value;														-- assign value to it (first time through the loop)
		elseif value ~= '' then
			if value:sub(1,1) == '<' then										-- Special case of values enclosed in spans and other markup.
				comp = value:gsub( "%b<>", "" );								-- remove html markup (<span>string</span> -> string)
			else
				comp = value;
			end
																				-- typically duplicate_char is sepc
			if comp:sub(1,1) == duplicate_char then								-- is first character same as duplicate_char? why test first character?
																				-- Because individual string segments often (always?) begin with terminal punct for the
																				-- preceding segment: 'First element' .. 'sepc next element' .. etc?
				trim = false;
				end_chr = str:sub(-1,-1);										-- get the last character of the output string
				if end_chr == duplicate_char then								-- if same as separator
					str = str:sub(1,-2);										-- remove it
				elseif end_chr == "'" then										-- if it might be wikimarkup
					if str:sub(-3,-1) == duplicate_char .. "''" then			-- if last three chars of str are sepc''
						str = str:sub(1, -4) .. "''";							-- remove them and add back ''
					elseif str:sub(-5,-1) == duplicate_char .. "]]''" then		-- if last five chars of str are sepc]]''
						trim = true;											-- why? why do this and next differently from previous?
					elseif str:sub(-4,-1) == duplicate_char .. "]''" then		-- if last four chars of str are sepc]''
						trim = true;											-- same question
					end
				elseif end_chr == "]" then										-- if it might be wikimarkup
					if str:sub(-3,-1) == duplicate_char .. "]]" then			-- if last three chars of str are sepc]] wikilink
						trim = true;
					elseif str:sub(-2,-1) == duplicate_char .. "]" then			-- if last two chars of str are sepc] external link
						trim = true;
					elseif str:sub(-4,-1) == duplicate_char .. "'']" then		-- normal case when |url=something & |title=Title.
						trim = true;
					end
				elseif end_chr == " " then										-- if last char of output string is a space
					if str:sub(-2,-1) == duplicate_char .. " " then				-- if last two chars of str are <sepc><space>
						str = str:sub(1,-3);									-- remove them both
					end
				end

				if trim then
					if value ~= comp then										-- value does not equal comp when value contains html markup
						local dup2 = duplicate_char;
						if dup2:match( "%A" ) then dup2 = "%" .. dup2; end		-- if duplicate_char not a letter then escape it

						value = value:gsub( "(%b<>)" .. dup2, "%1", 1 )			-- remove duplicate_char if it follows html markup
					else
						value = value:sub( 2, -1 );								-- remove duplicate_char when it is first character
					end
				end
			end
			str = str .. value;													-- add it to the output string
		end
	end
	return str;
end


--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------

Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
This function strips common patterns of apostrophe markup.  We presume that editors who have taken the time to
markup a title have, as a result, provided valid markup.  When they don't, some single apostrophes are left behind.

Unset input is returned unchanged.

]]

local function strip_apostrophe_markup (argument)
	if not is_set (argument) then return argument; end

	while true do																-- longest runs first; repeat until no run of two or more apostrophes remains
		if argument:match ("%'%'%'%'%'") then									-- bold italic (5)
			argument=argument:gsub("%'%'%'%'%'", "");							-- remove all instances of it
		elseif argument:match ("%'%'%'%'") then									-- italic start and end without content (4)
			argument=argument:gsub("%'%'%'%'", "");
		elseif argument:match ("%'%'%'") then									-- bold (3)
			argument=argument:gsub("%'%'%'", "");
		elseif argument:match ("%'%'") then										-- italic (2)
			argument=argument:gsub("%'%'", "");
		else
			break;
		end
	end
	return argument;															-- done
end


--[[--------------------------< H A S _ I N V I S I B L E _ C H A R S >----------------------------------------

This function searches a parameter's value for nonprintable or invisible characters.  The search stops at the
first match.

This function will detect the visible replacement character when it is part of the wikisource.

Detects but ignores nowiki and math stripmarkers.  Also detects other named stripmarkers (gallery, math, pre, ref)
and identifies them with a slightly different error message.  See also coins_cleanup().

Detects but ignores the character pattern that results from the transclusion of {{'}} templates.

Output of this function is an error message that identifies the character or the Unicode group, or the stripmarker
that was detected along with its position (or, for multi-byte characters, the position of its first byte) in the
parameter value.

	<param> - name of the parameter being checked (styled with wrap_style ('parameter', ...) for the message)
	<v> - the parameter's value

Returns a table {error message, styled parameter name, position} for the first offending match; returns nothing
when no offending character is found.

Each entry of cfg.invisible_chars is read as {name, pattern, optional message text}.

]]

local function has_invisible_chars (param, v)
	local position = '';														-- position of invisible char or starting position of stripmarker
	local dummy;																-- end of matching string; not used but required to hold end position when a capture is returned
	local capture;																-- used by stripmarker detection to hold name of the stripmarker
	local i=1;
	local stripmarker, apostrophe;

	while cfg.invisible_chars[i] do
		local char=cfg.invisible_chars[i][1]									-- the character or group name
		local pattern=cfg.invisible_chars[i][2]									-- the pattern used to find it
		position, dummy, capture = mw.ustring.find (v, pattern)					-- see if the parameter value contains characters that match the pattern

		if position then
			if 'nowiki' == capture or 'math' == capture or						-- nowiki and math stripmarkers (not an error condition)
				('templatestyles' == capture) then								-- templatestyles stripmarker allowed
					stripmarker = true;											-- set a flag
			elseif true == stripmarker and 'delete' == char then				-- because stripmarkers begin and end with the delete char, assume that we've found one end of a stripmarker
				position = nil;													-- unset
			elseif 'apostrophe' == char then									-- apostrophe template uses zero-width joiner, hair space and zero-width space
				apostrophe = true;
			elseif true == apostrophe and in_array (char, {'zero width joiner', 'zero width space', 'hair space'}) then
				position = nil;													-- unset
			else
				local err_msg;
				if capture then
					-- parentheses are required: '..' binds tighter than 'or', so without them a missing
					-- third entry would raise a concatenation error instead of falling back to <char>
					err_msg = capture .. ' ' .. (cfg.invisible_chars[i][3] or char);
				else
					err_msg = cfg.invisible_chars[i][3] or (char .. ' character');
				end

				return {err_msg, wrap_style ('parameter', param), position};	-- and done with this parameter
			end
		end
		i=i+1;																	-- bump our index
	end
end


--[[--------------------------< W R A P _ M S G >--------------------------------------------------------------

Applies additional message text to various parameter values.  Supplied string is wrapped using a message_list
configuration taking one argument.  Supports lower case text for {{citation}} templates.  Additional text taken
from citation_config.messages - the reason this function is similar to but separate from wrap_style().

Returns an empty string when <str> is not set.

]]

local function wrap_msg (key, str, lower)
	if not is_set (str) then
		return '';
	elseif in_array (key, cfg.messages['_safe_for_italics']) then
		str = safe_for_italics (str);
	end

	if true == lower then
		local msg;
		msg = cfg.messages[key]:lower();										-- set the message to lower case before
		return substitute (msg, str);											-- including template text
	else
		return substitute (cfg.messages[key], str);
	end
end


--[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------

Apply kerning to open the space between the quote mark provided by the Module and a leading or trailing quote mark
contained in a |title= or |chapter= parameter's value.

This function will positive kern either single or double quotes:
	"'Unkerned title with leading and trailing single quote marks'"
	" 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)

Double single quotes (italic or bold wikimarkup) are not kerned.

Call this function for chapter titles, for website titles, etc; not for book titles.

]]

local function kern_quotes (str)
	local cap, cap2;

	cap, cap2 = str:match ("^([\"\'])([^\'].+)");								-- match leading double or single quote but not double single quotes
	if is_set (cap) then
		str = wrap_style ('kern-left', {cap, cap2});
	end

	cap, cap2 = str:match ("^(.+[^\'])([\"\'])$")								-- same for a trailing quote mark
	if is_set (cap) then
		str = wrap_style ('kern-right', {cap, cap2});
	end
	return str;
end


--[[--------------------------< P E N D _ S E P A R A T O R >--------------------------------------------------

Attaches separator <sepc> and a space to <item>: in front of it when <prepend> is truthy, after it otherwise.
Returns an empty string when <item> is not set.

]]

local function pend_separator (item, sepc, prepend)
	if prepend then
		return is_set (item) and sepc .. ' ' .. item or '';
	else
		return is_set (item) and item .. sepc .. ' ' or '';
	end
end


--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Sets local cfg table to same (live or sandbox) as that used by the other modules.

]]

local function set_selected_modules (cfg_table_ptr)
	cfg = cfg_table_ptr;
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

return {
	first_set = first_set,														-- exported functions
	has_accept_as_written = has_accept_as_written,
	has_invisible_chars = has_invisible_chars,
	hyphen_to_dash = hyphen_to_dash,
	in_array = in_array,
	is_set = is_set,
	kern_quotes = kern_quotes,
	make_sep_list = make_sep_list,
	pend_separator = pend_separator,
	safe_join = safe_join,
	substitude = substitute,													-- misspelled key kept as an alias; it used to read an undefined global (nil)
	strip_apostrophe_markup = strip_apostrophe_markup,
	substitute = substitute,
	wrap_style = wrap_style,
	wrap_msg = wrap_msg,
	set_selected_modules = set_selected_modules
}