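-- Module:Citation/CS1/Utilities
-- Parent pages: Module:Citation | CS1
-- (wiki page navigation links "Jump to navigation" and "Jump to search" appeared here in the page scrape)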
--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------

Module-level state that is assigned after the module has been loaded.

]]

local cfg;	-- configuration table of tables; nil until set_selected_modules() stores the table supplied by the caller
--[[--------------------------< H Y P H E N _ T O _ D A S H >-------------------------------------------------

Converts every hyphen-minus in <str> to an en dash.

<str> is returned unchanged when it is not set (nil or empty string) or when it contains any of the
characters [ ] { } < >.

Returns exactly one value: the (possibly converted) string.

]]

local function hyphen_to_dash( str )
	if not is_set(str) or str:match( "[%[%]{}<>]" ) ~= nil then
		return str;
	end

	-- string.gsub returns the new string AND the number of replacements; assigning to a single local
	-- discards the count so that it does not leak to callers as an unexpected second return value.
	-- The hyphen is escaped because '-' is a magic character in Lua patterns.
	local dashed = str:gsub( '%-', '–' );
	return dashed;
end
--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Returns true when <var> is 'set': it is neither nil nor the empty string.  Returns false otherwise.

This function is deliberately global because, according to the original module notes, it is called both
from this module and from date validation.

]]

function is_set( var )
	return var ~= nil and var ~= '';
end
--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------

Returns the first set member of <list>, examining indexes 1 through <count> in ascending order.
Returns nil (nothing) when none of those members is set.

An explicit <count> is used instead of pairs() or ipairs(): pairs() does not guarantee the order of
evaluation, and ipairs() stops at the first nil member, which may come before the real end of the list.

]]

local function first_set (list, count)
	for index = 1, count do										-- visit every slot, including nil holes
		local candidate = list[index];
		if is_set( candidate ) then
			return candidate;									-- first set member wins
		end
	end
end
--[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------

Searches the sequence <haystack> for <needle>.

Returns the index of the first member equal to <needle>; returns false when <needle> is nil or is not
found.

]]

local function in_array( needle, haystack )
	if needle ~= nil then
		for index, member in ipairs( haystack ) do
			if member == needle then
				return index;
			end
		end
	end
	return false;
end
--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------

Populates the numbered arguments in message string <msg> from <args> using mw.message.newRawMessage().

When <args> is nil or false, <msg> is returned as is.

]]

local function substitute( msg, args )
	local rendered = args and mw.message.newRawMessage( msg, args ):plain();
	return rendered or msg;
end
--[[--------------------------< H A S _ A C C E P T _ A S _ W R I T T E N >------------------------------------

Tests whether <str> is wholly wrapped in accept-as-written markup: ((...)).

Returns two values:
	the string with the markup removed (or <str> unchanged when there is no such markup)
	true when markup was removed; false otherwise

When <allow_empty> is exactly true, the markup may be empty, i.e. (()) is accepted and yields an empty
string.  For any other value of <allow_empty> there must be at least one character inside the markup.
The original notes say the two cases may be merged at a later stage but should be kept separate for now.

]]

local function has_accept_as_written (str, allow_empty)
	local pattern = '^%(%((.+)%)%)$';							-- at least one character between (( and ))
	if true == allow_empty then
		pattern = '^%(%((.*)%)%)$';								-- allows (()) to be an empty set
	end

	local unwrapped, count = str:gsub (pattern, '%1');
	return unwrapped, 0 ~= count;
end
--[[--------------------------< S A F E _ F O R _ I T A L I C S >----------------------------------------------

Protects a string that will be wrapped in wiki italic markup '' ... ''

An empty <span></span> is added before a leading apostrophe and after a trailing apostrophe so that the
apostrophe cannot merge with the surrounding italic markup, and newlines are replaced with spaces
because they break italics.

Note from the original authors: <i> cannot be used for italics because italics specified by ''...'' in a
title are expected to be inverted (i.e. unitalicized) in the resulting reference, and <i> and '' tend to
interact poorly under MediaWiki's HTML tidy.

Returns exactly one value: the protected string.  A <str> that is not set (nil or empty string) is
returned unchanged.

]]

local function safe_for_italics (str)
	if not is_set (str) then
		return str;
	end

	if str:sub (1, 1) == "'" then str = "<span></span>" .. str; end
	if str:sub (-1, -1) == "'" then str = str .. "<span></span>"; end

	-- string.gsub returns the new string AND the number of replacements; assigning to a single local
	-- discards the count so that it does not leak to callers as an unexpected second return value.
	local single_line = str:gsub ('\n', ' ');
	return single_line;
end
--[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------

Applies styling to a parameter value.  <str> is substituted into the message found at
cfg.presentation[<key>].  When <key> is listed in cfg.presentation['_safe_for_italics'], <str> is first
passed through safe_for_italics().

Returns an empty string when <str> is not set.

The wrapping text comes from cfg.presentation, which is why this function is similar to, but separate
from, wrap_msg().

]]

local function wrap_style (key, str)
	if not is_set (str) then
		return '';
	end

	if in_array (key, cfg.presentation['_safe_for_italics']) then
		str = safe_for_italics (str);							-- this style is italic; protect the value
	end

	return substitute (cfg.presentation[key], str);
end
--[[--------------------------< M A K E _ S E P _ L I S T >------------------------------------------------------------

Makes a separated list of the items in sequence <list_seq> using the provided separators.

	<count> - number of items in <list_seq>
	<sep_list> - separator between items other than the last two; typically '<comma><space>'
	<sep_list_pair> - separator used when there are no more than two items; typically '<space>and<space>'
	<sep_list_end> - separator before the last of three or more items; typically
		'<comma><space>and<space>' or '<comma><space>&<space>'

When <sep_list> is not supplied, all three separators are taken from cfg.presentation['sep_list'],
cfg.presentation['sep_list_pair'], and cfg.presentation['sep_list_end'].  Consequently, when
<sep_list_end> is specified, <sep_list> and <sep_list_pair> must also be supplied.

]]

local function make_sep_list (count, list_seq, sep_list, sep_list_pair, sep_list_end)
	if not sep_list then										-- no separators supplied; use the defaults
		local presentation = cfg.presentation;
		sep_list = presentation['sep_list'];
		sep_list_pair = presentation['sep_list_pair'];
		sep_list_end = presentation['sep_list_end'];
	end

	if count <= 2 then
		return table.concat (list_seq, sep_list_pair);			-- two items joined by the pair separator; a single item is returned alone
	elseif count > 2 then
		local leading = table.concat (list_seq, sep_list, 1, count - 1);	-- all but the last item, plain list separator
		return table.concat ({leading, list_seq[count]}, sep_list_end);	-- attach the last item with the final separator
	end

	return '';
end
--[[--------------------------< S A F E _ J O I N >------------------------------------------------------------

Joins the strings in sequence <tbl> into one string while suppressing a doubled separator character
<duplicate_char> at the seams.

A segment is only examined when it begins with <duplicate_char> (html tags are ignored when looking for
that first character).  Depending on how the text joined so far ends, the duplicate is removed either
from the end of the joined text or from the beginning of the segment.

Plain string functions are used rather than ustring functions.  Per the original notes this is
considerably faster and works correctly as long as <duplicate_char> is strict ASCII; the strings in
<tbl> may be ASCII or UTF-8.

]]

local function safe_join( tbl, duplicate_char )
	local joined = '';											-- the output string

	for _, segment in ipairs( tbl ) do
		if joined == '' then									-- nothing collected yet
			joined = segment;									-- take the segment as is
		elseif segment ~= '' then
			local bare = segment;								-- the segment as compared: html tags removed when it begins with one
			if segment:sub(1, 1) == '<' then
				bare = segment:gsub( "%b<>", "" );				-- <span>string</span> -> string
			end

			if bare:sub(1, 1) == duplicate_char then			-- segment begins with the separator; look for a duplicate at the seam
				local trim_segment = false;						-- true when the duplicate must be removed from the segment
				local tail = joined:sub(-1);					-- last character of the output string

				if tail == duplicate_char then					-- output ends with the separator
					joined = joined:sub(1, -2);					-- remove it
				elseif tail == "'" then							-- output may end with italic wikimarkup
					if joined:sub(-3) == duplicate_char .. "''" then		-- ends with sepc''
						joined = joined:sub(1, -4) .. "''";					-- remove the separator, keep the ''
					elseif joined:sub(-5) == duplicate_char .. "]]''" then	-- ends with sepc]]''
						trim_segment = true;
					elseif joined:sub(-4) == duplicate_char .. "]''" then	-- ends with sepc]''
						trim_segment = true;
					end
				elseif tail == "]" then							-- output may end with link wikimarkup
					if joined:sub(-3) == duplicate_char .. "]]" then		-- ends with sepc]] (wikilink)
						trim_segment = true;
					elseif joined:sub(-2) == duplicate_char .. "]" then		-- ends with sepc] (external link)
						trim_segment = true;
					elseif joined:sub(-4) == duplicate_char .. "'']" then	-- ends with sepc''] (per original note: |url= with |title=Title.)
						trim_segment = true;
					end
				elseif tail == " " then							-- output ends with a space
					if joined:sub(-2) == duplicate_char .. " " then			-- ends with <sepc><space>
						joined = joined:sub(1, -3);							-- remove them both
					end
				end

				if trim_segment then
					if segment ~= bare then						-- segment contains html markup
						local escaped = duplicate_char;
						if escaped:match( "%A" ) then			-- not a letter, so escape it for use in a pattern
							escaped = "%" .. escaped;
						end
						segment = segment:gsub( "(%b<>)" .. escaped, "%1", 1 )	-- remove the first separator that follows html markup
					else
						segment = segment:sub( 2, -1 );			-- remove the separator that is the first character
					end
				end
			end

			joined = joined .. segment;							-- add the segment to the output string
		end
	end

	return joined;
end
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------

Strips wiki italic and bold markup (runs of two to five apostrophes) from <argument>; per the original
notes, this is so that the markup does not contaminate COinS metadata.

On each pass the longest pattern that is still present is removed everywhere it occurs; passes repeat
until no run of two or more apostrophes remains.  Single apostrophes are left behind.

A <argument> that is not set (nil or empty string) is returned unchanged.

]]

local function strip_apostrophe_markup (argument)
	if not is_set (argument) then
		return argument;
	end

	local markup_patterns = {									-- longest first
		"%'%'%'%'%'",											-- bold italic (5)
		"%'%'%'%'",												-- italic start and end without content (4)
		"%'%'%'",												-- bold (3)
		"%'%'",													-- italic (2)
	};

	local stripped_something;
	repeat
		stripped_something = false;
		for _, markup in ipairs (markup_patterns) do
			if argument:match (markup) then
				argument = argument:gsub (markup, "");			-- remove all instances of it
				stripped_something = true;
				break;											-- restart from the longest pattern
			end
		end
	until not stripped_something

	return argument;
end
--[[--------------------------< H A S _ I N V I S I B L E _ C H A R S >----------------------------------------

Searches parameter value <v> for the characters and stripmarkers described by cfg.invisible_chars.
Each entry of that sequence is read as {name, pattern, optional message text}.  Entries are tried in
order and the search stops at the first match that is treated as an error.

Matches that are not treated as errors:
	a match whose capture is 'nowiki', 'math', or 'templatestyles' (stripmarkers); a flag is set
	a 'delete' entry matched after that flag has been set (stripmarkers begin and end with the delete
		character, so this is assumed to be one end of a stripmarker)
	an 'apostrophe' entry (the original notes attribute this pattern to the {{'}} template); a flag is set
	a 'zero width joiner', 'zero width space', or 'hair space' entry matched after the apostrophe
		flag has been set

For any other match, returns a sequence of three items:
	the error message text
	<param> wrapped by wrap_style ('parameter', ...)
	the position of the match as returned by mw.ustring.find()

Returns nothing when no error condition is found.

]]

local function has_invisible_chars (param, v)
	local position;												-- position of invisible char or starting position of stripmarker
	local dummy;												-- end of matching string; not used but required to hold end position when a capture is returned
	local capture;												-- used by stripmarker detection to hold name of the stripmarker
	local i = 1;
	local stripmarker, apostrophe;

	while cfg.invisible_chars[i] do
		local char = cfg.invisible_chars[i][1];					-- the character or group name
		local pattern = cfg.invisible_chars[i][2];				-- the pattern used to find it
		position, dummy, capture = mw.ustring.find (v, pattern);	-- see if the parameter value contains characters that match the pattern

		if position then
			if 'nowiki' == capture or 'math' == capture or		-- nowiki and math stripmarkers (not an error condition)
				('templatestyles' == capture) then				-- templatestyles stripmarker allowed
					stripmarker = true;							-- set a flag
			elseif true == stripmarker and 'delete' == char then	-- assume that we've found one end of a stripmarker
				position = nil;									-- unset
			elseif 'apostrophe' == char then					-- apostrophe template uses zero-width joiner, hair space and zero-width space
				apostrophe = true;
			elseif true == apostrophe and in_array (char, {'zero width joiner', 'zero width space', 'hair space'}) then
				position = nil;									-- unset
			else
				local err_msg;
				if capture then
					-- Parentheses are required: '..' binds tighter than 'or', so without them the
					-- fallback to <char> is never used and a missing third field raises an error.
					err_msg = capture .. ' ' .. (cfg.invisible_chars[i][3] or char);
				else
					err_msg = cfg.invisible_chars[i][3] or (char .. ' character');
				end

				return {err_msg, wrap_style ('parameter', param), position};	-- and done with this parameter
			end
		end
		i = i + 1;												-- bump our index
	end
end
--[[--------------------------< W R A P _ M S G >--------------------------------------------------------------

Applies additional message text to a parameter value.  <str> is substituted into the message found at
cfg.messages[<key>].  When <key> is listed in cfg.messages['_safe_for_italics'], <str> is first passed
through safe_for_italics().

When <lower> is exactly true, the message text is converted to lower case before <str> is substituted
into it (the original notes say this supports {{citation}} templates).

Returns an empty string when <str> is not set.

The wrapping text comes from cfg.messages, which is why this function is similar to, but separate from,
wrap_style().

]]

local function wrap_msg (key, str, lower)
	if not is_set (str) then
		return '';
	end

	if in_array (key, cfg.messages['_safe_for_italics']) then
		str = safe_for_italics (str);
	end

	local message = cfg.messages[key];
	if true == lower then
		message = message:lower();								-- lower-case the message only, not the substituted text
	end

	return substitute (message, str);
end
--[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------

Applies kerning to open the space between the quote mark provided by the module and a leading or
trailing quote mark contained in <str> (per the original notes, the value of a |title= or |chapter=
parameter).

Either single or double quotes are kerned:
	"'Unkerned title with leading and trailing single quote marks'"
	" 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)

Double single quotes (italic or bold wikimarkup) are not kerned.

The kerning itself is done by wrap_style() with the 'kern-left' and 'kern-right' styles; each is given a
two-item sequence holding the quote mark and the rest of the string.

The original notes say to call this function for chapter titles, for website titles, etc; not for book
titles.

]]

local function kern_quotes (str)
	local lead_quote, after_lead = str:match ("^([\"\'])([^\'].+)");	-- match leading double or single quote but not double single quotes
	if is_set (lead_quote) then
		str = wrap_style ('kern-left', {lead_quote, after_lead});
	end

	local before_trail, trail_quote = str:match ("^(.+[^\'])([\"\'])$");	-- same test for a trailing quote mark
	if is_set (before_trail) then
		str = wrap_style ('kern-right', {before_trail, trail_quote});
	end

	return str;
end
--[[--------------------------< P E N D _ S E P A R A T O R >--------------------------------------------------

Attaches separator <sepc> and a space to <item>.

When <prepend> is truthy, returns <sepc><space><item>; otherwise returns <item><sepc><space>.
Returns an empty string when <item> is not set.

]]

local function pend_separator (item, sepc, prepend)
	if not is_set (item) then
		return '';
	end

	if prepend then
		return sepc .. ' ' .. item;
	end

	return item .. sepc .. ' ';
end
--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Stores <cfg_table_ptr> in this module's cfg variable so that the functions here use the same
configuration table (live or sandbox, per the original notes) as the other modules.

This must be called before any function that reads cfg is used.

]]

local function set_selected_modules (cfg_table_ptr)
	cfg = cfg_table_ptr;										-- keep a reference to the caller's table; nothing is copied
end
--[[--------------------------< E X P O R T S >----------------------------------------------------------------

The table returned to code that loads this module.  Every key has the same name as the function it
refers to.

The misspelled 'substitude' entry has been removed: it read an undefined global variable, so its value
was always nil and the key never existed in the returned table.  'substitute' is the correct export.

]]

return {
	first_set = first_set,										-- exported functions
	has_accept_as_written = has_accept_as_written,
	has_invisible_chars = has_invisible_chars,
	hyphen_to_dash = hyphen_to_dash,
	in_array = in_array,
	is_set = is_set,
	kern_quotes = kern_quotes,
	make_sep_list = make_sep_list,
	pend_separator = pend_separator,
	safe_join = safe_join,
	strip_apostrophe_markup = strip_apostrophe_markup,
	substitute = substitute,
	wrap_style = wrap_style,
	wrap_msg = wrap_msg,
	set_selected_modules = set_selected_modules
}