On 7/28/2021 7:24 PM, Thomas A. Schmitz via ntg-context wrote:
Hi all,

just two quick questions: a couple of weeks ago, there was a long discussion on ligature exceptions in, e.g., German. Hans provided an elegant new mechanism (\startlanguageoptions). So my two questions:

1. Is the old mechanism (\replaceword[eg][Auflage][Au{fl}age]) now obsolete? I tried using it, but unwanted ligatures still occurred. If it is indeed deprecated, I'll add a note to the wiki (this would mean that the current texlive version does not have the new mechanism and the latest lmtx doesn't use the old one, so that's somewhat problematic).

it's not too hard to make it work, see attached, but the new one is nicer because it permits more detailed control

in luametatex you can do things like

Au{\norightligaturing f}lage

that is: each character can have a set of options; ok, one can do that with attributes but that is less efficient; options have been introduced for ligatures, kerns, expansion etc (you can diff the attached lmt file with the lua file of the same name to see the difference in approach)

2. Both mechanisms only work for complete words, is that right? So I can't just provide a pattern such as "uf|forder," I have to provide a full list with all inflected forms:

auf|fordern
auf|fordert
auf|fordernd
Auf|forderung
Auf|forderungen

and for 1-3 also the forms with a capital letter.

just lowercase will do

if you look at the example files you can see that there are pre/post snippets possible so basically you can create something

pre [a|b|c] post

which saves some definitions (maybe we can have pre/post lists which will nicely explode into huge lists but memory is not the issue here)

while the replacement works by running over the node list, the second mechanism is using a callback at the hyphenation level (so per language) where each word is anyway checked against an exception list, so in addition that word can now be fed into a function and depending on outcome be fed back into the machinery

(there are trackers that give insight in the process)

Hans

-----------------------------------------------------------------
                                          Hans Hagen | PRAGMA ADE
              Ridderstraat 27 | 8061 GH Hasselt | The Netherlands
       tel: 038 477 53 69 | www.pragma-ade.nl | www.pragma-pod.nl
-----------------------------------------------------------------
-- Make sure the global module registry exists, then register this file in it.
modules = modules or { }

modules['lang-rep'] = {
    version   = 1.001,
    comment   = "companion to lang-rep.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- A BachoTeX 2013 experiment, probably not that useful. Eventually I used a simpler
-- more generic example. I'm sure no one ever notices or even needs this code.
--
-- As a follow up on a question by Alan about special treatment of dropped caps I wonder
-- if I can make this one more clever (probably in a few more dev steps). For instance
-- injecting nodes or replacing nodes. It's a prelude to a kind of lpeg for nodes,
-- although (given experiences so far) we don't really need that. After all, each problem
-- is somewhat unique.

local type, tonumber, next = type, tonumber, next
local gmatch, gsub = string.gmatch, string.gsub
local utfbyte, utfsplit = utf.byte, utf.split
local P, C, U, Cc, Ct, Cs, lpegmatch = lpeg.P, lpeg.C, 
lpeg.patterns.utf8character, lpeg.Cc, lpeg.Ct, lpeg.Cs, lpeg.match
local find = string.find

-- The zero width non joiner: an empty "{}" in a replacement maps onto it.
local zwnj     = 0x200C

-- One braced group "{...}": captures a table with the codes of the characters
-- between the braces, or false when the group is empty.
local grouped  = P("{") * ( Ct((U/utfbyte-P("}"))^1) + Cc(false) ) * P("}")

-- Splits a replacement string into a list of entries, each of them being one of:
--
--   number                                  : a character code (zwnj for "{}")
--   { "discretionary", pre, post, replace } : from three groups "{..}{..}{..}"
--   { "noligature", codes }                 : from one group "{..}"
--
-- The alternatives are tried in this order; a "{" that starts none of them is
-- captured as a plain character code.
local splitter = Ct((
                    #P("{") * (
                        P("{}") / function() return zwnj end
                      + Ct(Cc("discretionary") * grouped * grouped * grouped)
                      + Ct(Cc("noligature")    * grouped)
                    )
                  + U/utfbyte
                )^1)

-- Matches a string that starts with "{" and ends with "}" and captures what is
-- in between, so one outer pair of braces gets stripped.
-- NOTE(review): the first and last brace are not checked for being a pair, so
-- "{a}{b}{c}" yields "a}{b}{c" -- confirm that such input never reaches add.
local stripper = P("{") * Cs((1-P(-2))^0) * P("}") * P(-1)

-- Tracing flags; each one follows the value passed in by its tracker.
local trace_replacements = false
trackers.register("languages.replacements", function(v)
    trace_replacements = v
end)

local trace_details = false
trackers.register("languages.replacements.details", function(v)
    trace_details = v
end)

local report_replacement = logs.reporter("languages","replacements")

local glyph_code         = nodes.nodecodes.glyph
local glue_code          = nodes.nodecodes.glue

local spaceskip_code     = nodes.gluecodes.spaceskip
local xspaceskip_code    = nodes.gluecodes.xspaceskip

local nuts               = nodes.nuts

local getnext            = nuts.getnext
local getprev            = nuts.getprev
local getattr            = nuts.getattr
local getid              = nuts.getid
local getsubtype         = nuts.getsubtype
local getchar            = nuts.getchar
local isglyph            = nuts.isglyph

local setlink            = nuts.setlink
local setnext            = nuts.setnext
local setprev            = nuts.setprev
local setchar            = nuts.setchar
local setattrlist        = nuts.setattrlist
local setoptions         = nuts.setoptions

local glyphoptioncodes       = tex.glyphoptioncodes
local norightligature_option = glyphoptioncodes.norightligature
local noleftligature_option  = glyphoptioncodes.noleftligature

local insertbefore       = nuts.insertbefore
local insertafter        = nuts.insertafter
local remove_node        = nuts.remove
local copy_node          = nuts.copy
local flushlist          = nuts.flushlist

local nodepool           = nuts.pool
local new_disc           = nodepool.disc

local texsetattribute    = tex.setattribute
local unsetvalue         = attributes.unsetvalue

local enableaction       = nodes.tasks.enableaction

local v_reset            = interfaces.variables.reset

local implement          = interfaces.implement

local processors         = typesetters.processors
local splitprocessor     = processors.split

local replacements       = languages.replacements or { }
languages.replacements   = replacements

local a_replacements     = attributes.private("replacements")

-- Registry of replacement lists. Accessing an unknown name creates a record
-- that is stored under that name as well as under the next free number; that
-- number is also the key of the record's (initially empty) tree in trees.
local lists = { }
local last  = 0
local trees = { }

table.setmetatableindex(lists,function(t,name)
    local attribute = last + 1
    local tree      = { }
    local entry     = {
        name      = name,
        list      = tree,
        attribute = attribute,
    }
    last             = attribute
    t[attribute]     = entry
    t[name]          = entry
    trees[attribute] = tree
    return entry
end)

-- Accessing the reset entry here creates it, so it claims the first number.
lists[v_reset].attribute = unsetvalue -- so we discard 0

-- todo: glue kern attr

-- Stores one word in the tree that starts at root. Each character code of the
-- word is a key one level deeper; the table reached by the last character gets
-- a 'final' record that describes the replacement. An empty word stores nothing.
local function add(root,word,replacement)
    local processor, newtext = splitprocessor(replacement,true) -- no check
    newtext = lpegmatch(stripper,newtext) or newtext
    local chars = utfsplit(word) -- ,true)
    local size  = #chars
    if size == 0 then
        return
    end
    -- walk down, creating the levels that are not there yet
    local branch = root
    for i=1,size do
        local code  = utfbyte(chars[i])
        local child = branch[code]
        if not child then
            child = { }
            branch[code] = child
        end
        branch = child
    end
    -- a brace in the replacement marks it as one that needs special treatment
    local special = find(newtext,"{",1,true)
    local newlist = lpegmatch(splitter,newtext)
    branch.final = {
        word        = word,
        replacement = newtext,
        processor   = processor,
        oldlength   = size,
        newcodes    = newlist,
        special     = special,
    }
end

-- Adds replacements to the list named category. When word is a table its keys
-- are the words and its values the replacements; otherwise word is replaced by
-- replacement (an empty string when omitted).
function replacements.add(category,word,replacement)
    local root = lists[category].list
    if type(word) ~= "table" then
        add(root,word,replacement or "")
        return
    end
    for old, new in next, word do
        add(root,old,new)
    end
end

-- local strip = lpeg.stripper("{}")

-- Adds a list of replacements to the list named category. The list is either a
-- string with entries separated by whitespace or an indexed table of entries.
-- Each entry is the replacement; the word it replaces is that entry with all
-- braces removed.
function languages.replacements.addlist(category,list)
    local root = lists[category].list
    local function register(entry)
        local plain = gsub(entry,"[{}]","")
     -- local plain = lpegmatch(strip,entry)
        add(root,plain,entry)
    end
    if type(list) == "string" then
        for entry in gmatch(list,"%S+") do
            register(entry)
        end
    else
        for i=1,#list do
            register(list[i])
        end
    end
end

-- Turns a list of character codes into a linked list of nodes, each one a copy
-- of template with its character set. Returns the first node, or nil when the
-- list is empty.
local function tonodes(list,template)
    local head, tail
    for i=1,#list do
        local glyph = copy_node(template)
        setchar(glyph,list[i])
        if not head then
            head = glyph
            tail = glyph
        else
            head, tail = insertafter(head,tail,glyph)
        end
    end
    return head
end

local is_punctuation = characters.is_punctuation

-- We can try to be clever and use the fact that there is no match to skip
-- over to the next word but it gives fuzzy code so for now I removed
-- that optimization (when I really need a high performance version myself
-- I will look into it, but so far I never used this mechanism myself).
--
-- We used to have the hit checker as a function but it got messy when checks
-- for punctuation were added.

-- Replaces the matched run of nodes first..last by what the record final asks for.
--
--   head     : head of the node list that is being processed
--   first    : first node of the match; new nodes are copies of this node
--   last     : last node of the match
--   final    : record made by add (word, replacement, oldlength, newcodes, special)
--   hasspace : true when the match crossed a space glue
--   overload : optional function, called after the replacement as
--              overload(final,getnext(prefirst),getprev(postlast))
--
-- Returns the (possibly changed) head and the node that followed last at the
-- moment of entry.
local function replace(head,first,last,final,hasspace,overload)
    local current   = first
    local prefirst  = getprev(first) or head
    local postlast  = getnext(last)
    local oldlength = final.oldlength
    local newcodes  = final.newcodes
    local newlength = newcodes and #newcodes or 0
    if trace_replacements then
        report_replacement("replacing word %a by %a",final.word,final.replacement)
    end
    if hasspace or final.special then
        -- It's easier to delete and insert so we do just that. On the todo list is
        -- turn injected spaces into glue but easier might be to let the char break
        -- handler do that ...
        local before = getprev(current)
        local after  = getnext(last)
        local old    = current
        -- cut the old run loose; it is flushed after the new nodes are made
        -- because first still serves as the template for the copies
        setnext(last)
        setlink(before,after)
        current = before
        if not current then
            head = nil
        end
        for i=1,newlength do
            local codes = newcodes[i]
            if type(codes) == "table" then
                local method = codes[1]
                if method == "discretionary" then
                    -- an empty group is false and then stays that way
                    local pre, post, middle = codes[2], codes[3], codes[4]
                    if pre then
                        pre = tonodes(pre,first)
                    end
                    if post then
                        post = tonodes(post,first)
                    end
                    if middle then
                        middle = tonodes(middle,first)
                    end
                    -- todo: also set attr
                    local new = new_disc(pre,post,middle)
                    setattrlist(new,first)
                    head, current = insertafter(head,current,new)
                elseif method == "noligature" then
                    -- not that efficient to copy but ok for testing
                    local chars = codes[2]
                    if chars then
                        local n = #chars
                        for j=1,n do
                            local new = copy_node(first)
                            setchar(new,chars[j])
                            if j == 1 then
                                setoptions(new,norightligature_option)
                            elseif j == n then
                                setoptions(new,noleftligature_option | norightligature_option)
                            else
                                setoptions(new,noleftligature_option)
                            end
                            head, current = insertafter(head,current,new)
                        end
                    else
                     -- local new = copy_node(first)
                     -- setchar(new,zwnj)
                     -- head, current = insertafter(head,current,new)
                        setoptions(current,norightligature_option)
                    end
                else
                    report_replacement("unknown method %a",method or "?")
                end
            else
                local new = copy_node(first)
                setchar(new,codes)
                head, current = insertafter(head,current,new)
            end
        end
        flushlist(old)
    elseif newlength == 0 then
        -- we overload
    elseif oldlength == newlength then
        -- same length: only the characters change (a trailing assignment of
        -- getnext(final) was dropped here: final is a record, not a node, and
        -- the result was never used)
        if final.word ~= final.replacement then
            for i=1,newlength do
                setchar(current,newcodes[i])
                current = getnext(current)
            end
        end
    elseif oldlength < newlength then
        -- longer: copies for the extra characters go in front, then the
        -- existing nodes get the remaining characters
        for i=1,newlength-oldlength do
            local n = copy_node(current)
            setchar(n,newcodes[i])
            head, current = insertbefore(head,current,n)
            current = getnext(current)
        end
        for i=newlength-oldlength+1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    else
        -- shorter: drop the surplus nodes and reuse the ones that are left
        for i=1,oldlength-newlength do
            head, current = remove_node(head,current,true)
        end
        for i=1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    end
    if overload then
        overload(final,getnext(prefirst),getprev(postlast))
    end
    return head, postlast
end

-- we handle just one space

-- Runs over the node list that starts at head and replaces the words that are
-- registered in the tree selected by the replacements attribute of the glyphs.
-- A match is followed character by character through that tree and gets
-- replaced (by replace) when it ends at a node that carries a 'final' record.
-- Returns the, possibly changed, head of the list.
function replacements.handler(head)
    local current   = head
    local overload  = attributes.applyoverloads
    local mode      = false -- we're in word or punctuation mode
    -- first and last node of the match that is in progress
    local wordstart = false
    local wordend   = false
    -- end and record of a complete match that was seen before a space
    local prevend   = false
    local prevfinal = false
    -- the tree of the current run and the position reached in it
    local tree      = false
    local root      = false
    -- set when the match in progress crossed a space glue
    local hasspace  = false
    while current do
        local id = getid(current) -- or use the char getter
        if id == glyph_code then
            local a = getattr(current,a_replacements)
            if a then
                -- we have a run
                tree = trees[a]
                if tree then
                    local char = getchar(current)
                    local punc = is_punctuation[char]
                    -- a switch between word and punctuation mode ends the
                    -- match that is in progress
                    if mode == "punc" then
                        if not punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = 
replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = 
replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "word"
                        end
                    elseif mode == "word" then
                        if punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = 
replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = 
replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "punc"
                        end
                    else
                        -- the first glyph that we see determines the mode
                        mode = punc and "punc" or "word"
                    end
                    if root then
                        -- continue the match; root becomes nil when this
                        -- character is not in the tree at this position
                        root = root[char]
                        if root then
                            wordend = current
                        end
                    else
                        -- no match in progress: handle a pending complete match
                        -- and try to start a new one at this character
                        if prevfinal then
                            head = 
replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                            prevfinal = false
                        end
                        root = tree[char]
                        if root then
                            wordstart = current
                            wordend   = current
                            prevend   = false
                            hasspace  = false
                        end
                    end
                else
                    root= false
                end
            else
                tree = false
            end
            current = getnext(current)
        elseif root then
            -- a non glyph ends the match in progress, unless it is a space
            -- and the tree continues with a space at this position
            local final = root.final
            if mode == "word" and id == glue_code then
                local s = getsubtype(current)
                if s == spaceskip_code or s == xspaceskip_code then
                    local r = root[32] -- maybe more types
                    if r then
                        -- remember a complete match before the space so that
                        -- it can be used when the longer one fails
                        if not prevend then
                            local f = root.final
                            if f then
                                prevend   = wordend
                                prevfinal = f
                            end
                        end
                        wordend  = current
                        root     = r
                        hasspace = true
                        goto moveon
                    end
                end
            end
            -- replace returns the node after the replaced run as second value
            -- and scanning continues with the node after that one
            if final then
                head, current = 
replace(head,wordstart,wordend,final,hasspace,overload)
            elseif prevfinal then
                head, current = 
replace(head,wordstart,prevend,prevfinal,hasspace,overload)
            end
            prevfinal = false
            root = false
          ::moveon::
            current = getnext(current)
        else
            current = getnext(current)
        end
    end
    -- the list can end in the middle of a match
    if root then
        local final = root.final
        if final then
            head = replace(head,wordstart,wordend,final,hasspace,overload)
        elseif prevfinal then
            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
        end
    end
    return head
end

-- Set once the handler has been enabled so that this happens only one time.
local enabled = false

-- Sets the replacements attribute: to the unset value when n is the reset
-- keyword, otherwise to the attribute value of the list named n. The first time
-- a list gets selected the handler is enabled in the "processors" task list.
function replacements.set(n)
    if n == v_reset then
        texsetattribute(a_replacements,unsetvalue)
        return
    end
    local attribute = lists[n].attribute
    if not enabled then
        enableaction("processors","languages.replacements.handler")
        if trace_replacements then
            report_replacement("enabling replacement handler")
        end
        enabled = true
    end
    texsetattribute(a_replacements,attribute)
end

-- interface

-- setreplacements: one string argument (a list name or the reset keyword) that
-- is passed to replacements.set.
implement {
    name      = "setreplacements",
    actions   = replacements.set,
    arguments = "string"
}

-- addreplacements: three string arguments (category, word, replacement) that
-- are passed to replacements.add.
implement {
    name      = "addreplacements",
    actions   = replacements.add,
    arguments = "3 strings",
}

-- addreplacementslist: two string arguments (category, list) that are passed to
-- replacements.addlist, which splits a string list at whitespace.
implement {
    name      = "addreplacementslist",
    actions   = replacements.addlist,
    arguments = "2 strings",
}
%D \module
%D   [       file=lang-rep,
%D        version=2013.04.28,
%D          title=\CONTEXT\ Language Macros,
%D       subtitle=Substitution,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

%D As I needed an example of messing with nodes for the bacho\TEX\ tutorial
%D I cooked up this. In the end I decided to stick to a simpler example and
%D just finished this off in case someone really needs it.

\writestatus{loading}{ConTeXt Language Macros / Replacements}

\unprotect

\registerctxluafile{lang-rep}{autosuffix}

\definesystemattribute[replacements][public,global]

%D \startluacode
%D
%D     -- todo: other nodes (prelude to more experiments with auto dropped caps)
%D
%D     languages.replacements.add("basics", {
%D         ["aap"]   = "monkey",
%D         ["noot"]  = "nut",
%D         ["never"] = "forever",
%D         ["newer"] = "cooler",
%D         ["new"]   = "cool",
%D      -- ["special"] = "veryspe{>>>}{<<<}{=}cial",
%D     })
%D
%D \stopluacode
%D
%D \replaceword[more][this][that]
%D \replaceword[more][crap][support]
%D \replaceword[more][---][—]
%D \replaceword[basics][special][veryspe{>>>}{<<<}{=}cial]
%D
%D \starttyping
%D \start \setreplacements[basics] What the heck, it's now or never, isn't it new? \par \stop
%D \start \setreplacements[more]   Do we --- {\it really} --- need this kind of crap? \par \stop
%D \start \setreplacements[basics] All kinds of special thingies! \par \stop
%D \start \setreplacements[basics] \hsize1mm special \par \stop
%D \stoptyping

%D Select a replacement list by name: the bracketed argument is handed over to
%D \type {\clf_setreplacements}.

\permanent\protected\def\setreplacements[#1]%
  {\clf_setreplacements{#1}}

%D Switch replacements off by setting \type {\c_attr_replacements} to
%D \type {\attributeunsetvalue}.

\permanent\protected\def\resetreplacements
  {\c_attr_replacements\attributeunsetvalue}

%D Register replacements in the category given as first argument. The branches
%D below presumably select on the number of arguments given (to be confirmed
%D against the \type {\ifarguments} documentation): with two arguments the
%D second one is passed as a list to \type {\clf_addreplacementslist}, with
%D three arguments a word and its replacement are passed to \type
%D {\clf_addreplacements}, and with fewer arguments nothing is done.

\permanent\tolerant\protected\def\replaceword[#1]#*[#2]#*[#3]%
  {\ifarguments\or\or
     \clf_addreplacementslist{#1}{#2}%
   \or
     \clf_addreplacements{#1}{#2}{#3}%
   \fi}

%D The reset is appended to the token lists \type {\everyresettypesetting} and
%D \type {\everyinitializeverbatim}.

\appendtoks
    \resetreplacements
\to \everyresettypesetting

\appendtoks
    \resetreplacements
\to \everyinitializeverbatim

\protect \endinput
___________________________________________________________________________________
If your question is of interest to others as well, please add an entry to the 
Wiki!

maillist : ntg-context@ntg.nl / http://www.ntg.nl/mailman/listinfo/ntg-context
webpage  : http://www.pragma-ade.nl / http://context.aanhet.net
archive  : https://bitbucket.org/phg/context-mirror/commits/
wiki     : http://contextgarden.net
___________________________________________________________________________________

Reply via email to