commit c5a1bbbee3061c2526af7e87b7688e5c8fd55b8b
Author: Juergen Spitzmueller <[email protected]>
Date:   Tue Nov 25 09:36:31 2025 +0100

    tex2lyx: support for language-specific charstyles
---
 src/tests/CMakeLists.txt   |   4 +-
 src/tex2lyx/CMakeLists.txt |   4 +-
 src/tex2lyx/Makefile.am    |   1 +
 src/tex2lyx/dummy_impl.cpp |  20 +++++++++
 src/tex2lyx/tex2lyx.cpp    |  34 +++++++++++++-
 src/tex2lyx/tex2lyx.h      |   1 +
 src/tex2lyx/text.cpp       | 109 ++++++++++++++++++++++++++-------------------
 7 files changed, 120 insertions(+), 53 deletions(-)

diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 476e6470e9..de466c0b01 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -6,7 +6,7 @@
 
 set(check_layout_SOURCES)
 foreach(_f insets/InsetLayout.cpp CiteEnginesList.cpp Color.cpp Counters.cpp Floating.cpp
-       FloatList.cpp FontInfo.cpp Layout.cpp LayoutFile.cpp support/Lexer.cpp
+       FloatList.cpp FontInfo.cpp Language.cpp Layout.cpp LayoutFile.cpp support/Lexer.cpp
        ModuleList.cpp Spacing.cpp SpecialChar.cpp TextClass.cpp tests/check_layout.cpp
        tests/dummy_functions.cpp tests/dummy4checklayout.cpp)
   list(APPEND check_layout_SOURCES "${TOP_SRC_DIR}/src/${_f}")
@@ -120,7 +120,7 @@ if (${LYX_DEBUG_SANITIZE} MATCHES "ADDRESS")
     handle_source_option("CXX" ${TOP_SRC_DIR}/src/${_base}.cpp -Wno-maybe-uninitialized)
   endforeach()
 endif()
-foreach(_base tests/dummy4checklayout Counters FontInfo Layout LayoutFile
+foreach(_base tests/dummy4checklayout Counters FontInfo Language Layout LayoutFile
        SpecialChar TextClass insets/InsetLayout tests/check_layout)
   handle_source_option("CXX" ${TOP_SRC_DIR}/src/${_base}.cpp -Wno-shadow)
 endforeach()
diff --git a/src/tex2lyx/CMakeLists.txt b/src/tex2lyx/CMakeLists.txt
index fbabf07314..5fee354821 100644
--- a/src/tex2lyx/CMakeLists.txt
+++ b/src/tex2lyx/CMakeLists.txt
@@ -12,7 +12,7 @@ set(LINKED_headers)
 
 foreach(_src graphics/GraphicsParams insets/ExternalTemplate
        insets/ExternalTransforms insets/InsetLayout Author CiteEnginesList Color Counters
-       Encoding FloatList Floating FontInfo LaTeXPackages Layout
+       Encoding FloatList Floating FontInfo Language LaTeXPackages Layout
        LayoutFile LayoutModuleList support/Lexer ModuleList TextClass
        LaTeXColors Spacing SpecialChar version)
        list(APPEND LINKED_sources ${TOP_SRC_DIR}/src/${_src}.cpp)
@@ -30,7 +30,7 @@ if (${LYX_DEBUG_SANITIZE} MATCHES "ADDRESS")
   endforeach()
 endif()
 foreach(_base tex2lyx/Context tex2lyx/Parser tex2lyx/Preamble tex2lyx/text
-       Counters FontInfo Layout LayoutFile LayoutModuleList SpecialChar TextClass
+       Counters FontInfo Language Layout LayoutFile LayoutModuleList SpecialChar TextClass
        insets/InsetLayout tex2lyx/dummy_impl tex2lyx/math tex2lyx/table tex2lyx/tex2lyx
   )
   handle_source_option("CXX" ${TOP_SRC_DIR}/src/${_base}.cpp -Wno-shadow)
diff --git a/src/tex2lyx/Makefile.am b/src/tex2lyx/Makefile.am
index 3a1db4ca76..a3c231f1b2 100644
--- a/src/tex2lyx/Makefile.am
+++ b/src/tex2lyx/Makefile.am
@@ -99,6 +99,7 @@ LYX_OBJS = \
        ../FloatList.o \
        ../Floating.o \
        ../FontInfo.o \
+       ../Language.o \
        ../LaTeXPackages.o \
        ../LaTeXColors.o \
        ../Layout.o \
diff --git a/src/tex2lyx/dummy_impl.cpp b/src/tex2lyx/dummy_impl.cpp
index fa3aef2628..3214334b3e 100644
--- a/src/tex2lyx/dummy_impl.cpp
+++ b/src/tex2lyx/dummy_impl.cpp
@@ -21,6 +21,7 @@
 #include "tex2lyx.h"
 #include "LaTeXColors.h"
 #include "LaTeXFeatures.h"
+#include "LaTeXFonts.h"
 #include "LyXRC.h"
 #include "xml.h"
 
@@ -63,6 +64,25 @@ LaTeXColors & theLaTeXColors()
        return * lc;
 }
 
+//
+// Dummy definitions needed by Language
+//
+
+LaTeXFonts & theLaTeXFonts() // dummy accessor: always hands out the same default-constructed object
+{
+       static LaTeXFonts * const lc = new LaTeXFonts; // allocated once; a `new` per call would leak
+       return * lc;
+}
+
+LaTeXFont LaTeXFonts::getLaTeXFont(docstring const & /* name */)
+{
+       return LaTeXFont{}; // dummy: every lookup yields a default-constructed font
+}
+
+bool LaTeXFont::hasFontenc(std::string const &) const // dummy: the argument is ignored
+{
+       return false; // always answers false, whatever encoding is asked for
+}
 
 //
 // Dummy translation support (needed at many places)
diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp
index 0787c2bdec..e7870936a2 100644
--- a/src/tex2lyx/tex2lyx.cpp
+++ b/src/tex2lyx/tex2lyx.cpp
@@ -17,6 +17,7 @@
 
 #include "Context.h"
 #include "Encoding.h"
+#include "Language.h"
 #include "Layout.h"
 #include "LayoutFile.h"
 #include "LayoutModuleList.h"
@@ -492,8 +493,8 @@ bool isProvided(string const & name)
 }
 
 
-bool isKnownInsetSpecialChar(string const & latex, string & lyxname, bool const only_protected,
-                            bool const partof)
+bool isKnownInsetSpecialChar(string const & latex, string & lyxname, string const & language,
+                            bool const only_protected, bool const partof)
 {
        for (auto const & [name, sc] : textclass.specialChars()) {
                if (partof && prefixIs(to_ascii(ltrim(sc.latex_output, "\\")), latex)) {
@@ -505,6 +506,19 @@ bool isKnownInsetSpecialChar(string const & latex, string & lyxname, bool const
                        return only_protected ? sc.need_protect : true;
                }
        }
+       Language * lang = const_cast<Language*>(languages.getLanguage(language));
+       if (!lang)
+               return false;
+       for (auto const & [name, sc] : lang->specialChars()) {
+               if (partof && prefixIs(to_ascii(ltrim(sc.latex_output, "\\")), latex)) {
+                       lyxname = name;
+                       return only_protected ? sc.need_protect : true;
+               }
+               if (to_ascii(ltrim(sc.latex_output, "\\")) == latex) {
+                       lyxname = name;
+                       return only_protected ? sc.need_protect : true;
+               }
+       }
        return false;
 }
 
@@ -938,6 +952,18 @@ bool roundtripMode()
        return roundtrip;
 }
 
+bool readLanguagesFile(string const & name)
+{
+       LYXERR(Debug::INIT, "About to read " << name << "...");
+       // Returns false when libFileSearch() cannot locate `name` (empty path).
+       FileName const lang_path = libFileSearch(string(), name);
+       bool const found = !lang_path.empty();
+       // Only a located file is handed to the global language list.
+       if (found)
+               languages.read(lang_path);
+       return found;
+}
+
 
 namespace {
 
@@ -965,6 +991,10 @@ bool tex2lyx(idocstream & is, ostream & os, string const & encoding,
                return false;
        }
 
+       // Load language definitions if not already done
+       if (languages.size() == 0 && !readLanguagesFile("languages"))
+               error_message("Could not load languages file.");
+
        // Load preloaded modules.
        // This needs to be done after the preamble is parsed, since the text
        // class may not be known before. It needs to be done before parsing
diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h
index 598923e4c3..1b853ef79b 100644
--- a/src/tex2lyx/tex2lyx.h
+++ b/src/tex2lyx/tex2lyx.h
@@ -130,6 +130,7 @@ extern bool checkModule(std::string const & name, bool command);
 extern bool isProvided(std::string const & name);
 /// Is this a special character supported by InsetSpecialChar?
 extern bool isKnownInsetSpecialChar(std::string const & latex, std::string & lyxname,
+                                   std::string const & language,
                                    bool const only_protected = false,
                                    bool const partof = false);
 // Access to environment stack
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp
index f8e19205d3..919e1bcbd1 100644
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -3254,6 +3254,59 @@ void fix_child_filename(string & name)
 }
 
 
+bool parseSpecialChar(ostream & os, Token const & t, Parser & p, Context & context)
+{
+       // Tries to import the token t (optionally preceded by \protect) as a
+       // special char known to isKnownInsetSpecialChar() for the current font
+       // language. Writes "\SpecialChar <name>" and returns true on a match.
+       string const & lang = context.font.language;
+       bool const is_protect = t.cs() == "protect";
+       string lyxname;
+       // (Single) special chars supported via InsetSpecialChar
+       bool single = isKnownInsetSpecialChar(t.cs(), lyxname, lang);
+       if (!single && is_protect && p.next_token().cat() == catEscape)
+               single = isKnownInsetSpecialChar(p.next_token().cs(), lyxname, lang, true);
+       if (single) {
+               // LyX sometimes puts a \protect in front, so we have to skip it
+               if (is_protect)
+                       p.get_token();
+               context.check_layout(os);
+               os << "\\SpecialChar " << lyxname << '\n';
+               skip_spaces_braces(p);
+               return true;
+       }
+       // Multi-token special chars: t must at least start a known sequence
+       bool partial = isKnownInsetSpecialChar(t.cs(), lyxname, lang, false, true);
+       if (!partial && is_protect && p.next_token().cat() == catEscape)
+               partial = isKnownInsetSpecialChar(p.next_token().cs(), lyxname, lang, true, true);
+       if (!partial)
+               return false;
+       // Remember where we are so that a failed attempt can be undone
+       p.pushPosition();
+       string latex = t.cs();
+       if (is_protect) {
+               // ignore \protect
+               latex = p.next_token().cs();
+               p.get_token();
+       }
+       // Extend the sequence while it is still the start of a known special char
+       while (isKnownInsetSpecialChar(latex + p.next_token().cs(), lyxname, lang, false, true)) {
+               latex += p.next_token().cs();
+               p.get_token();
+       }
+       // Is this a known complete sequence? If not, go back and report failure
+       if (!isKnownInsetSpecialChar(latex, lyxname, lang)) {
+               p.popPosition();
+               return false;
+       }
+       context.check_layout(os);
+       os << "\\SpecialChar " << lyxname << '\n';
+       skip_braces(p);
+       p.dropPosition();
+       return true;
+}
+
+
 void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                Context & context, string const & rdelim, string const & rdelimesc)
 {
@@ -3465,12 +3518,16 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                }
 
                // babel shorthands (also used by polyglossia)
-               // Since these can have different meanings for different languages
-               // we import them as ERT (but they must be put in ERT to get output
-               // verbatim).
                if (t.asInput() == "\"") {
+                       // First check if shorthand is supported
+                       // via language-specific SpecialChar
+                       if (parseSpecialChar(os, t, p, context))
+                               continue;
+                       // Unsupported shorthands: Since these can have different
+                       // meanings for different languages,
+                       // we import them as ERT (to get output verbatim).
                        string s = "\"";
-                       // We put the known shorthand pairs together in
+                       // We put known shorthand pairs together in
                        // one ERT inset. In other cases (such as "a), only
                        // the quotation mark is ERTed.
                        if (is_known(p.next_token().asInput(), known_babel_shorthands)) {
@@ -6688,50 +6745,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                        name += '{' + p.verbatim_item() + '}';
                }
 
-               // (Single) special chars supported via InsetSpecialChar
-               string lyxname;
-               if (isKnownInsetSpecialChar(t.cs(), lyxname)
-                   || (t.cs() == "protect"
-                       && p.next_token().cat() == catEscape
-                       && isKnownInsetSpecialChar(p.next_token().cs(), lyxname, true))) {
-                       // LyX sometimes puts a \protect in front, so we have to ignore it
-                       if (t.cs() == "protect")
-                               p.get_token();
-                       context.check_layout(os);
-                       os << "\\SpecialChar " << lyxname << '\n';
-                       skip_spaces_braces(p);
+               if (parseSpecialChar(os, t, p, context))
                        continue;
-               }
-               // And multi-token special chars
-               if (isKnownInsetSpecialChar(t.cs(), lyxname, false, true)
-                   || (t.cs() == "protect"
-                       && p.next_token().cat() == catEscape
-                       && isKnownInsetSpecialChar(p.next_token().cs(), lyxname, true, true))) {
-                       p.pushPosition();
-                       string latex;
-                       if (t.cs() == "protect") {
-                               // ignore \protect
-                               latex = p.next_token().cs();
-                               p.get_token();
-                       } else
-                               latex = t.cs();
-                       // Try to the complete token sequence as long as it is known
-                       // as part of a known special char sequence
-                       while (isKnownInsetSpecialChar(latex + p.next_token().cs(), lyxname, false, true)) {
-                               latex += p.next_token().cs();
-                               p.get_token();
-                       }
-                       // It this a known complete sequence?
-                       if (isKnownInsetSpecialChar(latex, lyxname)) {
-                               context.check_layout(os);
-                               os << "\\SpecialChar " << lyxname << '\n';
-                               skip_braces(p);
-                               p.dropPosition();
-                               continue;
-                       }
-                       // If not, go back and fall through
-                       p.popPosition();
-               }
 
                // now get the character from unicodesymbols
                bool termination;
-- 
lyx-cvs mailing list
[email protected]
https://lists.lyx.org/mailman/listinfo/lyx-cvs

Reply via email to