commit 31d64c739521c48438f05f627af071c69c8a2e13
Author: Kornel Benko <kor...@lyx.org>
Date:   Sun Oct 4 01:43:44 2020 +0200

    Enable handling of spreadsheets in export to docbook5 format.
    
    In cooperation with Thibaut Cuvelier:
    lib/scripts/spreadsheet_to_docbook.py: Strip the document header and convert some flags
    lib/xtemplates/gnumeric.xtemplate: use this output to be inserted in docbook5
    lib/configure.py: Add needed conversion entries
---
 lib/Makefile.am                       |    2 +
 lib/configure.py                      |   10 +++-
 lib/scripts/spreadsheet_to_docbook.py |   70 +++++++++++++++++++++++++++++++++
 lib/xtemplates/gnumeric.xtemplate     |    4 +-
 4 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/lib/Makefile.am b/lib/Makefile.am
index dca5e0e..8d42271 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -2495,6 +2495,7 @@ dist_scripts_DATA = \
        scripts/lyxknitr.R \
        scripts/lyxstangle.R \
        scripts/lyxsweave.R
+
 # We use DATA now instead of PYTHON because automake 1.11.2 complains.
 # Note that we "chmod 755" manually these files in install-data-hook.
 dist_scripts_DATA += \
@@ -2524,6 +2525,7 @@ dist_scripts_DATA += \
        scripts/prefs2prefs_lfuns.py \
        scripts/prefs2prefs_prefs.py \
        scripts/prefTest.pl.in \
+       scripts/spreadsheet_to_docbook.py \
        scripts/tex_copy.py \
        scripts/TeXFiles.py
 
diff --git a/lib/configure.py b/lib/configure.py
index ec2fafd..6b890d2 100644
--- a/lib/configure.py
+++ b/lib/configure.py
@@ -751,7 +751,7 @@ def checkFormatEntries(dtl_tools):
 \Format pdf6       pdf    "PDF (graphics)"        "" "%%"      ""      
"vector"        "application/pdf"
 \Format pdf7       pdf    "PDF (cropped)"         "" "%%"      ""      
"document,vector"       ""
 \Format pdf8       pdf    "PDF (lower resolution)"         "" "%%"     ""      
"document,vector"       ""
-\Format pdf9       pdf    "PDF (docbook)"         "" "%%"       ""      
"document,vector,menu=export"   ""'''])
+\Format pdf9       pdf    "PDF (docbook)"         "" "%%"      ""      
"document,vector,menu=export"   ""'''])
     #
     checkViewer('a DVI previewer', ['xdvi', 'kdvi', 'okular',
                                     'evince', 'xreader',
@@ -932,8 +932,8 @@ def checkConverterEntries():
     checkProg('an Open Document (Pandoc) -> LaTeX converter', ['pandoc -s -f 
odt -o $$o -t latex $$i'],
         rc_entry = [ r'\converter odt3        latex      "%%"  ""' ])
     #
-    checkProg('DocBook converter -> PDF (docbook)', ['pandoc -f docbook -t 
latex --latex-engine=lualatex --toc -o $$o $$i'],
-        rc_entry = [ r'\converter docbook5      pdf9      "%%" ""' ])
+    checkProg('DocBook converter -> PDF (docbook)', ['pandoc -f docbook -t 
latex --latex-engine=lualatex --toc --template=$$s/xtemplates/lyx.latex -o $$o 
$$i'],
+        rc_entry = [ r'\converter docbook5      pdf9      "%%" ""' ])
     #
     checkProg('a MS Word Office Open XML converter -> LaTeX', ['pandoc -s -f 
docx -o $$o -t latex $$i'],
         rc_entry = [ r'\converter word2      latex      "%%"   ""' ])
@@ -1176,6 +1176,10 @@ def checkConverterEntries():
 \converter oocalc html_table "ssconvert --export-type=Gnumeric_html:html40frag 
$$i $$o" ""
 \converter excel  html_table "ssconvert --export-type=Gnumeric_html:html40frag 
$$i $$o" ""
 \converter excel2 html_table "ssconvert --export-type=Gnumeric_html:html40frag 
$$i $$o" ""
+\converter gnumeric xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py 
$$i $$o" ""
+\converter oocalc xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py 
$$i $$o" ""
+\converter excel  xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py 
$$i $$o" ""
+\converter excel2 xhtml_table "python $$s/scripts/spreadsheet_to_docbook.py 
$$i $$o" ""
 '''])
 
     path, lilypond = checkProg('a LilyPond -> EPS/PDF/PNG converter', 
['lilypond'])
diff --git a/lib/scripts/spreadsheet_to_docbook.py 
b/lib/scripts/spreadsheet_to_docbook.py
new file mode 100644
index 0000000..d65dcba
--- /dev/null
+++ b/lib/scripts/spreadsheet_to_docbook.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python3
+
+# file spreadsheet_to_docbook.py
+# This file is part of LyX, the document processor.
+# Licence details can be found in the file COPYING.
+
+# author Thibaut Cuvelier & Kornel Benko
+
+# Full author contact details are available in file CREDITS.
+
+"""reformat output of ssconvert of a single spreadsheet to match the needs
+of docbook5 table format .
+
+Expects to read from the file specified by sys.argv[1]
+and to write to the file specified by sys.argv[2].
+"""
+
+import re
+import sys
+import subprocess
+
+
+def process_file(contents):
+    """Turn ssconvert's XHTML export into a DocBook 5 table fragment.
+
+    Keeps only the first table inside <body> and maps the i/b/u and font-color
+    markup to emphasis/phrase. Accepts str, or UTF-8 encoded bytes; returns str.
+    Raises ValueError on a missing <body> or on an unsupported <font> tag.
+    """
+    if not isinstance(contents, str):  # E.g. bytes read from a pipe under Python 3.
+        contents = contents.decode("utf-8")
+    # Scrap the header and the footer.
+    if "<body>" not in contents:
+        raise ValueError("no <body> element in the ssconvert output")
+    contents = contents.split("<body>")[1].split("</body>")[0]
+    # Gnumeric may generate more than one table, just take the first one.
+    contents = contents.split("</table>")[0] + "\n</table>"
+    # Convert the rest of the table to DocBook.
+    for old, new in (("<p></p>", ""), ("<i>", "<emphasis>"), ("<b>", "<emphasis role='bold'>"),
+                     ("<u>", "<emphasis role='underline'>"), ("</i>", "</emphasis>"),
+                     ("</b>", "</emphasis>"), ("</u>", "</emphasis>")):
+        contents = contents.replace(old, new)
+    # [^"]* keeps each match inside one tag, even with several font tags on one line.
+    contents = re.sub(r'<font color="([^"]*)">', r"<phrase role='color \1'>", contents)
+    if "<font" in contents:  # Any other font attribute would leave invalid XML behind.
+        raise ValueError("unsupported <font> tag in the ssconvert output")
+    contents = contents.replace("</font>", "</phrase>")
+    # If the table has a caption, then the right tag is <table>. Otherwise, it's <informaltable>.
+    if '<caption>' not in contents:
+        contents = contents.replace("<table", "<informaltable").replace("</table>", "</informaltable>")
+    return contents.replace("\n\n", "\n")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        # Read from stdin, output to stdout.
+        sys.stdout.write(process_file(sys.stdin.read()) + "\n")
+        sys.exit(0)
+    if len(sys.argv) != 3:  # Script name, file to process, output file.
+        sys.exit("usage: spreadsheet_to_docbook.py [<spreadsheet> <output file>]")
+    # Let ssconvert write its XHTML export to its own stdout (fd://1) and capture it.
+    command = ["ssconvert", "--export-type=Gnumeric_html:xhtml", sys.argv[1], "fd://1"]
+    # check_output waits for ssconvert and raises CalledProcessError if it fails,
+    # so a broken conversion is reported instead of being processed.
+    xhtml = subprocess.check_output(command)
+    if not isinstance(xhtml, str):  # Python 3 hands back bytes; process_file works on str.
+        xhtml = xhtml.decode("utf-8")
+    with open(sys.argv[2], 'w') as f:  # Opened last: no empty file if a step above failed.
+        f.write(process_file(xhtml) + "\n")
+    sys.exit(0)
diff --git a/lib/xtemplates/gnumeric.xtemplate 
b/lib/xtemplates/gnumeric.xtemplate
index 56dbe97..2bbee1c 100644
--- a/lib/xtemplates/gnumeric.xtemplate
+++ b/lib/xtemplates/gnumeric.xtemplate
@@ -52,7 +52,9 @@ Template GnumericSpreadsheet
                Product "[Spreadsheet: $$FName]"
        FormatEnd
        Format DocBook
-               Product "[Spreadsheet: $$FName]"
+               Product "$$Contents(\"$$AbsPath$$Basename.xhtml\")"
+               UpdateFormat xhtml_table
+               UpdateResult "$$AbsPath$$Basename.xhtml"
        FormatEnd
        Format XHTML
                Product "$$Contents(\"$$AbsPath$$Basename.html\")"
-- 
lyx-cvs mailing list
lyx-cvs@lists.lyx.org
http://lists.lyx.org/mailman/listinfo/lyx-cvs

Reply via email to