commit 2c6537ff6635cb7e023b284be16705ea2ff87a10 Author: Thibaut Cuvelier <tcuvel...@lyx.org> Date: Wed Nov 18 01:51:05 2020 +0100
DocBook: ensure that <info>-related insets in the abstract are not generated in the abstract. This helps generate more conformant DocBook files. Also implement wrapper tags for InsetText. --- autotests/export/docbook/svglo.lyx | 215 ++++++++++++++++++++++++++++++++++++ autotests/export/docbook/svglo.xml | 19 +++ lib/layouts/svglobal3.layout | 5 + src/OutputParams.h | 3 + src/Paragraph.cpp | 2 +- src/insets/InsetLayout.cpp | 22 +++- src/insets/InsetLayout.h | 4 + src/insets/InsetText.cpp | 37 +++++-- src/output_docbook.cpp | 135 +++++++++++++++-------- 9 files changed, 381 insertions(+), 61 deletions(-) diff --git a/autotests/export/docbook/svglo.lyx b/autotests/export/docbook/svglo.lyx new file mode 100644 index 0000000..397f144 --- /dev/null +++ b/autotests/export/docbook/svglo.lyx @@ -0,0 +1,215 @@ +#LyX 2.4 created this file. For more info see https://www.lyx.org/ +\lyxformat 599 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass svglobal3 +\begin_preamble +\RequirePackage{fix-cm} + +\smartqed % flush right qed marks, e.g. 
at end of proof +\end_preamble +\use_default_options true +\maintain_unincluded_children no +\language english +\language_package default +\inputencoding utf8 +\fontencoding auto +\font_roman "default" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_roman_osf false +\font_sans_osf false +\font_typewriter_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\use_microtype false +\use_dash_ligatures false +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command bibtex +\index_command default +\paperfontsize default +\spacing single +\use_hyperref false +\papersize default +\use_geometry false +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 0 +\use_minted 0 +\use_lineno 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tablestyle default +\tracking_changes false +\output_changes false +\change_bars false +\postpone_fragile_content false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\docbook_table_output 0 +\end_header + +\begin_body + +\begin_layout Title +Title +\end_layout + +\begin_layout Abstract +Abstract text. 
+ +\begin_inset Flex Keywords +status open + +\begin_layout Plain Layout +First keyword +\begin_inset ERT +status collapsed + +\begin_layout Plain Layout + + +\backslash +and +\end_layout + +\end_inset + +Second keyword +\begin_inset ERT +status collapsed + +\begin_layout Plain Layout + + +\backslash +and +\end_layout + +\end_inset + +More +\end_layout + +\end_inset + + +\begin_inset Flex PACS +status open + +\begin_layout Plain Layout +PACS code1 +\begin_inset ERT +status collapsed + +\begin_layout Plain Layout + + +\backslash +and +\end_layout + +\end_inset + +PACS code2 +\begin_inset ERT +status collapsed + +\begin_layout Plain Layout + + +\backslash +and +\end_layout + +\end_inset + +more +\end_layout + +\end_inset + + +\begin_inset Flex Subclass +status open + +\begin_layout Plain Layout +MSC code1 +\begin_inset ERT +status collapsed + +\begin_layout Plain Layout + + +\backslash +and +\end_layout + +\end_inset + +MSC code2 +\begin_inset ERT +status collapsed + +\begin_layout Plain Layout + + +\backslash +and +\end_layout + +\end_inset + +more +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Your text comes here. 
+ Separate text sections with +\end_layout + +\end_body +\end_document diff --git a/autotests/export/docbook/svglo.xml b/autotests/export/docbook/svglo.xml new file mode 100644 index 0000000..9615ca9 --- /dev/null +++ b/autotests/export/docbook/svglo.xml @@ -0,0 +1,19 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- This DocBook file was created by LyX 2.4.0dev + See http://www.lyx.org/ for more information --> +<article xml:lang="en_US" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:xi="http://www.w3.org/2001/XInclude" version="5.2"> +<info> +<title>Title</title> +<subjectset role='pacs'><subject>PACS code1 <!-- \and --> +PACS code2 <!-- \and --> +more</subject></subjectset><subjectset role='mcs'><subject>MSC code1 <!-- \and --> +MSC code2 <!-- \and --> +more</subject></subjectset><keywordset><keyword>First keyword <!-- \and --> +Second keyword <!-- \and --> +More</keyword></keywordset><abstract> +<para>Abstract text. </para> +</abstract> + +</info> +<para>Your text comes here. Separate text sections with</para> +</article> \ No newline at end of file diff --git a/lib/layouts/svglobal3.layout b/lib/layouts/svglobal3.layout index c65c2a7..7a9083a 100644 --- a/lib/layouts/svglobal3.layout +++ b/lib/layouts/svglobal3.layout @@ -83,6 +83,11 @@ InsetLayout Flex:Subclass CopyStyle Flex:Keywords LatexName subclass LabelString "Mathematics Subject Classification" + DocBookTag subject + DocBookTagType paragraph + DocBookWrapperTag subjectset + DocBookWrapperAttr role='mcs' + DocBookInInfo always End InsetLayout Flex:CRSC diff --git a/src/OutputParams.h b/src/OutputParams.h index e64e623..ec94613 100644 --- a/src/OutputParams.h +++ b/src/OutputParams.h @@ -376,6 +376,9 @@ public: /// Is the current context a table? bool docbook_in_table = false; + /// Should the layouts that should/must go into <info> be generated? 
+ bool docbook_generate_info = true; + /// Are we generating this material for inclusion in a TOC-like entity? bool for_toc = false; diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index c668f3b..aa6bd30 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -3365,7 +3365,7 @@ std::vector<docstring> Paragraph::simpleDocBookOnePar(Buffer const & buf, // If this is an InsetNewline, generate a new paragraph. Also reset the fonts, so that tags are closed in // this paragraph. - if (getInset(i) != nullptr && getInset(i)->lyxCode() == NEWLINE_CODE) { + if (getInset(i) && getInset(i)->lyxCode() == NEWLINE_CODE) { if (!ignore_fonts) xs->closeFontTags(); diff --git a/src/insets/InsetLayout.cpp b/src/insets/InsetLayout.cpp index c22aea9..dcb863c 100644 --- a/src/insets/InsetLayout.cpp +++ b/src/insets/InsetLayout.cpp @@ -91,9 +91,10 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass, IL_HTMLSTYLE, IL_HTMLPREAMBLE, IL_DOCBOOKTAG, - IL_DOCBOOKTAGTYPE, IL_DOCBOOKATTR, + IL_DOCBOOKTAGTYPE, IL_DOCBOOKSECTION, + IL_DOCBOOKININFO, IL_DOCBOOKWRAPPERTAG, IL_DOCBOOKWRAPPERTAGTYPE, IL_DOCBOOKWRAPPERATTR, @@ -142,6 +143,7 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass, { "decoration", IL_DECORATION }, { "display", IL_DISPLAY }, { "docbookattr", IL_DOCBOOKATTR }, + { "docbookininfo", IL_DOCBOOKININFO }, { "docbooksection", IL_DOCBOOKSECTION }, { "docbooktag", IL_DOCBOOKTAG }, { "docbooktagtype", IL_DOCBOOKTAGTYPE }, @@ -491,11 +493,14 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass, case IL_DOCBOOKTAG: lex >> docbooktag_; break; + case IL_DOCBOOKATTR: + lex >> docbookattr_; + break; case IL_DOCBOOKTAGTYPE: lex >> docbooktagtype_; break; - case IL_DOCBOOKATTR: - lex >> docbookattr_; + case IL_DOCBOOKININFO: + lex >> docbookininfo_; break; case IL_DOCBOOKSECTION: lex >> docbooksection_; @@ -638,6 +643,17 @@ docstring InsetLayout::htmlstyle() const return retval; } + +std::string const & InsetLayout::docbookininfo() const +{ + // Same as 
Layout::docbookininfo. + // Indeed, a trilean. Only titles should be "maybe": otherwise, metadata is "always", content is "never". + if (docbookininfo_.empty() || (docbookininfo_ != "never" && docbookininfo_ != "always" && docbookininfo_ != "maybe")) + docbookininfo_ = "never"; + return docbookininfo_; +} + + void InsetLayout::readArgument(Lexer & lex) { Layout::latexarg arg; diff --git a/src/insets/InsetLayout.h b/src/insets/InsetLayout.h index 8cc83dd..6f2f3dd 100644 --- a/src/insets/InsetLayout.h +++ b/src/insets/InsetLayout.h @@ -154,6 +154,8 @@ public: /// std::string docbookattr() const { return docbookattr_; } /// + std::string const & docbookininfo() const; + /// bool docbooksection() const { return docbooksection_; } /// std::string docbookwrappertag() const { return docbookwrappertag_; } @@ -295,6 +297,8 @@ private: /// std::string docbookattr_; /// + mutable std::string docbookininfo_; + /// bool docbooksection_ = false; /// std::string docbookwrappertag_; diff --git a/src/insets/InsetText.cpp b/src/insets/InsetText.cpp index 9e5f4ac..99ddfee 100644 --- a/src/insets/InsetText.cpp +++ b/src/insets/InsetText.cpp @@ -616,16 +616,28 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op } InsetLayout const & il = getLayout(); - if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") { - docstring attrs = docstring(); - if (!il.docbookattr().empty()) - attrs += from_ascii(il.docbookattr()); - if (il.docbooktag() == "link") - attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\""); - xs << xml::StartTag(il.docbooktag(), attrs); + + // Maybe this is an <info> paragraph that should not be generated at all (i.e. right now, its place is somewhere + // else, typically outside the current paragraph). + if (!rp.docbook_generate_info && il.docbookininfo() != "never") + return; + + // Start outputting this inset. 
+ if (opts & WriteOuterTag) { + if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE") + xs << xml::StartTag(il.docbookwrappertag(), il.docbookwrapperattr()); + + if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") { + docstring attrs = docstring(); + if (!il.docbookattr().empty()) + attrs += from_ascii(il.docbookattr()); + if (il.docbooktag() == "link") + attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\""); + xs << xml::StartTag(il.docbooktag(), attrs); + } } - // No need for labels that are generated from counters. + // No need for labels that are generated from counters. They should be handled by the external DocBook processor. // With respect to XHTML, paragraphs are still allowed here. if (!allowMultiPar()) @@ -637,8 +649,13 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op docbookParagraphs(text_, buffer(), xs, runparams); xs.endDivision(); - if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") - xs << xml::EndTag(il.docbooktag()); + if (opts & WriteOuterTag) { + if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") + xs << xml::EndTag(il.docbooktag()); + + if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE") + xs << xml::EndTag(il.docbookwrappertag()); + } } diff --git a/src/output_docbook.cpp b/src/output_docbook.cpp index fad0658..76d1cdc 100644 --- a/src/output_docbook.cpp +++ b/src/output_docbook.cpp @@ -160,11 +160,10 @@ string fontToAttribute(xml::FontTypes type) { // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient // for the font. 
string role = fontToRole(type); - if (!role.empty()) { + if (!role.empty()) return "role='" + role + "'"; - } else { + else return ""; - } } @@ -412,10 +411,6 @@ void makeParagraph( OutputParams const & runparams, ParagraphList::const_iterator const & par) { - // If this kind of layout should be ignored, already leave. - if (par->layout().docbooktag() == "IGNORE") - return; - // Useful variables. auto const begin = text.paragraphs().begin(); auto const end = text.paragraphs().end(); @@ -511,7 +506,7 @@ void makeParagraph( // or we're not in the last paragraph, anyway. // (ii) We didn't open it and docbook_in_par is true, // but we are in the first par, and there is a next par. - bool const close_par = open_par && (!runparams.docbook_in_par); + bool const close_par = open_par && !runparams.docbook_in_par; // Determine if this paragraph has some real content. Things like new pages are not caught // by Paragraph::empty(), even though they do not generate anything useful in DocBook. @@ -542,10 +537,6 @@ void makeEnvironment(Text const &text, OutputParams const &runparams, ParagraphList::const_iterator const & par) { - // If this kind of layout should be ignored, already leave. - if (par->layout().docbooktag() == "IGNORE") - return; - // Useful variables. auto const end = text.paragraphs().end(); auto nextpar = par; @@ -648,13 +639,6 @@ ParagraphList::const_iterator makeListEnvironment(Text const &text, auto const end = text.paragraphs().end(); auto const envend = findEndOfEnvironment(par, end); - // If this kind of layout should be ignored, already leave. - if (begin->layout().docbooktag() == "IGNORE") { - auto nextpar = par; - ++nextpar; - return nextpar; - } - // Output the opening tag for this environment. 
Layout const & envstyle = par->layout(); openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype()); @@ -741,9 +725,6 @@ void makeCommand( OutputParams const & runparams, ParagraphList::const_iterator const & par) { - // If this kind of layout should be ignored, already leave. - if (par->layout().docbooktag() == "IGNORE") - return; // Useful variables. // Unlike XHTML, no need for labels, as they are handled by DocBook tags. @@ -909,30 +890,77 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, } // end anonymous namespace +std::set<const Inset *> gatherInfo(ParagraphList::const_iterator par) +{ + // This function has a structure highly similar to makeAny and its friends. It's only made to be called on what + // should become the document's <abstract>. + std::set<const Inset *> values; + + // If this kind of layout should be ignored, already leave. + if (par->layout().docbooktag() == "IGNORE") + return values; + + // If this should go in info, mark it as such. Dive deep into the abstract, as it may hide many things that + // DocBook doesn't want to be inside the abstract. 
+ for (pos_type i = 0; i < par->size(); ++i) { + if (par->getInset(i) && par->getInset(i)->asInsetText()) { + InsetText const *inset = par->getInset(i)->asInsetText(); + + if (inset->getLayout().docbookininfo() != "never") { + values.insert(inset); + } else { + auto subpar = inset->paragraphs().begin(); + while (subpar != inset->paragraphs().end()) { + values.merge(gatherInfo(subpar)); + ++subpar; + } + } + } + } + + return values; +} + + ParagraphList::const_iterator makeAny(Text const &text, Buffer const &buf, XMLStream &xs, OutputParams const &runparams, ParagraphList::const_iterator par) { - switch (par->layout().latextype) { - case LATEX_COMMAND: - makeCommand(text, buf, xs, runparams, par); - break; - case LATEX_ENVIRONMENT: - makeEnvironment(text, buf, xs, runparams, par); - break; - case LATEX_LIST_ENVIRONMENT: - case LATEX_ITEM_ENVIRONMENT: - // Only case when makeAny() might consume more than one paragraph. - return makeListEnvironment(text, buf, xs, runparams, par); - case LATEX_PARAGRAPH: - makeParagraph(text, buf, xs, runparams, par); - break; - case LATEX_BIB_ENVIRONMENT: - makeBibliography(text, buf, xs, runparams, par); - break; + bool ignoreParagraph = false; + + // If this kind of layout should be ignored, already leave. + ignoreParagraph |= par->layout().docbooktag() == "IGNORE"; + + // For things that should go into <info>, check the variable rp.docbook_generate_info. This does not apply to the + // abstract itself. + bool isAbstract = par->layout().docbookabstract() || par->layout().docbooktag() == "abstract"; + ignoreParagraph |= !isAbstract && par->layout().docbookininfo() != "never" && !runparams.docbook_generate_info; + + // Switch on the type of paragraph to call the right handler. 
+ if (!ignoreParagraph) { + switch (par->layout().latextype) { + case LATEX_COMMAND: + makeCommand(text, buf, xs, runparams, par); + break; + case LATEX_ENVIRONMENT: + makeEnvironment(text, buf, xs, runparams, par); + break; + case LATEX_LIST_ENVIRONMENT: + case LATEX_ITEM_ENVIRONMENT: + // Only case when makeAny() might consume more than one paragraph. + return makeListEnvironment(text, buf, xs, runparams, par); + case LATEX_PARAGRAPH: + makeParagraph(text, buf, xs, runparams, par); + break; + case LATEX_BIB_ENVIRONMENT: + makeBibliography(text, buf, xs, runparams, par); + break; + } } + + // For cases that are not lists, the next paragraph to handle is the next one. ++par; return par; } @@ -964,6 +992,9 @@ void outputDocBookInfo( // This check must be performed *before* a decision on whether or not to output <info> is made. bool hasAbstract = !info.abstract.empty(); docstring abstract; + set<const Inset *> infoInsets; // Paragraphs that should go into <info>, but are hidden in an <abstract> + // paragraph. (This happens for quite a few layouts, unfortunately.) + if (hasAbstract) { // Generate the abstract XML into a string before further checks. // Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might @@ -971,14 +1002,20 @@ void outputDocBookInfo( odocstringstream os2; XMLStream xs2(os2); - set<pit_type> doneParas; + auto rp = runparams; + rp.docbook_generate_info = false; + + set<pit_type> doneParas; // Paragraphs that have already been converted (mostly to deal with lists). for (auto const & p : info.abstract) { if (doneParas.find(p) == doneParas.end()) { auto oldPar = paragraphs.iterator_at(p); - auto newPar = makeAny(text, buf, xs2, runparams, oldPar); + auto newPar = makeAny(text, buf, xs2, rp, oldPar); + + infoInsets.merge(gatherInfo(oldPar)); // Insert the indices of all the paragraphs that were just generated (typically, one). 
// **Make the hypothesis that, when an abstract has a list, all its items are consecutive.** + // Otherwise, makeAny and makeListEnvironment would have to be adapted too. pit_type id = p; while (oldPar != newPar) { doneParas.emplace(id); @@ -1009,13 +1046,11 @@ void outputDocBookInfo( xs << xml::CR(); } - // Output the elements that should go in <info>, before and after the abstract. + // Output the elements that should go in <info>. + // - First, the title. for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous // that mandating a wrapper like <info> would repel users. Thus, generate them first. makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit)); - for (auto pit : info.mustBeInInfo) - makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit)); - // If there is no title, generate one (required for the document to be valid). // This code is called for the main document, for table cells, etc., so be precise in this condition. if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) { @@ -1025,8 +1060,14 @@ void outputDocBookInfo( xs << xml::CR(); } - // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if - // it contains several paragraphs that are empty). + // - Then, other metadata. + for (auto pit : info.mustBeInInfo) + makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit)); + for (auto const * inset : infoInsets) + inset->docbook(xs, runparams); + + // - Finally, always output the abstract as the last item of the <info>, as it requires special treatment + // (especially if it contains several paragraphs that are empty). if (hasAbstract) { if (info.abstractLayout) { xs << XMLStream::ESCAPE_NONE << abstract; -- lyx-cvs mailing list lyx-cvs@lists.lyx.org http://lists.lyx.org/mailman/listinfo/lyx-cvs