commit f219fe1cd7aaa68ab35ec8b3e7fef25926b77cdd Author: Thibaut Cuvelier <tcuvel...@lyx.org> Date: Sun Aug 30 00:35:47 2020 +0200
DocBook: streamline code to handle abstracts. --- autotests/export/docbook/basic_book.xml | 23 ++-- src/output_docbook.cpp | 191 +++++++++++++------------------ 2 files changed, 88 insertions(+), 126 deletions(-) diff --git a/autotests/export/docbook/basic_book.xml b/autotests/export/docbook/basic_book.xml index c94d2fa..18496b9 100644 --- a/autotests/export/docbook/basic_book.xml +++ b/autotests/export/docbook/basic_book.xml @@ -1,12 +1,14 @@ <?xml version="1.0" encoding="UTF-8"?> <!-- This DocBook file was created by LyX 2.4.0dev See http://www.lyx.org/ for more information --> -<book xml:lang="en_US" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:xi="http://www.w3.org/2001/XInclude" version="5.1"> - -<info><title>Book title</title> -<author><personname>Book author</personname></author> - -<abstract><para>Book abstract in info. </para> +<book xml:lang="en_US" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:xi="http://www.w3.org/2001/XInclude" version="5.2"> +<info> +<title>Book title</title> +<author> +<personname>Book author</personname> +</author> +<abstract> +<para>Book abstract in info. </para> </abstract> </info> <chapter> @@ -15,9 +17,7 @@ </chapter> <part> <title>First part</title> - -<partintro><para>Part intro. </para> -</partintro> +<para>Part intro. </para> <chapter> <title>First chapter of first part</title> <para>Paragraph. </para> @@ -29,9 +29,7 @@ </part> <part> <title>Second part</title> - -<partintro><para>Part 2 intro. </para> -</partintro> +<para>Part 2 intro. </para> <chapter> <title>First chapter of second part</title> <para>Paragraph. </para> @@ -41,5 +39,4 @@ <para>Paragraph. 
</para> </chapter> </part> - </book> \ No newline at end of file diff --git a/src/output_docbook.cpp b/src/output_docbook.cpp index e609f39..38bcf05 100644 --- a/src/output_docbook.cpp +++ b/src/output_docbook.cpp @@ -773,21 +773,28 @@ ParagraphList::const_iterator makeAny(Text const &text, } +bool isLayoutSectioning(Layout const & lay) +{ + return lay.category() == from_utf8("Sectioning"); +} + + using DocBookDocumentSectioning = tuple<bool, pit_type>; struct DocBookInfoTag { const set<pit_type> shouldBeInInfo; - const set<pit_type> mustBeInInfo; + const set<pit_type> mustBeInInfo; // With the notable exception of the abstract! const set<pit_type> abstract; + const bool abstractLayout; pit_type bpit; pit_type epit; DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo, - const set<pit_type> & abstract, pit_type bpit, pit_type epit) : + const set<pit_type> & abstract, bool abstractLayout, pit_type bpit, pit_type epit) : shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract), - bpit(bpit), epit(epit) {} + abstractLayout(abstractLayout), bpit(bpit), epit(epit) {} }; @@ -796,7 +803,7 @@ DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, while (bpit < epit) { Layout const &style = paragraphs[bpit].layout(); - documentHasSections |= style.category() == from_utf8("Sectioning"); + documentHasSections |= isLayoutSectioning(style); if (documentHasSections) break; @@ -820,10 +827,14 @@ bool hasOnlyNotes(Paragraph const & par) } -DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) { +DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, + pit_type bpit, pit_type const epit, + // Typically, bpit is the beginning of the document and epit the end *or* the first section. 
+ bool documentHasSections) { set<pit_type> shouldBeInInfo; set<pit_type> mustBeInInfo; - set<pit_type> abstract; + set<pit_type> abstractWithLayout; + set<pit_type> abstractNoLayout; // Find the first non empty paragraph by mutating bpit. while (bpit < epit) { @@ -834,78 +845,48 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs, pit_type b break; } - // Find the last info-like paragraph. - pit_type cpit = bpit; + // Traverse everything that might belong to <info>. bool hasAbstractLayout = false; - while (cpit < epit) { - // Skip paragraphs only containing one note. + pit_type cpit = bpit; + for (; cpit < epit; ++cpit) { + // Skip paragraphs that don't generate anything in DocBook. Paragraph const & par = paragraphs[cpit]; - if (hasOnlyNotes(par)) { - cpit += 1; + if (par.empty() || par.emptyTag() || hasOnlyNotes(par)) continue; + + // There should never be any section here. (Just a sanity check: if this fails, this function could end up + // processing the whole document.) + if (isLayoutSectioning(par.layout())) { + LYXERR0("Assertion failed: section found in potential <info> paragraphs."); + break; } - if (par.layout().docbookabstract()) + // If this is marked as an abstract by the layout, put it in the right set. + if (par.layout().docbookabstract()) { hasAbstractLayout = true; + abstractWithLayout.emplace(cpit); + continue; + } - // Based on layout information, store this paragraph in one set: should be in <info>, must be. + // Based on layout information, store this paragraph in one set: should be in <info>, must be, + // or abstract (). Layout const &style = par.layout(); - if (style.docbookininfo() == "always") { + if (style.docbookininfo() == "always") mustBeInInfo.emplace(cpit); - } else if (style.docbookininfo() == "maybe") { + else if (style.docbookininfo() == "maybe") shouldBeInInfo.emplace(cpit); - } else { - // Hypothesis: the <info> parts should be grouped together near the beginning bpit. 
- // There may be notes in between, but nothing else. + else if (!hasAbstractLayout) + abstractNoLayout.emplace(cpit); + else // This should definitely not be in <info>. break; - } - cpit += 1; } - // Now, cpit points to the last paragraph that has things that could go in <info>. + // Now, cpit points to the first paragraph that no more has things that could go in <info>. // bpit is the beginning of the <info> part. - // Go once again through the list of paragraphs to find the abstract. If there is an abstract - // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text. - if (hasAbstractLayout) { - pit_type pit = bpit; - while (pit < cpit) { // Don't overshoot the <info> part. - if (paragraphs[pit].layout().docbookabstract()) - abstract.emplace(pit); - pit++; - } - } else { - pit_type lastAbstract = epit + 1; // A nonsensical value. - docstring lastAbstractLayout; - - pit_type pit = bpit; - while (pit < cpit) { // Don't overshoot the <info> part. - const Paragraph & par = paragraphs.at(pit); - if (!par.insetList().empty()) { - for (const auto &i : par.insetList()) { - if (i.inset->getText(0) != nullptr) { - if (lastAbstract == epit + 1) { - // First paragraph that matches the heuristic definition of abstract. - lastAbstract = pit; - lastAbstractLayout = par.layout().name(); - } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) { - // This is either too far from the last abstract paragraph or doesn't - // have the right layout name, BUT there has already been an abstract - // in this document: done with detecting the abstract. - goto done; // Easier to get out of two nested loops. - } - - abstract.emplace(pit); - break; - } - } - } - pit++; - } - } - - done: - return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit); + return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, + hasAbstractLayout ? 
abstractWithLayout : abstractNoLayout, + hasAbstractLayout, bpit, cpit); } } // end anonymous namespace @@ -940,17 +921,9 @@ void outputDocBookInfo( if (hasAbstract) { // Generate the abstract XML into a string before further checks. odocstringstream os2; - { - XMLStream xs2(os2); - auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end()); - auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end()); - // info.abstract is inclusive, epit is exclusive, hence +1 for looping. - - while (bpit < epit) { - makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit)); - bpit += 1; - } - } + XMLStream xs2(os2); + for (auto const & p : info.abstract) + makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(p)); // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this, // even though they must be properly output if there is some abstract. @@ -974,27 +947,33 @@ void outputDocBookInfo( } // Output the elements that should go in <info>, before and after the abstract. - for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous + for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous // that mandating a wrapper like <info> would repel users. Thus, generate them first. makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit)); - } - for (auto pit : info.mustBeInInfo) { + for (auto pit : info.mustBeInInfo) if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after. makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit)); - } // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if // it contains several paragraphs that are empty). 
if (hasAbstract) { -// string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag(); -// if (tag == "NONE") -// tag = "abstract"; -// -// xs << xml::StartTag(tag); -// xs << xml::CR(); - xs << XMLStream::ESCAPE_NONE << abstract; -// xs << xml::EndTag(tag); -// xs << xml::CR(); + if (info.abstractLayout) { + xs << XMLStream::ESCAPE_NONE << abstract; + xs << xml::CR(); + } else { + string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag(); + if (tag == "NONE") + tag = "abstract"; + + if (!xs.isLastTagCR()) + xs << xml::CR(); + + xs << xml::StartTag(tag); + xs << xml::CR(); + xs << XMLStream::ESCAPE_NONE << abstract; + xs << xml::EndTag(tag); + xs << xml::CR(); + } } // End the <info> tag if it was started. @@ -1006,23 +985,6 @@ void outputDocBookInfo( } -void docbookFirstParagraphs( - Text const &text, - Buffer const &buf, - XMLStream &xs, - OutputParams const &runparams, - pit_type epit) -{ - // Handle the beginning of the document, supposing it has sections. - // Major role: output the first <info> tag. - - ParagraphList const &paragraphs = text.paragraphs(); - pit_type bpit = runparams.par_begin; - DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit); - outputDocBookInfo(text, buf, xs, runparams, paragraphs, info); -} - - void docbookSimpleAllParagraphs( Text const & text, Buffer const & buf, @@ -1036,7 +998,7 @@ void docbookSimpleAllParagraphs( ParagraphList const &paragraphs = text.paragraphs(); pit_type bpit = runparams.par_begin; pit_type const epit = runparams.par_end; - DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit); + DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit, false); outputDocBookInfo(text, buf, xs, runparams, paragraphs, info); // Then, the content. It starts where the <info> ends. 
@@ -1071,20 +1033,24 @@ void docbookParagraphs(Text const &text, std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth // of the section and the tag that was used to open it. - // Detect whether the document contains sections. If there are no sections, there can be no automatically - // discovered abstract. + // Detect whether the document contains sections. If there are no sections, treatment is largely simplified. + // In particular, there can't be an abstract, unless it is manually marked. bool documentHasSections; pit_type eppit; tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit); - if (documentHasSections) { - docbookFirstParagraphs(text, buf, xs, runparams, eppit); - bpit = eppit; - } else { + // Deal with "simple" documents, i.e. those without sections. + if (!documentHasSections){ docbookSimpleAllParagraphs(text, buf, xs, runparams); return; } + // Output the first <info> tag (or just the title). + DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, eppit, true); + outputDocBookInfo(text, buf, xs, runparams, paragraphs, info); + bpit = eppit; + + // Then, iterate through the paragraphs of this document. bool currentlyInAppendix = false; auto par = text.paragraphs().iterator_at(bpit); @@ -1102,8 +1068,7 @@ void docbookParagraphs(Text const &text, Layout const &style = par->layout(); // Think about adding <section> and/or </section>s. - const bool isLayoutSectioning = style.category() == from_utf8("Sectioning"); - if (isLayoutSectioning) { + if (isLayoutSectioning(style)) { int level = style.toclevel; // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2> -- lyx-cvs mailing list lyx-cvs@lists.lyx.org http://lists.lyx.org/mailman/listinfo/lyx-cvs