sw/inc/EnhancedPDFExportHelper.hxx              |    6 
 sw/qa/extras/globalfilter/globalfilter.cxx      |  423 ++++++++++++++++++++++++
 sw/source/core/text/EnhancedPDFExportHelper.cxx |   38 +-
 sw/source/core/text/frmpaint.cxx                |   26 +
 sw/source/core/text/itrpaint.cxx                |   30 +
 sw/source/core/text/itrpaint.hxx                |    7 
 sw/source/core/text/porlay.cxx                  |   15 
 sw/source/core/text/porlay.hxx                  |    1 
 8 files changed, 531 insertions(+), 15 deletions(-)

New commits:
commit 9b38beadf9eaf027b201cdf0ecb2bce5611014dd
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Wed Mar 8 14:25:26 2023 +0100
Commit:     Michael Stahl <michael.st...@allotropia.de>
CommitDate: Fri Mar 10 10:59:26 2023 +0000

    sw: PDF/UA export: produce Lbl tagged element
    
    Commit bd66a0201fb6d1a127139287cc8b5bd27e3a92c3 did this for editengine
    text in shapes, but it turns out it's a little more complicated in sw.
    
    The SwTaggedPDFHelper used to live conveniently on the stack, but list
    labels are SwLinePortions inside a paragraph and can't be painted
    separately, so if there is a list label, the creation of LBody has to be
    delayed until the label has been processed.
    
    The SwNumberPortion can't even generate the Lbl itself, because there
    can be multiple portions, they are broken across lines and at script
    boundaries, and checking their follow flags is also tricky (see previous
    commit).
    
    Change-Id: I0dd383089a7ca0edddf6f805e79615c611a446f9
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/148556
    Tested-by: Michael Stahl <michael.st...@allotropia.de>
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>

diff --git a/sw/inc/EnhancedPDFExportHelper.hxx 
b/sw/inc/EnhancedPDFExportHelper.hxx
index 2843e7a78a67..e928e417a0f0 100644
--- a/sw/inc/EnhancedPDFExportHelper.hxx
+++ b/sw/inc/EnhancedPDFExportHelper.hxx
@@ -110,8 +110,10 @@ struct Por_Info
 {
     const SwLinePortion& mrPor;
     const SwTextPainter& mrTextPainter;
-    Por_Info( const SwLinePortion& rPor, const SwTextPainter& rTextPainer )
-            : mrPor( rPor ), mrTextPainter( rTextPainer ) {};
+    bool const m_isNumberingLabel;
+
+    Por_Info(const SwLinePortion& rPor, const SwTextPainter& rTextPainer, bool 
const isNumberingLabel)
+        : mrPor(rPor), mrTextPainter(rTextPainer), 
m_isNumberingLabel(isNumberingLabel) {};
 };
 
 struct lt_TableColumn
diff --git a/sw/qa/extras/globalfilter/globalfilter.cxx 
b/sw/qa/extras/globalfilter/globalfilter.cxx
index 77a0b8aa2011..6351989aa541 100644
--- a/sw/qa/extras/globalfilter/globalfilter.cxx
+++ b/sw/qa/extras/globalfilter/globalfilter.cxx
@@ -12,6 +12,7 @@
 #include <com/sun/star/awt/XBitmap.hpp>
 #include <com/sun/star/graphic/XGraphic.hpp>
 #include <com/sun/star/graphic/GraphicType.hpp>
+#include <com/sun/star/text/ControlCharacter.hpp>
 #include <com/sun/star/text/XText.hpp>
 #include <com/sun/star/text/XDocumentIndex.hpp>
 #include <o3tl/safeint.hxx>
@@ -1230,6 +1231,428 @@ void Test::testBulletAsImage()
     }
 }
 
+CPPUNIT_TEST_FIXTURE(Test, testListLabelPDFExport)
+{
+    createSwDoc();
+
+    uno::Reference<text::XTextDocument> xDoc(mxComponent, 
uno::UNO_QUERY_THROW);
+    uno::Reference<text::XText> xText(xDoc->getText());
+    uno::Reference<lang::XMultiServiceFactory> xFactory(mxComponent, 
uno::UNO_QUERY_THROW);
+    uno::Reference<container::XIndexReplace> xNumRule(
+        xFactory->createInstance("com.sun.star.text.NumberingRules"),
+        uno::UNO_QUERY_THROW);
+    OUString listFormat;
+    for (sal_Int32 i = 0; i < xNumRule->getCount(); ++i)
+    {
+        uno::Sequence<beans::PropertyValue> format;
+        format.getArray();
+        xNumRule->getByIndex(i) >>= format;
+        {
+            auto it(::std::find_if(format.begin(), format.end(),
+                    [](auto const& r) { return r.Name == "NumberingType"; }));
+            // need something RTL
+            const_cast<uno::Any&>(it->Value) <<= 
style::NumberingType::CHARS_ARABIC;
+        }
+        {
+#if 0
+            // this doesn't work any more
+            auto it(::std::find_if(format.begin(), format.end(),
+                    [](auto const& r) { return r.Name == "ParentNumbering"; 
}));
+            const_cast<uno::Any&>(it->Value) <<= sal_Int16(i + 1);
+#endif
+            listFormat += "%" + OUString::number(i+1) + "%.";
+            auto it(::std::find_if(format.begin(), format.end(),
+                    [](auto const& r) { return r.Name == "ListFormat"; }));
+            const_cast<uno::Any&>(it->Value) <<= listFormat;
+        }
+        xNumRule->replaceByIndex(i, uno::Any(format));
+    }
+    uno::Reference<beans::XPropertySet>(getParagraph(1), 
uno::UNO_QUERY_THROW)->setPropertyValue("NumberingRules", uno::Any(xNumRule));
+    xText->insertControlCharacter(xText->getEnd(), 
text::ControlCharacter::PARAGRAPH_BREAK, false);
+    uno::Reference<beans::XPropertySet>(getParagraph(2), 
uno::UNO_QUERY_THROW)->setPropertyValue("NumberingLevel", 
uno::Any(sal_Int16(1)));
+    xText->insertControlCharacter(xText->getEnd(), 
text::ControlCharacter::PARAGRAPH_BREAK, false);
+    uno::Reference<beans::XPropertySet>(getParagraph(3), 
uno::UNO_QUERY_THROW)->setPropertyValue("NumberingLevel", 
uno::Any(sal_Int16(2)));
+
+    // check PDF export of the list items (label in particular)
+    utl::MediaDescriptor aMediaDescriptor;
+    aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
+    // Enable PDF/UA
+    uno::Sequence<beans::PropertyValue> aFilterData(
+        comphelper::InitPropertySequence({ { "PDFUACompliance", uno::Any(true) 
} }));
+    aMediaDescriptor["FilterData"] <<= aFilterData;
+    css::uno::Reference<frame::XStorable> xStorable(mxComponent, 
css::uno::UNO_QUERY_THROW);
+    xStorable->storeToURL(maTempFile.GetURL(), 
aMediaDescriptor.getAsConstPropertyValueList());
+
+    vcl::filter::PDFDocument aDocument;
+    SvFileStream aStream(maTempFile.GetURL(), StreamMode::READ);
+    CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+    // The document has one page.
+    std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+
+    vcl::filter::PDFObjectElement* pContents = 
aPages[0]->LookupObject("Contents");
+    CPPUNIT_ASSERT(pContents);
+    vcl::filter::PDFStreamElement* pStream = pContents->GetStream();
+    CPPUNIT_ASSERT(pStream);
+    SvMemoryStream& rObjectStream = pStream->GetMemory();
+    // Uncompress it.
+    SvMemoryStream aUncompressed;
+    ZCodec aZCodec;
+    aZCodec.BeginCompression();
+    rObjectStream.Seek(0);
+    aZCodec.Decompress(rObjectStream, aUncompressed);
+    CPPUNIT_ASSERT(aZCodec.EndCompression());
+
+    auto pStart = static_cast<const char*>(aUncompressed.GetData());
+    const char* const pEnd = pStart + aUncompressed.GetSize();
+
+    enum
+    {
+        Default,
+        Lbl,
+        LblFoundText
+    } state
+        = Default;
+
+    auto nLine(0);
+    auto nLbl(0);
+    auto nLblTj(0);
+    auto nLblTJ(0);
+    std::vector<int> mcids;
+    while (true)
+    {
+        ++nLine;
+        auto const pLine = ::std::find(pStart, pEnd, '\n');
+        if (pLine == pEnd)
+        {
+            break;
+        }
+        std::string_view const line(pStart, pLine - pStart);
+        pStart = pLine + 1;
+        if (!line.empty() && line[0] != '%')
+        {
+            ::std::cerr << nLine << ": " << line << "\n";
+            if (o3tl::starts_with(line, "/Lbl<</MCID") && 
o3tl::ends_with(line, ">>BDC"))
+            {
+                CPPUNIT_ASSERT_EQUAL_MESSAGE("unexpected nesting", Default, 
state);
+                mcids.push_back(o3tl::toInt32(line.substr(12)));
+                state = Lbl;
+                ++nLbl;
+            }
+            else if (state == Lbl)
+            {
+                auto const endj(line.find(">Tj"));
+                if (endj != ::std::string_view::npos)
+                {
+                    auto const start(line.rfind("<", endj) + 1);
+                    // for these, expected length is 1 glyph, each 2 digits
+                    // would be better to check the content but it depends on 
CMap
+                    CPPUNIT_ASSERT_EQUAL(static_cast<decltype(endj - start)>(1 
* 2), endj - start);
+                    state = LblFoundText;
+                    ++nLblTj;
+                }
+                else
+                {
+                    auto const endJ(line.find("]TJ"));
+                    if (endJ != ::std::string_view::npos)
+                    {
+                        auto const start(line.rfind("[", endJ) + 1);
+                        auto i(line.find("<", start));
+                        auto digits(0);
+                        while (i != ::std::string_view::npos && i < endJ)
+                        {
+                            auto const j(line.find(">", i));
+                            digits += j - (i+1);
+                            i = line.find("<", j);
+                        }
+                        // these have list-level numbers + one less ".", each 
2 digits
+                        
CPPUNIT_ASSERT_EQUAL(static_cast<decltype(digits)>((((nLbl/2 + 1) * 2) - 1) * 
2), digits);
+                        state = LblFoundText;
+                        ++nLblTJ;
+                    }
+                }
+            }
+            else if (state != Default && line == "EMC")
+            {
+                CPPUNIT_ASSERT_EQUAL_MESSAGE("missing text", LblFoundText, 
state);
+                state = Default;
+            }
+        }
+    }
+    CPPUNIT_ASSERT_EQUAL_MESSAGE("unclosed MCS", Default, state);
+    // ideally there should be 3 but apparently every text portion gets its own
+    // tag - this should not be a problem if these are grouped in the structure
+    // tree into 3 Lbl.
+    CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nLbl)>(6), nLbl);
+    // these are quite arbitrary?
+    CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nLbl)>(2), nLblTJ);
+    CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nLbl)>(4), nLblTj);
+
+    auto nL(0);
+    for (const auto& rDocElement : aDocument.GetElements())
+    {
+        auto pObject0 = 
dynamic_cast<vcl::filter::PDFObjectElement*>(rDocElement.get());
+        if (!pObject0)
+            continue;
+        auto pType0 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject0->Lookup("Type"));
+        if (!pType0 || pType0->GetValue() != "StructElem")
+        {
+            continue;
+        }
+        auto pS0 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject0->Lookup("S"));
+        if (!pS0 || pS0->GetValue() != "Document")
+        {
+            continue;
+        }
+        auto pKids0 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject0->Lookup("K"));
+        CPPUNIT_ASSERT(pKids0);
+
+        for (const auto& pKid0 : pKids0->GetElements())
+        {
+            auto pRefKid0 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKid0);
+            CPPUNIT_ASSERT(pRefKid0);
+            auto pObject1 = pRefKid0->LookupObject();
+            CPPUNIT_ASSERT(pObject1);
+            auto pType1 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1->Lookup("Type"));
+            CPPUNIT_ASSERT(pType1);
+
+            if (pType1 && pType1->GetValue() == "StructElem")
+            {
+                auto pS1 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1->Lookup("S"));
+                if (pS1 && pS1->GetValue() == "L")
+                {
+                    ++nL;
+                    auto pKids1 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject1->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids1);
+                    // this is purely structural so there should be 1 child
+                    CPPUNIT_ASSERT_EQUAL(size_t(1), 
pKids1->GetElements().size());
+
+                    auto pRefKid11 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids1->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid11);
+                    auto pObject11 = pRefKid11->LookupObject();
+                    CPPUNIT_ASSERT(pObject11);
+                    auto pType11 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject11->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType11);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType11->GetValue());
+                    auto pS11 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject11->Lookup("S"));
+                    CPPUNIT_ASSERT(pS11);
+                    CPPUNIT_ASSERT_EQUAL(OString("LI"), pS11->GetValue());
+                    // LI has 2 children: Lbl and LBody
+                    auto pKids11 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject11->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids11);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids11->GetElements().size());
+
+                    auto pRefKid111 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids11->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid111);
+                    auto pObject111 = pRefKid111->LookupObject();
+                    CPPUNIT_ASSERT(pObject111);
+                    auto pType111 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject111->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType111);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType111->GetValue());
+                    auto pS111 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject111->Lookup("S"));
+                    CPPUNIT_ASSERT(pS111);
+                    CPPUNIT_ASSERT_EQUAL(OString("Lbl"), pS111->GetValue());
+                    // Lbl has 2 children: the first 2 mcids (in order)
+                    auto pKids111 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject111->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids111);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids111->GetElements().size());
+
+                    auto pRefKid1111 = 
dynamic_cast<vcl::filter::PDFNumberElement*>(pKids111->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid1111);
+                    CPPUNIT_ASSERT_EQUAL(mcids[0], 
int(pRefKid1111->GetValue()));
+                    auto pRefKid1112 = 
dynamic_cast<vcl::filter::PDFNumberElement*>(pKids111->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid1112);
+                    CPPUNIT_ASSERT_EQUAL(mcids[1], 
int(pRefKid1112->GetValue()));
+
+                    auto pRefKid112 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids11->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid112);
+                    auto pObject112 = pRefKid112->LookupObject();
+                    CPPUNIT_ASSERT(pObject112);
+                    auto pType112 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType112);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType112->GetValue());
+                    auto pS112 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112->Lookup("S"));
+                    CPPUNIT_ASSERT(pS112);
+                    CPPUNIT_ASSERT_EQUAL(OString("LBody"), pS112->GetValue());
+                    // LBody has 2 children: paragraph and nested L (in order)
+                    auto pKids112 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject112->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids112);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids112->GetElements().size());
+
+                    auto pRefKid1121 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids112->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid1121);
+                    auto pObject1121 = pRefKid1121->LookupObject();
+                    CPPUNIT_ASSERT(pObject1121);
+                    auto pType1121 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1121->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType1121);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType1121->GetValue());
+                    auto pS1121 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1121->Lookup("S"));
+                    CPPUNIT_ASSERT(pS1121);
+                    CPPUNIT_ASSERT_EQUAL(OString("Standard"), 
pS1121->GetValue());
+
+                    auto pRefKid1122 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids112->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid1122);
+                    auto pObject1122 = pRefKid1122->LookupObject();
+                    CPPUNIT_ASSERT(pObject1122);
+                    auto pType1122 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType1122);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType1122->GetValue());
+                    auto pS1122 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122->Lookup("S"));
+                    CPPUNIT_ASSERT(pS1122);
+                    CPPUNIT_ASSERT_EQUAL(OString("L"), pS1122->GetValue());
+                    auto pKids1122 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject1122->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids1122);
+                    // this is purely structural so there should be 1 child
+                    CPPUNIT_ASSERT_EQUAL(size_t(1), 
pKids1122->GetElements().size());
+
+                    auto pRefKid11221 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids1122->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid11221);
+                    auto pObject11221 = pRefKid11221->LookupObject();
+                    CPPUNIT_ASSERT(pObject11221);
+                    auto pType11221 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject11221->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType11221);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType11221->GetValue());
+                    auto pS11221 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject11221->Lookup("S"));
+                    CPPUNIT_ASSERT(pS11221);
+                    CPPUNIT_ASSERT_EQUAL(OString("LI"), pS11221->GetValue());
+                    // LI has 2 children: Lbl and LBody
+                    auto pKids11221 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject11221->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids11221);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids11221->GetElements().size());
+
+                    auto pRefKid112211 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids11221->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid112211);
+                    auto pObject112211 = pRefKid112211->LookupObject();
+                    CPPUNIT_ASSERT(pObject112211);
+                    auto pType112211 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112211->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType112211);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType112211->GetValue());
+                    auto pS112211 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112211->Lookup("S"));
+                    CPPUNIT_ASSERT(pS112211);
+                    CPPUNIT_ASSERT_EQUAL(OString("Lbl"), pS112211->GetValue());
+                    // Lbl has 2 children: the 3rd and 4th mcids (in order)
+                    auto pKids112211 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject112211->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids112211);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids112211->GetElements().size());
+
+                    auto pRefKid1122111 = 
dynamic_cast<vcl::filter::PDFNumberElement*>(pKids112211->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid1122111);
+                    CPPUNIT_ASSERT_EQUAL(mcids[2], 
int(pRefKid1122111->GetValue()));
+                    auto pRefKid1122112 = 
dynamic_cast<vcl::filter::PDFNumberElement*>(pKids112211->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid1122112);
+                    CPPUNIT_ASSERT_EQUAL(mcids[3], 
int(pRefKid1122112->GetValue()));
+
+                    auto pRefKid112212 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids11221->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid112212);
+                    auto pObject112212 = pRefKid112212->LookupObject();
+                    CPPUNIT_ASSERT(pObject112212);
+                    auto pType112212 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112212->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType112212);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType112212->GetValue());
+                    auto pS112212 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112212->Lookup("S"));
+                    CPPUNIT_ASSERT(pS112212);
+                    CPPUNIT_ASSERT_EQUAL(OString("LBody"), 
pS112212->GetValue());
+                    // LBody has 2 children: paragraph and nested L (in order)
+                    auto pKids112212 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject112212->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids112212);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids112212->GetElements().size());
+
+                    auto pRefKid1122121 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids112212->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid1122121);
+                    auto pObject1122121 = pRefKid1122121->LookupObject();
+                    CPPUNIT_ASSERT(pObject1122121);
+                    auto pType1122121 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122121->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType1122121);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType1122121->GetValue());
+                    auto pS1122121 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122121->Lookup("S"));
+                    CPPUNIT_ASSERT(pS1122121);
+                    CPPUNIT_ASSERT_EQUAL(OString("Standard"), 
pS1122121->GetValue());
+
+                    auto pRefKid1122122 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids112212->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid1122122);
+                    auto pObject1122122 = pRefKid1122122->LookupObject();
+                    CPPUNIT_ASSERT(pObject1122122);
+                    auto pType1122122 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122122->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType1122122);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType1122122->GetValue());
+                    auto pS1122122 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122122->Lookup("S"));
+                    CPPUNIT_ASSERT(pS1122122);
+                    CPPUNIT_ASSERT_EQUAL(OString("L"), pS1122122->GetValue());
+                    auto pKids1122122 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject1122122->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids1122122);
+                    // this is purely structural so there should be 1 child
+                    CPPUNIT_ASSERT_EQUAL(size_t(1), 
pKids1122122->GetElements().size());
+
+                    auto pRefKid11221221 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids1122122->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid11221221);
+                    auto pObject11221221 = pRefKid11221221->LookupObject();
+                    CPPUNIT_ASSERT(pObject11221221);
+                    auto pType11221221 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject11221221->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType11221221);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType11221221->GetValue());
+                    auto pS11221221 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject11221221->Lookup("S"));
+                    CPPUNIT_ASSERT(pS11221221);
+                    CPPUNIT_ASSERT_EQUAL(OString("LI"), 
pS11221221->GetValue());
+                    // LI has 2 children: Lbl and LBody
+                    auto pKids11221221 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject11221221->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids11221221);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids11221221->GetElements().size());
+
+                    auto pRefKid112212211 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids11221221->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid112212211);
+                    auto pObject112212211 = pRefKid112212211->LookupObject();
+                    CPPUNIT_ASSERT(pObject112212211);
+                    auto pType112212211 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112212211->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType112212211);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType112212211->GetValue());
+                    auto pS112212211 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112212211->Lookup("S"));
+                    CPPUNIT_ASSERT(pS112212211);
+                    CPPUNIT_ASSERT_EQUAL(OString("Lbl"), 
pS112212211->GetValue());
+                    // Lbl has 2 children: the 5th and 6th mcids (in order)
+                    auto pKids112212211 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject112212211->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids112212211);
+                    CPPUNIT_ASSERT_EQUAL(size_t(2), 
pKids112212211->GetElements().size());
+
+                    auto pRefKid1122122111 = 
dynamic_cast<vcl::filter::PDFNumberElement*>(pKids112212211->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid1122122111);
+                    CPPUNIT_ASSERT_EQUAL(mcids[4], 
int(pRefKid1122122111->GetValue()));
+                    auto pRefKid1122122112 = 
dynamic_cast<vcl::filter::PDFNumberElement*>(pKids112212211->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid1122122112);
+                    CPPUNIT_ASSERT_EQUAL(mcids[5], 
int(pRefKid1122122112->GetValue()));
+
+                    auto pRefKid112212212 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids11221221->GetElements()[1]);
+                    CPPUNIT_ASSERT(pRefKid112212212);
+                    auto pObject112212212 = pRefKid112212212->LookupObject();
+                    CPPUNIT_ASSERT(pObject112212212);
+                    auto pType112212212 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112212212->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType112212212);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType112212212->GetValue());
+                    auto pS112212212 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject112212212->Lookup("S"));
+                    CPPUNIT_ASSERT(pS112212212);
+                    CPPUNIT_ASSERT_EQUAL(OString("LBody"), 
pS112212212->GetValue());
+                    // inner LBody has 1 child: paragraph
+                    auto pKids112212212 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject112212212->Lookup("K"));
+                    CPPUNIT_ASSERT(pKids112212212);
+                    CPPUNIT_ASSERT_EQUAL(size_t(1), 
pKids112212212->GetElements().size());
+
+                    auto pRefKid1122122121 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKids112212212->GetElements()[0]);
+                    CPPUNIT_ASSERT(pRefKid1122122121);
+                    auto pObject1122122121 = pRefKid1122122121->LookupObject();
+                    CPPUNIT_ASSERT(pObject1122122121);
+                    auto pType1122122121 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122122121->Lookup("Type"));
+                    CPPUNIT_ASSERT(pType1122122121);
+                    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType1122122121->GetValue());
+                    auto pS1122122121 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject1122122121->Lookup("S"));
+                    CPPUNIT_ASSERT(pS1122122121);
+                    CPPUNIT_ASSERT_EQUAL(OString("Standard"), 
pS1122122121->GetValue());
+                }
+            }
+        }
+    }
+    CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nL)>(1), nL);
+}
+
 CPPUNIT_TEST_FIXTURE(Test, testTdf143311)
 {
     createSwDoc("tdf143311-1.docx");
diff --git a/sw/source/core/text/EnhancedPDFExportHelper.cxx 
b/sw/source/core/text/EnhancedPDFExportHelper.cxx
index d6fada0c90fb..c1a6995bd2aa 100644
--- a/sw/source/core/text/EnhancedPDFExportHelper.cxx
+++ b/sw/source/core/text/EnhancedPDFExportHelper.cxx
@@ -154,6 +154,7 @@ constexpr OUStringLiteral aH9String = u"H9";
 constexpr OUStringLiteral aH10String = u"H10";
 constexpr OUStringLiteral aListString = u"L";
 constexpr OUStringLiteral aListItemString = u"LI";
+constexpr OUStringLiteral aListLabelString = u"Lbl";
 constexpr OUStringLiteral aListBodyString = u"LBody";
 constexpr OUStringLiteral aBlockQuoteString = u"BlockQuote";
 constexpr OUStringLiteral aCaptionString = u"Caption";
@@ -476,6 +477,15 @@ void SwTaggedPDFHelper::BeginTag( 
vcl::PDFWriter::StructElement eType, const OUS
     {
         const SwFrame& rFrame = mpFrameInfo->mrFrame;
 
+        if (vcl::PDFWriter::LIBody == eType && rFrame.IsTextFrame())
+        {
+            SwTextFrame const& rTextFrame(static_cast<const 
SwTextFrame&>(rFrame));
+            SwTextNode const*const 
pTextNd(rTextFrame.GetTextNodeForParaProps());
+            SwNodeNum const*const 
pNodeNum(pTextNd->GetNum(rTextFrame.getRootFrame()));
+            NumListBodyIdMap& rNumListBodyIdMap = 
SwEnhancedPDFExportHelper::GetNumListBodyIdMap();
+            rNumListBodyIdMap[ pNodeNum ] = nId;
+        }
+
         if ( ( rFrame.IsPageFrame() && !static_cast<const 
SwPageFrame&>(rFrame).GetPrev() ) ||
              ( rFrame.IsFlowFrame() && 
!SwFlowFrame::CastFlowFrame(&rFrame)->IsFollow() && 
SwFlowFrame::CastFlowFrame(&rFrame)->HasFollow() ) ||
              ( rFrame.IsTextFrame() && rFrame.GetDrawObjs() ) ||
@@ -1026,7 +1036,12 @@ void 
SwTaggedPDFHelper::BeginNumberedListStructureElements()
     if ( bNewItemTag )
     {
         BeginTag( vcl::PDFWriter::ListItem, aListItemString );
-        BeginTag( vcl::PDFWriter::LIBody, aListBodyString );
+        assert(rTextFrame.GetPara());
+        // check whether to open LIBody now or delay until after Lbl
+        if (!rTextFrame.GetPara()->HasNumberingPortion())
+        {
+            BeginTag(vcl::PDFWriter::LIBody, aListBodyString);
+        }
     }
 }
 
@@ -1125,8 +1140,15 @@ void SwTaggedPDFHelper::BeginBlockStructureElements()
 
         case SwFrameType::Txt :
             {
-                const SwTextNode* pTextNd =
-                    static_cast<const 
SwTextFrame*>(pFrame)->GetTextNodeForParaProps();
+                SwTextFrame const& rTextFrame(*static_cast<const 
SwTextFrame*>(pFrame));
+                // lazy open LIBody after Lbl
+                if (rTextFrame.GetPara()->HasNumberingPortion())
+                {
+                    assert(!rTextFrame.IsFollow());
+                    BeginTag(vcl::PDFWriter::LIBody, aListBodyString);
+                }
+
+                const SwTextNode *const 
pTextNd(rTextFrame.GetTextNodeForParaProps());
 
                 const SwFormat* pTextFormat = pTextNd->GetFormatColl();
                 const SwFormat* pParentTextFormat = pTextFormat ? 
pTextFormat->DerivedFrom() : nullptr;
@@ -1519,6 +1541,16 @@ void SwTaggedPDFHelper::BeginInlineStructureElements()
             }
             break;
 
+        case PortionType::Number:
+        case PortionType::Bullet:
+        case PortionType::GrfNum:
+            if (mpPorInfo->m_isNumberingLabel)
+            {   // only works for multiple lines via wrapper from PaintSwFrame
+                nPDFType = vcl::PDFWriter::LILabel;
+                aPDFType = aListLabelString;
+            }
+            break;
+
         case PortionType::Tab :
         case PortionType::TabRight :
         case PortionType::TabCenter :
diff --git a/sw/source/core/text/frmpaint.cxx b/sw/source/core/text/frmpaint.cxx
index 2af086d9fd69..0e4a7f677917 100644
--- a/sw/source/core/text/frmpaint.cxx
+++ b/sw/source/core/text/frmpaint.cxx
@@ -647,12 +647,6 @@ void SwTextFrame::PaintSwFrame(vcl::RenderContext& rRenderContext, SwRect const&
     // #i16816# tagged pdf support
     SwViewShell *pSh = getRootFrame()->GetCurrShell();
 
-    Num_Info aNumInfo( *this );
-    SwTaggedPDFHelper aTaggedPDFHelperNumbering( &aNumInfo, nullptr, nullptr, rRenderContext );
-
-    Frame_Info aFrameInfo( *this );
-    SwTaggedPDFHelper aTaggedPDFHelperParagraph( nullptr, &aFrameInfo, nullptr, rRenderContext );
-
     if( IsEmpty() && PaintEmpty( rRect, true ) )
         return;
 
@@ -679,6 +673,22 @@ void SwTextFrame::PaintSwFrame(vcl::RenderContext& rRenderContext, SwRect const&
         }
     }
 
+    Num_Info aNumInfo( *this );
+    SwTaggedPDFHelper aTaggedPDFHelperNumbering( &aNumInfo, nullptr, nullptr, rRenderContext );
+
+    // Lbl unfortunately must be able to contain multiple numbering portions
+    // that may be on multiple lines of text (but apparently always in the
+    // master frame), so it gets complicated.
+    ::std::optional<SwTaggedPDFHelper> oTaggedLabel;
+    // Paragraph tag - if there is a list label, opening should be delayed.
+    ::std::optional<SwTaggedPDFHelper> oTaggedParagraph;
+
+    if (IsFollow() || !GetPara()->HasNumberingPortion())
+    {   // no Lbl needed => open paragraph tag now
+        Frame_Info aFrameInfo(*this);
+        oTaggedParagraph.emplace(nullptr, &aFrameInfo, nullptr, rRenderContext);
+    }
+
     // We don't want to be interrupted while painting.
     // Do that after thr Format()!
     TextFrameLockGuard aLock(const_cast<SwTextFrame*>(this));
@@ -763,7 +773,7 @@ void SwTextFrame::PaintSwFrame(vcl::RenderContext& rRenderContext, SwRect const&
         {
             do
             {
-                aLine.DrawTextLine( rRect, aClip, IsUndersized() );
+                aLine.DrawTextLine(rRect, aClip, IsUndersized(), oTaggedLabel, oTaggedParagraph);
 
             } while( aLine.Next() && aLine.Y() <= nBottom );
         }
@@ -780,6 +790,8 @@ void SwTextFrame::PaintSwFrame(vcl::RenderContext& rRenderContext, SwRect const&
 
     OSL_ENSURE( ! IsSwapped(), "A frame is swapped after Paint" );
 
+    assert(!oTaggedLabel); // must have been closed if opened
+    assert(oTaggedParagraph || rRect.GetIntersection(getFrameArea()) != getFrameArea()); // must have been created during complete paint (PDF export is always complete paint)
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/core/text/itrpaint.cxx b/sw/source/core/text/itrpaint.cxx
index a66d358645f0..474cd4811317 100644
--- a/sw/source/core/text/itrpaint.cxx
+++ b/sw/source/core/text/itrpaint.cxx
@@ -32,6 +32,7 @@
 #include <txtfrm.hxx>
 #include <swfont.hxx>
 #include "txtpaint.hxx"
+#include "porfld.hxx"
 #include "portab.hxx"
 #include <txatbase.hxx>
 #include <charfmt.hxx>
@@ -118,7 +119,9 @@ SwLinePortion *SwTextPainter::CalcPaintOfst( const SwRect &rPaint )
 //    (objectively slow, subjectively fast)
 // Since the user usually judges subjectively the second method is set as default.
 void SwTextPainter::DrawTextLine( const SwRect &rPaint, SwSaveClip &rClip,
-                                 const bool bUnderSized )
+    const bool bUnderSized,
+    ::std::optional<SwTaggedPDFHelper> & roTaggedLabel,
+    ::std::optional<SwTaggedPDFHelper> & roTaggedParagraph)
 {
 #if OSL_DEBUG_LEVEL > 1
 //    sal_uInt16 nFntHeight = GetInfo().GetFont()->GetHeight( GetInfo().GetVsh(), GetInfo().GetOut() );
@@ -386,9 +389,19 @@ void SwTextPainter::DrawTextLine( const SwRect &rPaint, SwSaveClip &rClip,
             GetInfo().SetUnderFnt( nullptr );
         }
 
+        // multiple numbering portions are possible :(
+        if (pPor->InNumberGrp() && !pPor->IsFootnoteNumPortion()
+            && !static_cast<SwNumberPortion const*>(pPor)->IsFollow())
+        {
+            assert(!roTaggedLabel);
+            assert(!m_pFrame->IsFollow());
+            Por_Info aPorInfo(*pPor, *this, true); // open Lbl
+            roTaggedLabel.emplace(nullptr, nullptr, &aPorInfo, *pOut);
+        }
+
         {
             // #i16816# tagged pdf support
-            Por_Info aPorInfo( *pPor, *this );
+            Por_Info aPorInfo(*pPor, *this, false);
             SwTaggedPDFHelper aTaggedPDFHelper( nullptr, nullptr, &aPorInfo, *pOut );
 
             if( pPor->IsMultiPortion() )
@@ -397,6 +410,19 @@ void SwTextPainter::DrawTextLine( const SwRect &rPaint, SwSaveClip &rClip,
                 pPor->Paint( GetInfo() );
         }
 
+        // lazy open LIBody and paragraph tag after num portions have been painted to Lbl
+        if (pPor->InNumberGrp() && !pPor->IsFootnoteNumPortion()
+            // note: numbering portion may be split if it has multiple scripts
+            && !static_cast<SwNumberPortion const*>(pPor)->HasFollow()) // so wait for the last one
+        {
+            assert(roTaggedLabel);
+            roTaggedLabel.reset(); // close Lbl
+            assert(!roTaggedParagraph);
+            assert(!m_pFrame->IsFollow());
+            Frame_Info aFrameInfo(*m_pFrame); // open LIBody
+            roTaggedParagraph.emplace(nullptr, &aFrameInfo, nullptr, *pOut);
+        }
+
         // reset underline font
         if ( pOldUnderLineFnt )
             GetInfo().SetUnderFnt( pOldUnderLineFnt );
diff --git a/sw/source/core/text/itrpaint.hxx b/sw/source/core/text/itrpaint.hxx
index c016cff4e735..a941e2125c8b 100644
--- a/sw/source/core/text/itrpaint.hxx
+++ b/sw/source/core/text/itrpaint.hxx
@@ -21,8 +21,11 @@
 
 #include "itrtxt.hxx"
 
+#include <optional>
+
 class SwSaveClip;          // SwTextPainter
 class SwMultiPortion;
+class SwTaggedPDFHelper;
 
 class SwTextPainter : public SwTextCursor
 {
@@ -46,7 +49,9 @@ public:
         CtorInitTextPainter( pTextFrame, pTextPaintInf );
     }
     void DrawTextLine( const SwRect &rPaint, SwSaveClip &rClip,
-                       const bool bUnderSz );
+        const bool bUnderSz,
+        ::std::optional<SwTaggedPDFHelper> & roTaggedLabel,
+        ::std::optional<SwTaggedPDFHelper> & roTaggedParagraph);
     void PaintDropPortion();
     // if PaintMultiPortion is called recursively, we have to pass the
     // surrounding SwBidiPortion
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 3b00fac71031..7b66d17a2c9a 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -2683,6 +2683,21 @@ TextFrameIndex SwParaPortion::GetParLen() const
     return nLen;
 }
 
+bool SwParaPortion::HasNumberingPortion() const
+{
+    SwLinePortion const* pPortion(nullptr);
+    // the first line may contain only fly portion...
+    for (SwLineLayout const* pLine = this; pLine && !pPortion; pLine = pLine->GetNext())
+    {
+        pPortion = pLine->GetFirstPortion();
+        while (pPortion && (pPortion->InGlueGrp() || pPortion->IsFlyPortion()))
+        {   // skip margins and fly spacers - numbering should be first then
+            pPortion = pPortion->GetNextPortion();
+        }
+    }
+    return pPortion && pPortion->InNumberGrp() && !pPortion->IsFootnoteNumPortion();
+}
+
 const SwDropPortion *SwParaPortion::FindDropPortion() const
 {
     const SwLineLayout *pLay = this;
diff --git a/sw/source/core/text/porlay.hxx b/sw/source/core/text/porlay.hxx
index 3960671fcca6..9a2da25517bd 100644
--- a/sw/source/core/text/porlay.hxx
+++ b/sw/source/core/text/porlay.hxx
@@ -319,6 +319,7 @@ public:
     bool IsFootnoteNum() const { return m_bFootnoteNum; }
     void SetMargin( const bool bNew = true ) { m_bMargin = bNew; }
     bool IsMargin() const { return m_bMargin; }
+    bool HasNumberingPortion() const;
 
     // Set nErgo in the QuoVadisPortion
     void SetErgoSumNum( const OUString &rErgo );

Reply via email to