vcl/qa/cppunit/pdfexport/data/tdf164106.fodt | 133 +++++++++++++++++++++++++++ vcl/qa/cppunit/pdfexport/pdfexport2.cxx | 49 +++++++++ vcl/source/gdi/CommonSalLayout.cxx | 84 ++++++++++++++++- 3 files changed, 262 insertions(+), 4 deletions(-)
New commits: commit e9f16c0bf79f464bf58b52025d8ab57f4b8dc30a Author: Jonathan Clark <[email protected]> AuthorDate: Thu Dec 5 20:49:03 2024 -0700 Commit: Adolfo Jayme Barrientos <[email protected]> CommitDate: Sat Dec 7 20:38:10 2024 +0100 tdf#164106 Fix reordered glyph positioning with split grapheme clusters Due to formatting, grapheme clusters can possibly be split across multiple layouts. Layouts containing split grapheme clusters are created by laying out the complete string, and extracting only the necessary glyphs based on source codepoint index. This approach is good enough for most diacritic cases, but it cannot handle certain substitution cases where glyphs with advances would be interleaved with other layouts. Sub-layouts must be contiguous. This change introduces code to disable grapheme cluster splitting in these cases that cannot be handled correctly. Change-Id: I122abbf9c3f8a5efa4c72ad47991d0ad9ff8a8c0 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177927 Tested-by: Jenkins Reviewed-by: Jonathan Clark <[email protected]> (cherry picked from commit 9b2eaa37ba08b4d2c2f1c54a5fbbdaae19fbeedd) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177972 Reviewed-by: Adolfo Jayme Barrientos <[email protected]> diff --git a/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt b/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt new file mode 100644 index 000000000000..6d3866b43af0 --- /dev/null +++ b/vcl/qa/cppunit/pdfexport/data/tdf164106.fodt @@ -0,0 +1,133 @@ +<?xml version='1.0' encoding='UTF-8'?> +<office:document xmlns:css3t="http://www.w3.org/TR/css3-text/" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:c alcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:rpt="http://openoffice.org/2005/report" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:officeooo="http://openoffice.org/2009/office" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns: meta:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" office:version="1.4" office:mimetype="application/vnd.oasis.opendocument.text"> + <office:meta><meta:creation-date>2024-11-30T17:13:03</meta:creation-date><meta:initial-creator>your servant</meta:initial-creator><dc:language>en-US</dc:language><dc:date>2024-12-05T06:45:52.650400638</dc:date><meta:editing-cycles>6</meta:editing-cycles><meta:editing-duration>PT4M37S</meta:editing-duration><meta:generator>LibreOfficeDev/25.8.0.0.alpha0$Linux_X86_64 LibreOffice_project/5f4d5a012865d717040012eb0f698a725b82d4cc</meta:generator><meta:document-statistic meta:table-count="0" meta:image-count="0" meta:object-count="0" meta:page-count="1" meta:paragraph-count="2" meta:word-count="2" meta:character-count="16" meta:non-whitespace-character-count="16"/><meta:user-defined meta:name="AppVersion">15.0000</meta:user-defined><meta:template xlink:type="simple" xlink:actuate="onRequest" xlink:title="Normal" xlink:href=""/></office:meta> + <office:font-face-decls> + <style:font-face style:name="Arial" svg:font-family="Arial" style:font-family-generic="swiss" style:font-pitch="variable"/> + <style:font-face style:name="NSimSun" svg:font-family="NSimSun" style:font-family-generic="system" style:font-pitch="variable"/> + <style:font-face style:name="Noto Sans" svg:font-family="'Noto Sans'" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Tahoma1" svg:font-family="Tahoma" style:font-family-generic="system" style:font-pitch="variable"/> + <style:font-face style:name="Times New Roman" svg:font-family="'Times New Roman'" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Times New Roman1" svg:font-family="'Times New Roman'" style:font-family-generic="system" style:font-pitch="variable"/> + </office:font-face-decls> + <office:styles> + <style:default-style style:family="graphic"> + <style:graphic-properties svg:stroke-color="#3465a4" draw:fill-color="#729fcf" fo:wrap-option="no-wrap" draw:shadow-offset-x="0.1181in" draw:shadow-offset-y="0.1181in" draw:start-line-spacing-horizontal="0.1114in" draw:start-line-spacing-vertical="0.1114in" draw:end-line-spacing-horizontal="0.1114in" draw:end-line-spacing-vertical="0.1114in" style:writing-mode="lr-tb" style:flow-with-text="false"/> + <style:paragraph-properties style:text-autospace="ideograph-alpha" style:line-break="strict" loext:tab-stop-distance="0in" style:font-independent-line-spacing="false"> + <style:tab-stops/> + </style:paragraph-properties> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN"/> + </style:default-style> + <style:default-style style:family="paragraph"> + <style:paragraph-properties fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:text-autospace="ideograph-alpha" style:punctuation-wrap="hanging" style:line-break="strict" style:tab-stop-distance="0.4925in" style:writing-mode="lr-tb"/> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext:hyphenation-zone="no-limit"/> + </style:default-style> + <style:default-style style:family="table"> + <style:table-properties table:border-model="collapsing"/> + </style:default-style> + <style:default-style style:family="table-row"> + <style:table-row-properties fo:keep-together="auto"/> + </style:default-style> + <style:style style:name="LO-normal" style:family="paragraph"> + <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0in" style:contextual-spacing="false" fo:text-align="start" style:justify-single-word="false" fo:orphans="2" fo:widows="2" fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:writing-mode="lr-tb"/> + <style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Times New Roman" fo:font-family="'Times New Roman'" style:font-family-generic="roman" style:font-pitch="variable" fo:font-size="12pt" fo:language="fr" fo:country="CA" style:letter-kerning="false" style:font-name-asian="Times New Roman1" style:font-family-asian="'Times New Roman'" style:font-family-generic-asian="system" style:font-pitch-asian="variable" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Times New Roman1" style:font-family-complex="'Times New Roman'" style:font-family-generic-complex="system" style:font-pitch-complex="variable" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext: hyphenation-zone="no-limit"/> + </style:style> + <text:outline-style style:name="Outline"> + <text:outline-level-style text:level="1" loext:num-list-format="%1%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="2" loext:num-list-format="%2%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="3" loext:num-list-format="%3%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="4" loext:num-list-format="%4%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="5" loext:num-list-format="%5%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="6" loext:num-list-format="%6%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="7" loext:num-list-format="%7%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="8" loext:num-list-format="%8%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="9" loext:num-list-format="%9%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="10" loext:num-list-format="%10%" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + </text:outline-style> + <text:notes-configuration text:note-class="footnote" style:num-format="1" text:start-value="0" text:footnotes-position="page" text:start-numbering-at="document"/> + <text:notes-configuration text:note-class="endnote" style:num-format="i" text:start-value="0"/> + <text:linenumbering-configuration text:number-lines="false" text:offset="0.1965in" style:num-format="1" text:number-position="left" text:increment="5"/> + <style:default-page-layout> + <style:page-layout-properties style:writing-mode="lr-tb" style:layout-grid-standard-mode="true"/> + </style:default-page-layout> + </office:styles> + <office:automatic-styles> + <style:style style:name="P1" style:family="paragraph" style:parent-style-name="LO-normal"> + <style:text-properties style:font-name-complex="Noto Sans"/> + </style:style> + <style:style style:name="P2" style:family="paragraph" style:parent-style-name="LO-normal"> + <style:text-properties fo:color="#0000ff" loext:opacity="100%" style:font-name-complex="Noto Sans"/> + </style:style> + <style:style style:name="T1" style:family="text"> + <style:text-properties fo:color="#00ff00" loext:opacity="100%"/> + </style:style> + <style:style style:name="T2" style:family="text"> + <style:text-properties fo:color="#0000ff" loext:opacity="100%"/> + </style:style> + <style:page-layout style:name="pm1"> + <style:page-layout-properties fo:page-width="8.5in" fo:page-height="11in" style:num-format="1" style:print-orientation="portrait" fo:margin-top="0.7874in" fo:margin-bottom="0.7874in" fo:margin-left="0.7874in" fo:margin-right="0.7874in" style:writing-mode="lr-tb" style:layout-grid-color="#c0c0c0" style:layout-grid-lines="136" style:layout-grid-base-height="0.0693in" style:layout-grid-ruby-height="0in" style:layout-grid-mode="none" style:layout-grid-ruby-below="false" style:layout-grid-print="false" style:layout-grid-display="false" style:layout-grid-base-width="0.1665in" style:layout-grid-snap-to="true" style:footnote-max-height="0in" loext:margin-gutter="0in"> + <style:footnote-sep style:width="0.0071in" style:distance-before-sep="0.0398in" style:distance-after-sep="0.0398in" style:line-style="solid" style:adjustment="left" style:rel-width="25%" style:color="#000000"/> + </style:page-layout-properties> + <style:header-style/> + <style:footer-style/> + </style:page-layout> + <style:style style:name="dp1" style:family="drawing-page"> + <style:drawing-page-properties draw:background-size="full"/> + </style:style> + </office:automatic-styles> + <office:master-styles> + <style:master-page style:name="Standard" style:page-layout-name="pm1" draw:style-name="dp1"/> + </office:master-styles> + <office:body> + <office:text> + <text:sequence-decls> + <text:sequence-decl text:display-outline-level="0" text:name="Illustration"/> + <text:sequence-decl text:display-outline-level="0" text:name="Table"/> + <text:sequence-decl text:display-outline-level="0" text:name="Text"/> + <text:sequence-decl text:display-outline-level="0" text:name="Drawing"/> + <text:sequence-decl text:display-outline-level="0" text:name="Figure"/> + </text:sequence-decls> + <text:p text:style-name="P1"><text:span text:style-name="T1">वीथीर्</text:span><text:span text:style-name="T2">भजनमार्गान्</text:span></text:p> + <text:p text:style-name="P2">वीथीर्भजनमार्गान्</text:p> + </office:text> + </office:body> +</office:document> diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx index 3e36171f4ba0..20acdc856aeb 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx @@ -6077,6 +6077,55 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf162750SmallCapsLigature) CPPUNIT_ASSERT_EQUAL(u"FI"_ustr, aText.at(2).trim()); } +CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf164106SplitReorderedClusters) +{ + saveAsPDF(u"tdf164106.fodt"); + + auto pPdfDocument = parsePDFExport(); + CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount()); + + auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0); + CPPUNIT_ASSERT(pPdfPage); + auto pTextPage = pPdfPage->getTextPage(); + CPPUNIT_ASSERT(pTextPage); + + int nPageObjectCount = pPdfPage->getObjectCount(); + + CPPUNIT_ASSERT_EQUAL(14, nPageObjectCount); + + std::vector<OUString> aText; + std::vector<basegfx::B2DRectangle> aRect; + + for (int i = 0; i < nPageObjectCount; ++i) + { + auto pPageObject = pPdfPage->getObject(i); + CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr); + if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text) + { + aText.push_back(pPageObject->getText(pTextPage)); + aRect.push_back(pPageObject->getBounds()); + } + } + + CPPUNIT_ASSERT_EQUAL(size_t(14), aText.size()); + + auto fnCompareIndices = [&](size_t nSplit, size_t nCombined) { + CPPUNIT_ASSERT_EQUAL(aText.at(nSplit).trim(), aText.at(nCombined).trim()); + CPPUNIT_ASSERT_DOUBLES_EQUAL(aRect.at(nSplit).getMinX(), aRect.at(nCombined).getMinX(), + /*delta*/ 0.2); + CPPUNIT_ASSERT_DOUBLES_EQUAL(aRect.at(nSplit).getMaxX(), aRect.at(nCombined).getMaxX(), + /*delta*/ 0.2); + }; + + fnCompareIndices(0, 7); + fnCompareIndices(1, 8); + fnCompareIndices(2, 9); + fnCompareIndices(3, 10); + fnCompareIndices(4, 11); + fnCompareIndices(5, 12); + fnCompareIndices(6, 13); +} + } // end anonymous namespace CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/vcl/source/gdi/CommonSalLayout.cxx b/vcl/source/gdi/CommonSalLayout.cxx index 732c799e003b..b40851a2f18c 100644 --- a/vcl/source/gdi/CommonSalLayout.cxx +++ b/vcl/source/gdi/CommonSalLayout.cxx @@ -151,6 +151,14 @@ public: return nClusterId; } + void Reset() + { + for (auto& rElement : m_aGlyphs) + { + rElement.second.m_bUsed = false; + } + } + void ShapeSubRun(const sal_Unicode* pStr, const int nLength, const SubRun& aSubRun, hb_font_t* pHbFont, const std::vector<hb_feature_t>& maFeatures, hb_language_t oHbLanguage) @@ -598,6 +606,73 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay hb_glyph_info_t *pHbGlyphInfos = hb_buffer_get_glyph_infos(pHbBuffer, nullptr); hb_glyph_position_t *pHbPositions = hb_buffer_get_glyph_positions(pHbBuffer, nullptr); + // tdf#164106: Grapheme clusters can be split across multiple layouts. To do this, + // the complete string is laid out, and only the necessary glyphs are extracted. + // These sub-layouts are positioned side-by-side to form the complete text. + // This approach is good enough for most diacritic cases, but it cannot handle cases + // where a glyph with an advance is reordered into a different sub-layout. + bool bStartClusterOutOfOrder = false; + bool bEndClusterOutOfOrder = false; + { + double nNormalAdvance = 0.0; + double nStartAdvance = 0.0; + double nEndAdvance = 0.0; + + auto fnHandleGlyph = [&](int i) + { + int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint; + int32_t nCluster = pHbGlyphInfos[i].cluster; + auto nOrigCharPos = stClusterMapper.RemapGlyph(nCluster, nGlyphIndex); + + double nAdvance = 0.0; + if (aSubRun.maDirection == HB_DIRECTION_TTB) + { + nAdvance = -pHbPositions[i].y_advance; + } + else + { + nAdvance = pHbPositions[i].x_advance; + } + + nNormalAdvance += nAdvance; + + if (nOrigCharPos < rArgs.mnDrawMinCharPos) + { + nStartAdvance += nAdvance; + if (nStartAdvance != nNormalAdvance) + { + bStartClusterOutOfOrder = true; + } + } + + if (nOrigCharPos < rArgs.mnDrawEndCharPos) + { + nEndAdvance += nAdvance; + if (nEndAdvance != nNormalAdvance) + { + bEndClusterOutOfOrder = true; + } + } + }; + + if (bRightToLeft) + { + for (int i = nRunGlyphCount - 1; i >= 0; --i) + { + fnHandleGlyph(i); + } + } + else + { + for (int i = 0; i < nRunGlyphCount; ++i) + { + fnHandleGlyph(i); + } + } + + stClusterMapper.Reset(); + } + for (int i = 0; i < nRunGlyphCount; ++i) { int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint; int32_t nCharPos = pHbGlyphInfos[i].cluster; @@ -740,14 +815,15 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay const GlyphItem aGI(nCharPos, nCharCount, nGlyphIndex, aNewPos, nGlyphFlags, nAdvance, nXOffset, nYOffset, nOrigCharPos); - if (aGI.origCharPos() >= rArgs.mnDrawMinCharPos - && aGI.origCharPos() < rArgs.mnDrawEndCharPos) + auto nLowerBound = (bStartClusterOutOfOrder ? aGI.charPos() : aGI.origCharPos()); + auto nUpperBound = (bEndClusterOutOfOrder ? aGI.charPos() : aGI.origCharPos()); + if (nLowerBound >= rArgs.mnDrawMinCharPos && nUpperBound < rArgs.mnDrawEndCharPos) { m_GlyphItems.push_back(aGI); } - if (aGI.origCharPos() >= rArgs.mnDrawOriginCluster - && aGI.origCharPos() < rArgs.mnDrawEndCharPos) + if (nLowerBound >= rArgs.mnDrawOriginCluster + && nUpperBound < rArgs.mnDrawEndCharPos) { nCurrX += nAdvance; }
