sw/qa/filter/md/md.cxx | 45 ++++++++++++++++++++++++++++++++++++++++++ sw/source/filter/md/wrtmd.cxx | 25 ++++++++++++++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-)
New commits: commit 1f20ef07209902b114dd444fff75f22c2dba70aa Author: Miklos Vajna <[email protected]> AuthorDate: Tue Sep 9 08:37:26 2025 +0200 Commit: Caolán McNamara <[email protected]> CommitDate: Tue Sep 9 09:50:26 2025 +0200 tdf#168317 sw markdown export: handle code block The bugdoc has a multi-paragraph code block, import that to Writer, save as markdown, the text is written as normal text. Note that unlike block quote or heading, there is no marker at the start of all paragraphs: only before the first and after the last one. However, in the Writer doc model, this is a per-paragraph setting for the used style. Fix the problem by checking if the para style is RES_POOLCOLL_HTML_PRE, then seeing if there is a similar paragraph before/after, so we emit the start/end marker only once, even for multi-paragraph code blocks. Note that our own markdown import currently creates single-paragraph code blocks by replacing paragraph breaks with newlines. Change-Id: I0803bc71c758f250489d11e90b7f090966c8a8cc Reviewed-on: https://gerrit.libreoffice.org/c/core/+/190687 Tested-by: Jenkins CollaboraOffice <[email protected]> Reviewed-by: Caolán McNamara <[email protected]> diff --git a/sw/qa/filter/md/md.cxx b/sw/qa/filter/md/md.cxx index f1a47d89e9a9..aea24eec80e2 100644 --- a/sw/qa/filter/md/md.cxx +++ b/sw/qa/filter/md/md.cxx @@ -418,6 +418,51 @@ CPPUNIT_TEST_FIXTURE(Test, testBlockQuoteMdExport) CPPUNIT_ASSERT_EQUAL(aExpected, aActual); } +CPPUNIT_TEST_FIXTURE(Test, testCodeBlockMdExport) +{ + // Given a document that has a multi-paragraph code block: + createSwDoc(); + SwDocShell* pDocShell = getSwDocShell(); + SwWrtShell* pWrtShell = pDocShell->GetWrtShell(); + pWrtShell->Insert(u"A"_ustr); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"B"_ustr); + SwCursor* pCursor = pWrtShell->GetCursor(); + SwDoc* pDoc = pDocShell->GetDoc(); + IDocumentStylePoolAccess& rIDSPA = pDoc->getIDocumentStylePoolAccess(); + SwTextFormatColl* pColl = 
rIDSPA.GetTextCollFromPool(RES_POOLCOLL_HTML_PRE); + pDoc->SetTextFormatColl(*pCursor, pColl); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"C"_ustr); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"D"_ustr); + pColl = rIDSPA.GetTextCollFromPool(RES_POOLCOLL_STANDARD); + pDoc->SetTextFormatColl(*pCursor, pColl); + + // When saving that to markdown: + save(mpFilter); + + // Then make sure the code block is exported: + std::string aActual = TempFileToString(); + std::string aExpected( + // clang-format off + "A" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "```" SAL_NEWLINE_STRING + "B" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "C" SAL_NEWLINE_STRING + "```" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "D" SAL_NEWLINE_STRING + // clang-format on + ); + // Without the accompanying fix in place, this test would have failed with: + // - Actual : A B C D + // i.e. the code block formatting was lost. + CPPUNIT_ASSERT_EQUAL(aExpected, aActual); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sw/source/filter/md/wrtmd.cxx b/sw/source/filter/md/wrtmd.cxx index c4f9ed0ac18b..6e55f4792a28 100644 --- a/sw/source/filter/md/wrtmd.cxx +++ b/sw/source/filter/md/wrtmd.cxx @@ -457,7 +457,7 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); const SwFormatColl* pFormatColl = rNode.GetFormatColl(); - if (pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_BLOCKQUOTE) + if (pFormatColl && pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_BLOCKQUOTE) { // <https://spec.commonmark.org/0.31.2/#block-quotes> first block quote, then heading. 
rWrt.Strm().WriteUnicodeOrByteText(u"> "); @@ -537,6 +537,18 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir } } + if (pFormatColl && pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_PRE) + { + // Before the first paragraph of a code block, see + // <https://spec.commonmark.org/0.31.2/#fenced-code-blocks>. + SwTextNode* pPrevNode = rWrt.m_pDoc->GetNodes()[rNode.GetIndex() - 1]->GetTextNode(); + const SwFormatColl* pPrevColl = pPrevNode ? pPrevNode->GetFormatColl() : nullptr; + if (!pPrevColl || pPrevColl->GetPoolFormatId() != RES_POOLCOLL_HTML_PRE) + { + rWrt.Strm().WriteUnicodeOrByteText(u"```" SAL_NEWLINE_STRING); + } + } + sal_Int32 nStrPos = rWrt.m_pCurrentPam->GetPoint()->GetContentIndex(); sal_Int32 nEnd = rNodeText.getLength(); if (rWrt.m_pCurrentPam->GetPoint()->GetNode() == rWrt.m_pCurrentPam->GetMark()->GetNode()) @@ -621,6 +633,17 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir assert(positions.hintStarts.current() == nullptr); // Output final closing attributes OutFormattingChange(rWrt, positions, nEnd, currentStatus); + + if (pFormatColl && pFormatColl->GetPoolFormatId() == RES_POOLCOLL_HTML_PRE) + { + // After the last paragraph of a code block. + SwTextNode* pNextNode = rWrt.m_pDoc->GetNodes()[rNode.GetIndex() + 1]->GetTextNode(); + const SwFormatColl* pNextColl = pNextNode ? pNextNode->GetFormatColl() : nullptr; + if (!pNextColl || pNextColl->GetPoolFormatId() != RES_POOLCOLL_HTML_PRE) + { + rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING "```"); + } + } } bool bRowEnd = oCellInfo && oCellInfo->bRowEnd;
