 include/vcl/filter/PDFiumLibrary.hxx |   40 ++++++++++++++++++++++
 svx/source/svdraw/svdpdf.cxx         |   11 ++----
 vcl/qa/cppunit/PDFiumLibraryTest.cxx |   37 ++++++++++++++++++++
 vcl/source/pdf/PDFiumLibrary.cxx     |   62 +++++++++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+), 8 deletions(-)
New commits: commit 440cb3fb80d9fd356871eac410b9797f23433722 Author: Tomaž Vajngerl <tomaz.vajng...@collabora.co.uk> AuthorDate: Sun Jun 28 10:12:17 2020 +0200 Commit: Tomaž Vajngerl <qui...@gmail.com> CommitDate: Mon Jun 29 14:35:37 2020 +0200 pdf: add PDFiumTextPage and PDFiumPageObject + test Also use it in ImpSdrPdfImport. Change-Id: I6d353ef60d036c3516448e64a50b25a9befd5db8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97364 Tested-by: Jenkins Reviewed-by: Tomaž Vajngerl <qui...@gmail.com> diff --git a/include/vcl/filter/PDFiumLibrary.hxx b/include/vcl/filter/PDFiumLibrary.hxx index 35826097e45e..501f964f395d 100644 --- a/include/vcl/filter/PDFiumLibrary.hxx +++ b/include/vcl/filter/PDFiumLibrary.hxx @@ -69,6 +69,41 @@ public: std::unique_ptr<PDFiumAnnotation> getLinked(OString const& rKey); }; +class PDFiumTextPage; + +class VCL_DLLPUBLIC PDFiumPageObject final +{ +private: + FPDF_PAGEOBJECT mpPageObject; + + PDFiumPageObject(const PDFiumPageObject&) = delete; + PDFiumPageObject& operator=(const PDFiumPageObject&) = delete; + +public: + PDFiumPageObject(FPDF_PAGEOBJECT pPageObject); + ~PDFiumPageObject(); + + FPDF_PAGEOBJECT getPointer() { return mpPageObject; } + + int getType(); + OUString getText(std::unique_ptr<PDFiumTextPage> const& pTextPage); +}; + +class VCL_DLLPUBLIC PDFiumTextPage final +{ +private: + FPDF_TEXTPAGE mpTextPage; + + PDFiumTextPage(const PDFiumTextPage&) = delete; + PDFiumTextPage& operator=(const PDFiumTextPage&) = delete; + +public: + PDFiumTextPage(FPDF_TEXTPAGE pTextPage); + ~PDFiumTextPage(); + + FPDF_TEXTPAGE getPointer() { return mpTextPage; } +}; + class VCL_DLLPUBLIC PDFiumPage final { private: @@ -92,10 +127,15 @@ public: FPDF_PAGE getPointer() { return mpPage; } + int getObjectCount(); + std::unique_ptr<PDFiumPageObject> getObject(int nIndex); + int getAnnotationCount(); int getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation); std::unique_ptr<PDFiumAnnotation> getAnnotation(int nIndex); + + 
std::unique_ptr<PDFiumTextPage> getTextPage(); }; class VCL_DLLPUBLIC PDFiumDocument final diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx index 4b21cb2d08fe..9fa8c82f722b 100644 --- a/svx/source/svdraw/svdpdf.cxx +++ b/svx/source/svdraw/svdpdf.cxx @@ -160,17 +160,16 @@ void ImpSdrPdfImport::DoObjects(SvdProgressInfo* pProgrInfo, sal_uInt32* pAction SetupPageScale(dPageWidth, dPageHeight); // Load the page text to extract it when we get text elements. - FPDF_TEXTPAGE pTextPage = FPDFText_LoadPage(pPdfPage->getPointer()); + auto pTextPage = pPdfPage->getTextPage(); - const int nPageObjectCount = FPDFPage_CountObjects(pPdfPage->getPointer()); + const int nPageObjectCount = pPdfPage->getObjectCount(); if (pProgrInfo) pProgrInfo->SetActionCount(nPageObjectCount); for (int nPageObjectIndex = 0; nPageObjectIndex < nPageObjectCount; ++nPageObjectIndex) { - FPDF_PAGEOBJECT pPageObject - = FPDFPage_GetObject(pPdfPage->getPointer(), nPageObjectIndex); - ImportPdfObject(pPageObject, pTextPage, nPageObjectIndex); + auto pPageObject = pPdfPage->getObject(nPageObjectIndex); + ImportPdfObject(pPageObject->getPointer(), pTextPage->getPointer(), nPageObjectIndex); if (pProgrInfo && pActionsToReport) { (*pActionsToReport)++; @@ -184,8 +183,6 @@ void ImpSdrPdfImport::DoObjects(SvdProgressInfo* pProgrInfo, sal_uInt32* pAction } } } - - FPDFText_ClosePage(pTextPage); } } diff --git a/vcl/qa/cppunit/PDFiumLibraryTest.cxx b/vcl/qa/cppunit/PDFiumLibraryTest.cxx index 61b3981731f6..9c0c92607b14 100644 --- a/vcl/qa/cppunit/PDFiumLibraryTest.cxx +++ b/vcl/qa/cppunit/PDFiumLibraryTest.cxx @@ -35,6 +35,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase void testDocument(); void testPages(); + void testPageObjects(); void testAnnotationsMadeInEvince(); void testAnnotationsMadeInAcrobat(); void testTools(); @@ -42,6 +43,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase CPPUNIT_TEST_SUITE(PDFiumLibraryTest); CPPUNIT_TEST(testDocument); 
CPPUNIT_TEST(testPages); + CPPUNIT_TEST(testPageObjects); CPPUNIT_TEST(testAnnotationsMadeInEvince); CPPUNIT_TEST(testAnnotationsMadeInAcrobat); CPPUNIT_TEST(testTools); @@ -74,12 +76,35 @@ void PDFiumLibraryTest::testDocument() auto aSize = pDocument->getPageSize(0); CPPUNIT_ASSERT_EQUAL(612.0, aSize.getX()); CPPUNIT_ASSERT_EQUAL(792.0, aSize.getY()); +} + +void PDFiumLibraryTest::testPages() +{ + OUString aURL = getFullUrl("Pangram.pdf"); + SvFileStream aStream(aURL, StreamMode::READ); + GraphicFilter& rGraphicFilter = GraphicFilter::GetGraphicFilter(); + Graphic aGraphic = rGraphicFilter.ImportUnloadedGraphic(aStream); + aGraphic.makeAvailable(); + + auto pVectorGraphicData = aGraphic.getVectorGraphicData(); + CPPUNIT_ASSERT(pVectorGraphicData); + CPPUNIT_ASSERT_EQUAL(VectorGraphicDataType::Pdf, + pVectorGraphicData->getVectorGraphicDataType()); + + const void* pData = pVectorGraphicData->getVectorGraphicDataArray().getConstArray(); + int nLength = pVectorGraphicData->getVectorGraphicDataArrayLength(); + + auto pPdfium = vcl::pdf::PDFiumLibrary::get(); + auto pDocument = pPdfium->openDocument(pData, nLength); + CPPUNIT_ASSERT(pDocument); + + CPPUNIT_ASSERT_EQUAL(1, pDocument->getPageCount()); auto pPage = pDocument->openPage(0); CPPUNIT_ASSERT(pPage); } -void PDFiumLibraryTest::testPages() +void PDFiumLibraryTest::testPageObjects() { OUString aURL = getFullUrl("Pangram.pdf"); SvFileStream aStream(aURL, StreamMode::READ); @@ -103,6 +128,16 @@ void PDFiumLibraryTest::testPages() auto pPage = pDocument->openPage(0); CPPUNIT_ASSERT(pPage); + + CPPUNIT_ASSERT_EQUAL(12, pPage->getObjectCount()); + + auto pPageObject = pPage->getObject(0); + auto pTextPage = pPage->getTextPage(); + + CPPUNIT_ASSERT_EQUAL(1, pPageObject->getType()); + CPPUNIT_ASSERT_EQUAL(OUString("The quick, brown fox jumps over a lazy dog. DJs flock by when " + "MTV ax quiz prog. 
Junk MTV quiz "), + pPageObject->getText(pTextPage)); } void PDFiumLibraryTest::testAnnotationsMadeInEvince() diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx index 7e723c56bf88..92d0cf84a201 100644 --- a/vcl/source/pdf/PDFiumLibrary.cxx +++ b/vcl/source/pdf/PDFiumLibrary.cxx @@ -15,6 +15,7 @@ #include <vcl/filter/PDFiumLibrary.hxx> #include <fpdf_annot.h> #include <fpdf_edit.h> +#include <fpdf_text.h> namespace vcl::pdf { @@ -166,6 +167,19 @@ basegfx::B2DSize PDFiumDocument::getPageSize(int nIndex) int PDFiumDocument::getPageCount() { return FPDF_GetPageCount(mpPdfDocument); } +int PDFiumPage::getObjectCount() { return FPDFPage_CountObjects(mpPage); } + +std::unique_ptr<PDFiumPageObject> PDFiumPage::getObject(int nIndex) +{ + std::unique_ptr<PDFiumPageObject> pPDFiumPageObject; + FPDF_PAGEOBJECT pPageObject = FPDFPage_GetObject(mpPage, nIndex); + if (pPageObject) + { + pPDFiumPageObject = std::make_unique<PDFiumPageObject>(pPageObject); + } + return pPDFiumPageObject; +} + int PDFiumPage::getAnnotationCount() { return FPDFPage_GetAnnotCount(mpPage); } int PDFiumPage::getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation) @@ -184,6 +198,42 @@ std::unique_ptr<PDFiumAnnotation> PDFiumPage::getAnnotation(int nIndex) return pPDFiumAnnotation; } +std::unique_ptr<PDFiumTextPage> PDFiumPage::getTextPage() +{ + std::unique_ptr<PDFiumTextPage> pPDFiumTextPage; + FPDF_TEXTPAGE pTextPage = FPDFText_LoadPage(mpPage); + if (pTextPage) + { + pPDFiumTextPage = std::make_unique<PDFiumTextPage>(pTextPage); + } + return pPDFiumTextPage; +} + +PDFiumPageObject::PDFiumPageObject(FPDF_PAGEOBJECT pPageObject) + : mpPageObject(pPageObject) +{ +} + +PDFiumPageObject::~PDFiumPageObject() {} + +OUString PDFiumPageObject::getText(std::unique_ptr<PDFiumTextPage> const& pTextPage) +{ + OUString sReturnText; + + const int nBytes = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), nullptr, 0); + + std::unique_ptr<sal_Unicode[]> pText(new 
sal_Unicode[nBytes]); + + const int nActualBytes + = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), pText.get(), nBytes); + if (nActualBytes > 2) + sReturnText = OUString(pText.get()); + + return sReturnText; +} + +int PDFiumPageObject::getType() { return FPDFPageObj_GetType(mpPageObject); } + PDFiumAnnotation::PDFiumAnnotation(FPDF_ANNOTATION pAnnotation) : mpAnnotation(pAnnotation) { @@ -238,6 +288,18 @@ std::unique_ptr<PDFiumAnnotation> PDFiumAnnotation::getLinked(OString const& rKe } return pPDFiumAnnotation; } + +PDFiumTextPage::PDFiumTextPage(FPDF_TEXTPAGE pTextPage) + : mpTextPage(pTextPage) +{ +} + +PDFiumTextPage::~PDFiumTextPage() +{ + if (mpTextPage) + FPDFText_ClosePage(mpTextPage); +} + } // end vcl::pdf #endif // HAVE_FEATURE_PDFIUM _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits