glib/poppler-page.cc | 6 +++--- poppler/TextOutputDev.cc | 9 ++++++++- qt5/tests/check_search.cpp | 27 +++++++++++++++++++++++++++ qt6/tests/check_search.cpp | 27 +++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 4 deletions(-)
New commits: commit 8ce9069f5dff8636373c546f33495908a963777e Author: Nelson Benítez León <nbenit...@gmail.com> Date: Sat Apr 23 18:14:37 2022 -0400 fix multiline find_text() bug in two column docs Fix for a bug in double column documents where some single line matches are wrongly returned as being multiline matches. Includes test case for the bug. diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc index a9515f3b..8e361a58 100644 --- a/glib/poppler-page.cc +++ b/glib/poppler-page.cc @@ -864,7 +864,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text, xMin = 0; yMin = backwards ? height : 0; - continueMatch.x1 = G_MAXDOUBLE; // we use this to detect valid returned values + continueMatch.x1 = std::numeric_limits<double>::max(); // we use this to detect valid returned values while (text_dev->findText(ucs4, ucs4_len, false, true, // startAtTop, stopAtBottom start_at_last, @@ -881,7 +881,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text, matches = g_list_prepend(matches, match); start_at_last = TRUE; - if (continueMatch.x1 != G_MAXDOUBLE) { + if (continueMatch.x1 != std::numeric_limits<double>::max()) { // received rect for next-line part of a multi-line match, add it. if (multiline) { match->match_continued = true; @@ -896,7 +896,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text, matches = g_list_prepend(matches, match); } - continueMatch.x1 = G_MAXDOUBLE; + continueMatch.x1 = std::numeric_limits<double>::max(); } } diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 23e0a7ae..4a37b29f 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4146,6 +4146,12 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB continueMatch->x2 = xMax2; continueMatch->y2 = yMin2; } + } else if (continueMatch && continueMatch->x1 != std::numeric_limits<double>::max()) { + if (ignoredHyphen) { + *ignoredHyphen = false; + } + + continueMatch->x1 = std::numeric_limits<double>::max(); } } } diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp index 0ec67752..c9bb65e3 100644 --- a/qt5/tests/check_search.cpp +++ b/qt5/tests/check_search.cpp @@ -11,6 +11,7 @@ public: explicit TestSearch(QObject *parent = nullptr) : QObject(parent) { } private slots: void testAcrossLinesSearch(); // leave it first + void testAcrossLinesSearchDoubleColumn(); void bug7063(); void testNextAndPrevious(); void testWholeWordsOnly(); @@ -370,5 +371,26 @@ void TestSearch::testAcrossLinesSearch() QCOMPARE(page->search(bug_str, mode2).size(), 1); } +void TestSearch::testAcrossLinesSearchDoubleColumn() +{ + // Test for searching across lines with new flag Poppler::Page::AcrossLines + // in a document with two columns of text. + QScopedPointer<Poppler::Document> document(Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLinesDoubleColumn.pdf")); + QVERIFY(document); + + QScopedPointer<Poppler::Page> page(document->page(0)); + QVERIFY(page); + + const Poppler::Page::SearchFlags mode = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase; + + // Test for a bug in double column documents where single line matches are + // wrongly returned as being multiline matches. + const QString bug_str = QString::fromUtf8("betw"); // clazy:exclude=qstring-allocations + + // there's only 3 matches for 'betw' in document, where only the last + // one is a multiline match, so that's a total of 4 rects returned + QCOMPARE(page->search(bug_str, mode).size(), 4); +} + QTEST_GUILESS_MAIN(TestSearch) #include "check_search.moc" diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp index 2e55ba01..ede2d0c2 100644 --- a/qt6/tests/check_search.cpp +++ b/qt6/tests/check_search.cpp @@ -9,6 +9,7 @@ public: explicit TestSearch(QObject *parent = nullptr) : QObject(parent) { } private slots: void testAcrossLinesSearch(); // leave it first + void testAcrossLinesSearchDoubleColumn(); void bug7063(); void testNextAndPrevious(); void testWholeWordsOnly(); @@ -369,5 +370,26 @@ void TestSearch::testAcrossLinesSearch() QCOMPARE(page->search(bug_str, mode2).size(), 1); } +void TestSearch::testAcrossLinesSearchDoubleColumn() +{ + // Test for searching across lines with new flag Poppler::Page::AcrossLines + // in a document with two columns of text. + std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLinesDoubleColumn.pdf"); + QVERIFY(document); + + std::unique_ptr<Poppler::Page> page = document->page(0); + QVERIFY(page); + + const Poppler::Page::SearchFlags mode = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase; + + // Test for a bug in double column documents where single line matches are + // wrongly returned as being multiline matches. + const QString bug_str = QString::fromUtf8("betw"); // clazy:exclude=qstring-allocations + + // there's only 3 matches for 'betw' in document, where only the last + // one is a multiline match, so that's a total of 4 rects returned + QCOMPARE(page->search(bug_str, mode).size(), 4); +} + QTEST_GUILESS_MAIN(TestSearch) #include "check_search.moc" commit 309004931712476b0ee751fc60224a87c14daf56 Author: Nelson Benítez León <nbenit...@gmail.com> Date: Mon Apr 18 20:03:49 2022 -0400 fix bug in multiline find_text() which caused some false positives being returned. Includes test case for the bug. See original comment about this bug: https://gitlab.gnome.org/GNOME/evince/-/merge_requests/159#note_1431380 diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 439143c7..23e0a7ae 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4044,6 +4044,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB for (k = 0; k < len; ++k) { bool last_char_of_line = j + k == m - 1; bool last_char_of_search_term = k == len - 1; + bool match_started = (bool)k; if (p[k] != s2[k] || (nextline && last_char_of_line && !last_char_of_search_term)) { // now check if the comparison failed at the end-of-line hyphen, @@ -4055,7 +4056,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB break; } k++; - } else if (p[k] != (Unicode)'-' || UnicodeIsWhitespace(s2[k])) { + } else if (!match_started || p[k] != (Unicode)'-' || !last_char_of_line || UnicodeIsWhitespace(s2[k])) { break; } else { nextlineAfterHyphen = true; diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp index cf57c133..0ec67752 100644 --- a/qt5/tests/check_search.cpp +++ b/qt5/tests/check_search.cpp @@ -363,6 +363,11 @@ void TestSearch::testAcrossLinesSearch() QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), true); QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), true); QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2W), true); + + // BUG about false positives at start of a line. + const QString bug_str = QString::fromUtf8("nes y"); // clazy:exclude=qstring-allocations + // there's only 1 match, check for that + QCOMPARE(page->search(bug_str, mode2).size(), 1); } QTEST_GUILESS_MAIN(TestSearch) diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp index b0e84482..2e55ba01 100644 --- a/qt6/tests/check_search.cpp +++ b/qt6/tests/check_search.cpp @@ -362,6 +362,11 @@ void TestSearch::testAcrossLinesSearch() QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), true); QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), true); QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2W), true); + + // BUG about false positives at start of a line. + const QString bug_str = QString::fromUtf8("nes y"); // clazy:exclude=qstring-allocations + // there's only 1 match, check for that + QCOMPARE(page->search(bug_str, mode2).size(), 1); } QTEST_GUILESS_MAIN(TestSearch)