Date: Wednesday, December 19, 2018 @ 23:31:35
Author: bgyorgy
Revision: 416805
archrelease: copy trunk to community-any Added: paperwork/repos/ paperwork/repos/community-any/ paperwork/repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch (from rev 416804, paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch) paperwork/repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch (from rev 416804, paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch) paperwork/repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch (from rev 416804, paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch) paperwork/repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch (from rev 416804, paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch) paperwork/repos/community-any/PKGBUILD (from rev 416804, paperwork/trunk/PKGBUILD) -----------------------------------------------------------------+ 0001-Filter-out-boxes-that-start-at-0-0.patch | 37 ++++ 0001-Filter-out-too-large-boxes-on-selection.patch | 39 ++++ 0001-Fix-importing-PNG-files-with-transparency.patch | 25 ++ 0001-util-find_language-New-versions-of-pycountry-do-not-.patch | 87 ++++++++++ PKGBUILD | 62 +++++++ 5 files changed, 250 insertions(+) Copied: paperwork/repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch (from rev 416804, paperwork/trunk/0001-Filter-out-boxes-that-start-at-0-0.patch) =================================================================== --- repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch (rev 0) +++ repos/community-any/0001-Filter-out-boxes-that-start-at-0-0.patch 2018-12-19 23:31:35 UTC (rev 416805) @@ -0,0 +1,37 @@ +From d1da8cd20554aa1d3c766855c251a5a643d2abb4 Mon Sep 17 00:00:00 2001 +From: Jonas Wloka <c...@jowlo.de> +Date: Sat, 17 Nov 2018 19:54:20 +0100 +Subject: [PATCH] Filter out boxes that start at (0, 0) + +Tesseract returns way too large boxes that cover the whole page, mostly +containing only a single special 
character. All of these boxes (in my +tests) have coordinate 0 0. + +This filters out all of these boxes. +--- + paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +index 35c6b7f4..e324c670 100644 +--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py ++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +@@ -251,10 +251,14 @@ class JobPageBoxesLoader(Job): + boxes = set() + for line in line_boxes: + for word in line.word_boxes: +- if word.content.strip() == "": ++ if word.content.strip() == "" or (word.position[0][0] == 0 ++ and word.position[0][1] == 0): + # XXX(Jflesch): Tesseract 3.03 (hOCR) returns big and + # empty word boxes sometimes (just a single space + # inside). They often match images, but not always. ++ # XXX(jowlo): Tesseract returns large boxes containing ++ # single letters (mostly special chars) that cover the ++ # whole page. All of these start at (0 0) + continue + boxes.add(word) + +-- +2.20.0 + Copied: paperwork/repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch (from rev 416804, paperwork/trunk/0001-Filter-out-too-large-boxes-on-selection.patch) =================================================================== --- repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch (rev 0) +++ repos/community-any/0001-Filter-out-too-large-boxes-on-selection.patch 2018-12-19 23:31:35 UTC (rev 416805) @@ -0,0 +1,39 @@ +From 318d9ef80a7dc21da7ad45fa46c11c8fb19ec8fb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballog...@gmail.com> +Date: Wed, 19 Dec 2018 16:59:46 +0100 +Subject: [PATCH] Filter out too large boxes on selection + +Tesseract returns way too large boxes that cover the whole page, mostly containing only a single special character. 
+ +This is a complement for commit d1da8cd2 to filter out all of these boxes also on text selection. +--- + paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +index e324c670..b022b178 100644 +--- a/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py ++++ b/paperwork-gtk/src/paperwork/frontend/mainwindow/pages.py +@@ -731,6 +731,9 @@ class SimplePageDrawer(Drawer): + index = 0 + for line in self.boxes['lines']: + for box in line.word_boxes: ++ if box.content.strip() == "" or (box.position[0][0] == 0 ++ and box.position[0][1] == 0): ++ continue + rcx = (box.position[0][0] + box.position[1][0]) / 2 + rcy = (box.position[0][1] + box.position[1][1]) / 2 + w = box.position[1][0] - box.position[0][0] +@@ -767,6 +770,9 @@ class SimplePageDrawer(Drawer): + selected = [] + for line in self.boxes['lines']: + for box in line.word_boxes: ++ if box.content.strip() == "" or (box.position[0][0] == 0 ++ and box.position[0][1] == 0): ++ continue + if box == box_start: + in_list = True + if in_list: +-- +2.20.0 + Copied: paperwork/repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch (from rev 416804, paperwork/trunk/0001-Fix-importing-PNG-files-with-transparency.patch) =================================================================== --- repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch (rev 0) +++ repos/community-any/0001-Fix-importing-PNG-files-with-transparency.patch 2018-12-19 23:31:35 UTC (rev 416805) @@ -0,0 +1,25 @@ +From 2ffb468a41d28eceda3afe869dd8c5af70203bf4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ball=C3=B3=20Gy=C3=B6rgy?= <ballog...@gmail.com> +Date: Wed, 19 Dec 2018 17:19:27 +0100 +Subject: [PATCH] Fix importing PNG files with transparency + +Pillow does not allow to save images with transparency to JPEG. 
These images need to be converted first. +--- + paperwork-backend/paperwork_backend/img/page.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/paperwork-backend/paperwork_backend/img/page.py b/paperwork-backend/paperwork_backend/img/page.py +index ddbb9214..86d5e4be 100644 +--- a/paperwork-backend/paperwork_backend/img/page.py ++++ b/paperwork-backend/paperwork_backend/img/page.py +@@ -141,6 +141,7 @@ class ImgPage(BasicPage): + + def __set_img(self, img): + with self.fs.open(self.__img_path, 'wb') as fd: ++ img = img.convert("RGB") + img.save(fd, format="JPEG") + + img = property(__get_img, __set_img) +-- +2.20.0 + Copied: paperwork/repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch (from rev 416804, paperwork/trunk/0001-util-find_language-New-versions-of-pycountry-do-not-.patch) =================================================================== --- repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch (rev 0) +++ repos/community-any/0001-util-find_language-New-versions-of-pycountry-do-not-.patch 2018-12-19 23:31:35 UTC (rev 416805) @@ -0,0 +1,87 @@ +From ad4555f9904805e4fd56a30ca6529536edb0f919 Mon Sep 17 00:00:00 2001 +From: Jerome Flesch <jfle...@openpaper.work> +Date: Wed, 19 Dec 2018 22:38:59 +0100 +Subject: [PATCH] util: find_language(): New versions of pycountry do not raise + exception when a language is not found, they return None. 
Ref #812 + +Signed-off-by: Jerome Flesch <jfle...@openpaper.work> +--- + paperwork-backend/paperwork_backend/util.py | 60 ++++++++------------- + 1 file changed, 23 insertions(+), 37 deletions(-) + +diff --git a/paperwork-backend/paperwork_backend/util.py b/paperwork-backend/paperwork_backend/util.py +index 90fd0eb9..73701c8a 100644 +--- a/paperwork-backend/paperwork_backend/util.py ++++ b/paperwork-backend/paperwork_backend/util.py +@@ -312,45 +312,31 @@ def find_language(lang_str=None, allow_none=False): + lang_str = lang_str.split("_")[0] + + try: +- return pycountry.pycountry.languages.get(name=lang_str.title()) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso_639_3_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso639_3_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso639_2T_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(iso639_1_code=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(terminology=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(bibliographic=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(alpha_3=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(alpha_2=lang_str) +- except (KeyError, UnicodeDecodeError): +- pass +- try: +- return pycountry.pycountry.languages.get(alpha2=lang_str) ++ r = pycountry.pycountry.languages.get(name=lang_str.title()) ++ if r is not None: ++ return r + except (KeyError, UnicodeDecodeError): + pass ++ ++ ATTRS = ( ++ 'iso_639_3_code', ++ 'iso639_3_code', ++ 'iso639_2T_code', ++ 'iso639_1_code', ++ 'terminology', 
++ 'bibliographic', ++ 'alpha_3', ++ 'alpha_2', ++ 'alpha2' ++ ) ++ for attr in ATTRS: ++ try: ++ r = pycountry.pycountry.languages.get(**{attr: lang_str}) ++ if r is not None: ++ return r ++ except (KeyError, UnicodeDecodeError): ++ pass ++ + if allow_none: + logger.warning("Unknown language [{}]".format(lang_str)) + return None +-- +2.20.0 + Copied: paperwork/repos/community-any/PKGBUILD (from rev 416804, paperwork/trunk/PKGBUILD) =================================================================== --- repos/community-any/PKGBUILD (rev 0) +++ repos/community-any/PKGBUILD 2018-12-19 23:31:35 UTC (rev 416805) @@ -0,0 +1,62 @@ +# Maintainer: Balló György <ballogyor+arch at gmail dot com> + +pkgname=paperwork +pkgver=1.2.4 +pkgrel=3 +pkgdesc="Personal document manager for GNOME to manage scanned documents and PDFs" +arch=(any) +url="https://openpaper.work/" +license=(GPL3) +depends=(gtk3 libnotify poppler-glib python-cairo python-dateutil python-gobject + python-levenshtein python-natsort python-pillowfight python-pycountry python-pyenchant + python-pyinsane python-pyocr python-setuptools python-simplebayes python-termcolor + python-whoosh python-xdg) +makedepends=(git) +_commit=36c00eaac636c6fa6db58f2f86a01e1c1abf2398 # tags/1.2.4^0 +source=("git+https://gitlab.gnome.org/World/OpenPaperwork/paperwork.git#commit=$_commit" + 0001-Filter-out-boxes-that-start-at-0-0.patch + 0001-Filter-out-too-large-boxes-on-selection.patch + 0001-Fix-importing-PNG-files-with-transparency.patch + 0001-util-find_language-New-versions-of-pycountry-do-not-.patch) +sha256sums=('SKIP' + '8fb8b760fba68c393b33bf2f2b52c6e7edf46d8958863c8f02c4f988842d84e3' + '1e11b2c18181b9ab36f51f25ff2e489759ae12245de41a4dcf377def400afe99' + 'aa867fb97da9a53c0e2beb758c26881dc2a16265e9b2deaa83e6fe1443ec8260' + '00f501fb42abe44ec34982fbfd0f71cb4a589e0cc4bb983e0bfa50156fc30d3c') + +pkgver() { + cd $pkgname + git describe --tags | sed 's/-/+/g' +} + +prepare() { + cd $pkgname + + # 
https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/781 + patch -Np1 -i ../0001-Filter-out-boxes-that-start-at-0-0.patch + + # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/782 + patch -Np1 -i ../0001-Filter-out-too-large-boxes-on-selection.patch + + # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/merge_requests/783 + patch -Np1 -i ../0001-Fix-importing-PNG-files-with-transparency.patch + + # https://gitlab.gnome.org/World/OpenPaperwork/paperwork/issues/812 + patch -Np1 -i ../0001-util-find_language-New-versions-of-pycountry-do-not-.patch +} + +build() { + cd $pkgname + make +} + +package() { + cd $pkgname/paperwork-backend + python3 setup.py install --root="$pkgdir" --optimize=1 + + cd ../paperwork-gtk + python3 setup.py install --root="$pkgdir" --optimize=1 + + cd "$pkgdir"/usr/lib/python3.7/site-packages/paperwork/frontend/ + PYTHONPATH=`echo "$pkgdir"/usr/lib/python*/site-packages/` python3 -c 'import shell; shell.install_system(icon_basedir="../../../../../share/icons", data_basedir="../../../../../share")' +}