On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote: > We are going to reuse the tesseract OCR code. > Create a new tesseract_ocr() helper and use it. > > Signed-off-by: Philippe Mathieu-Daudé <f4...@amsat.org> > --- > tests/acceptance/machine_m68k_nextcube.py | 21 +++++---------------- > tests/acceptance/tesseract_utils.py | 18 ++++++++++++++++++ > 2 files changed, 23 insertions(+), 16 deletions(-) > > diff --git a/tests/acceptance/machine_m68k_nextcube.py > b/tests/acceptance/machine_m68k_nextcube.py > index 3c7400c43e4..09e2745cc52 100644 > --- a/tests/acceptance/machine_m68k_nextcube.py > +++ b/tests/acceptance/machine_m68k_nextcube.py > @@ -7,13 +7,11 @@ > > import os > import time > -import logging > > from avocado_qemu import Test > from avocado import skipUnless > -from avocado.utils import process > > -from tesseract_utils import tesseract_available > +from tesseract_utils import tesseract_available, tesseract_ocr > > PIL_AVAILABLE = True > try: > @@ -61,12 +59,8 @@ def test_bootrom_framebuffer_size(self): > def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): > screenshot_path = os.path.join(self.workdir, "dump.ppm") > self.check_bootrom_framebuffer(screenshot_path) > - > - console_logger = logging.getLogger('console') > - text = process.run("tesseract %s stdout" % > screenshot_path).stdout_text > - for line in text.split('\n'): > - if len(line): > - console_logger.debug(line) > + lines = tesseract_ocr(screenshot_path, tesseract_version=3) > + text = '\n'.join(lines) > self.assertIn('Backplane', text) > self.assertIn('Ethernet address', text) > > @@ -77,13 +71,8 @@ def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): > def test_bootrom_framebuffer_ocr_with_tesseract_v4(self): > screenshot_path = os.path.join(self.workdir, "dump.ppm") > self.check_bootrom_framebuffer(screenshot_path) > - > - console_logger = logging.getLogger('console') > - proc = process.run("tesseract --oem 1 %s stdout" % screenshot_path) > - text = proc.stdout_text > - for line 
in text.split('\n'): > - if len(line): > - console_logger.debug(line) > + lines = tesseract_ocr(screenshot_path, tesseract_version=4) > + text = '\n'.join(lines) > self.assertIn('Testing the FPU, SCC', text) > self.assertIn('System test failed. Error code', text) > self.assertIn('Boot command', text) > diff --git a/tests/acceptance/tesseract_utils.py > b/tests/acceptance/tesseract_utils.py > index acd6e8c2faa..72cd9ab7989 100644 > --- a/tests/acceptance/tesseract_utils.py > +++ b/tests/acceptance/tesseract_utils.py > @@ -6,7 +6,9 @@ > # later. See the COPYING file in the top-level directory. > > import re > +import logging > > +from avocado.utils import process > from avocado.utils.path import find_command, CmdNotFoundError > > def tesseract_available(expected_version): > @@ -26,3 +28,19 @@ def tesseract_available(expected_version): > return False > # now this is guaranteed to be a digit > return int(match.groups()[0]) == expected_version > + > + > +def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3): > + console_logger = logging.getLogger('tesseract') > + console_logger.debug(image_path) > + if tesseract_version == 4: > + tesseract_args += ' --oem 1' > + proc = process.run("tesseract {} {} stdout".format(tesseract_args, > + image_path)) > + lines = [] > + for line in proc.stdout_text.split('\n'): > + sline = line.strip() > + if len(sline): > + console_logger.debug(sline) > + lines += [sline] > + return lines
Would it make sense to completely hide the tesseract version handling in this new tesseract_utils.py file now, so that the tests themselves do not have to worry about this anymore? In other words, would it be possible to merge test_bootrom_framebuffer_ocr_with_tesseract_v3 and test_bootrom_framebuffer_ocr_with_tesseract_v4 into one single test that way? Thomas