regtest/Printer.py | 10 ++--- regtest/backends/__init__.py | 79 ++++++++++++++++++++++--------------------- 2 files changed, 47 insertions(+), 42 deletions(-)
New commits: commit 817cc333ca8009998f2099583fd0a2fc703f3db3 Author: Carlos Garcia Campos <[email protected]> Date: Fri Nov 29 10:07:16 2013 +0100 regtest: Do not buffer stderr output Some buggy documents can produce a huge stderr output because of parsing errors or whatever. We could give a file directly to Popen to write the stderr file, but we only want to create the file when there's output, because it's what we use to know whether the command produced output or not. So, instead of buffering the whole output and then writing it to the file, now we read from the pipe while the command is running, writing the output in chunks to the file. This greatly improves memory consumption when running some tests. diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py index aa12022..b57d8aa 100644 --- a/regtest/backends/__init__.py +++ b/regtest/backends/__init__.py @@ -18,6 +18,7 @@ import hashlib import os +import select import shutil import errno from Config import Config @@ -193,13 +194,6 @@ class Backend: return False return os.path.exists(test_result + self._diff_ext) - def __create_stderr_file(self, stderr, out_path): - if not stderr: - return - stderr_file = open(out_path + '.stderr', 'wb') - stderr_file.write(stderr) - stderr_file.close() - def __create_failed_file_if_needed(self, status, out_path): if os.WIFEXITED(status) or os.WEXITSTATUS(status) == 0: return False @@ -210,10 +204,36 @@ class Backend: return True - def _check_exit_status(self, p, out_path): - stderr = p.stderr.read() - self.__create_stderr_file(stderr, out_path) + def __redirect_stderr_to_file(self, fd, out_path): + stderr_file = None + read_set = [fd] + while read_set: + try: + rlist, wlist, xlist = select.select(read_set, [], []) + except select.error as e: + continue + + if fd in rlist: + try: + chunk = os.read(fd, 1024) + except OSError as e: + if e.errno == errno.EIO: + # Child process finished. 
+ chunk = '' + else: + raise e + if chunk: + if stderr_file is None: + stderr_file = open(out_path + '.stderr', 'wb') + stderr_file.write(chunk) + else: + read_set.remove(fd) + if stderr_file is not None: + stderr_file.close() + + def _check_exit_status(self, p, out_path): + self.__redirect_stderr_to_file(p.stderr.fileno(), out_path) status = p.wait() if not os.WIFEXITED(status): commit f8f82f1cc3a948239a05d7762210a3f244299db6 Author: Carlos Garcia Campos <[email protected]> Date: Fri Nov 29 10:03:24 2013 +0100 regtest: Read test results in chunks to get the md5 digest Some backends can generate huge results, like huge postscript files that we don't want to load in memory to get the md5. So, instead of creating the md5 object with the entire file, we feed it with chunks of data using the update method. This greatly improves memory consumption, and performance as well. diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py index eab154d..aa12022 100644 --- a/regtest/backends/__init__.py +++ b/regtest/backends/__init__.py @@ -16,7 +16,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -from hashlib import md5 +import hashlib import os import shutil import errno @@ -47,6 +47,14 @@ class Backend: def get_diff_ext(self): return self._diff_ext + def __md5sum(self, ref_path): + md5 = hashlib.md5() + with open(ref_path,'rb') as f: + for chunk in iter(lambda: f.read(128 * md5.block_size), b''): + md5.update(chunk) + + return md5.hexdigest() + def __should_have_checksum(self, entry): if not entry.startswith(self._name): return False @@ -62,9 +70,7 @@ class Backend: if not self.__should_have_checksum(entry): continue ref_path = os.path.join(refs_path, entry) - f = open(ref_path, 'rb') - md5_file.write("%s %s\n" % (md5(f.read()).hexdigest(), ref_path)) - f.close() + md5_file.write("%s %s\n" % (self.__md5sum(ref_path), ref_path)) if delete_refs: os.remove(ref_path) @@ 
-90,10 +96,9 @@ class Backend: continue result_path = os.path.join(out_path, basename) - f = open(result_path, 'rb') - result_md5sum = md5(f.read()).hexdigest() + + result_md5sum = self.__md5sum(result_path); matched = md5sum == result_md5sum - f.close() if update_refs: result_md5.append("%s %s\n" % (result_md5sum, ref_path)) commit 3444a44397a890dbeb1bd10357dbc8246fd21ad0 Author: Carlos Garcia Campos <[email protected]> Date: Fri Nov 29 10:01:20 2013 +0100 regtest: Remove unused method _check_exit_status2 It was used when the backends ran in parallel odd and even pages, but it's no longer used since threads support was added. diff --git a/regtest/backends/__init__.py b/regtest/backends/__init__.py index ff6ef84..eab154d 100644 --- a/regtest/backends/__init__.py +++ b/regtest/backends/__init__.py @@ -220,26 +220,6 @@ class Backend: return True - def _check_exit_status2(self, p1, p2, out_path): - p1_stderr = p1.stderr.read() - status1 = p1.wait() - p2_stderr = p2.stderr.read() - status2 = p2.wait() - - if p1_stderr or p2_stderr: - self.__create_stderr_file(p1_stderr + p2_stderr, out_path) - - if not os.WIFEXITED(status1) or not os.WIFEXITED(status2): - open(out_path + '.crashed', 'w').close() - return False - - if self.__create_failed_file_if_needed(status1, out_path): - return False - if self.__create_failed_file_if_needed(status2, out_path): - return False - - return True - def _diff_png(self, ref_path, result_path): try: import Image, ImageChops commit 5f825df417947c51943f1db327e1aa6c3faa15b0 Author: Carlos Garcia Campos <[email protected]> Date: Fri Nov 29 09:57:57 2013 +0100 regtest: Do not store the current line in Printer but only its length We are not using the line text anymore, but only the length. 
diff --git a/regtest/Printer.py b/regtest/Printer.py index 23dfd34..1de693d 100644 --- a/regtest/Printer.py +++ b/regtest/Printer.py @@ -32,19 +32,19 @@ class Printer: self._verbose = Config().verbose self._stream = sys.stdout self._rewrite = self._stream.isatty() and not self._verbose - self._current_line = None + self._current_line_len = 0 self._lock = RLock() Printer.__single = self def _erase_current_line(self): - if self._current_line is None: + if not self._current_line_len: return - line_len = len(self._current_line) + line_len = self._current_line_len self._stream.write('\b' * line_len + ' ' * line_len + '\b' * line_len) - self._current_line = None + self._current_line_len = 0 def _ensure_new_line(self, msg): if not msg.endswith('\n'): @@ -62,7 +62,7 @@ class Printer: with self._lock: self._erase_current_line() self._print(msg) - self._current_line = msg[msg.rfind('\n') + 1:] + self._current_line_len = len(msg[msg.rfind('\n') + 1:]) def printout_ln(self, msg=''): with self._lock: _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
