On Thursday 13 August 2015 19:10:41 Andries E. Brouwer wrote:
> On Thu, Aug 13, 2015 at 05:54:57PM +0200, Tim Ruehsen wrote:
> > I just made up a test case, but can't apply your patch.
> >
> > Please rebase to latest git master and generate your patch with
> > git format-patch and send it as attachment. Thanks.
>
> OK, see attached.
>
> Andries
Based on that, and on your proposal about the progress bar, I put together a set of patches. The new test case is not ready yet.

@Andries: Maybe you can add a few more test cases to it (or send me a few examples that should work). I would also like to see broken UTF-8 sequences covered in this test.

@Darshit: Could you have a closer look at the patches, please? Neither Python nor the progress code is my playground... you are the expert here.

Tim
From 1ae1aeda78d83e570fe7ee5881c7e9caf182e991 Mon Sep 17 00:00:00 2001 From: "Andries E. Brouwer" <[email protected]> Date: Thu, 13 Aug 2015 19:06:03 +0200 Subject: [PATCH 1/4] Do not escape high control bytes on a UTF-8 system. --- src/init.c | 26 +++++++++++++++++++++++++- src/options.h | 1 + src/url.c | 12 +++++++++--- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/init.c b/src/init.c index ea074cc..6f71de1 100644 --- a/src/init.c +++ b/src/init.c @@ -348,6 +348,27 @@ command_by_name (const char *cmdname) return -1; } + +/* Used to determine whether bytes 128-159 are OK in a filename */ +static int +have_utf8_locale() { +#if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__) + /* insert some test for Windows */ +#else + char *p; + + p = getenv("LC_ALL"); + if (p == NULL) + p = getenv("LC_CTYPE"); + if (p == NULL) + p = getenv("LANG"); + if (strstr(p, "UTF-8") != NULL || strstr(p, "UTF8") != NULL || + strstr(p, "utf-8") != NULL || strstr(p, "utf8") != NULL) + return true; +#endif + return false; +} + /* Reset the variables to default values. */ void defaults (void) @@ -419,6 +440,7 @@ defaults (void) opt.restrict_files_os = restrict_unix; #endif opt.restrict_files_ctrl = true; + opt.restrict_files_highctrl = (have_utf8_locale() ? 
false : true); opt.restrict_files_nonascii = false; opt.restrict_files_case = restrict_no_case_restriction; @@ -1487,6 +1509,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno { int restrict_os = opt.restrict_files_os; int restrict_ctrl = opt.restrict_files_ctrl; + int restrict_highctrl = opt.restrict_files_highctrl; int restrict_case = opt.restrict_files_case; int restrict_nonascii = opt.restrict_files_nonascii; @@ -1511,7 +1534,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno else if (VAL_IS ("uppercase")) restrict_case = restrict_uppercase; else if (VAL_IS ("nocontrol")) - restrict_ctrl = false; + restrict_ctrl = restrict_highctrl = false; else if (VAL_IS ("ascii")) restrict_nonascii = true; else @@ -1532,6 +1555,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno opt.restrict_files_os = restrict_os; opt.restrict_files_ctrl = restrict_ctrl; + opt.restrict_files_highctrl = restrict_highctrl; opt.restrict_files_case = restrict_case; opt.restrict_files_nonascii = restrict_nonascii; diff --git a/src/options.h b/src/options.h index 24ddbb5..083d16b 100644 --- a/src/options.h +++ b/src/options.h @@ -251,6 +251,7 @@ struct options bool restrict_files_ctrl; /* non-zero if control chars in URLs are restricted from appearing in generated file names. */ + bool restrict_files_highctrl; /* idem for bytes 128-159 */ bool restrict_files_nonascii; /* non-zero if bytes with values greater than 127 are restricted. */ enum { diff --git a/src/url.c b/src/url.c index 73c8dd0..e98bfaa 100644 --- a/src/url.c +++ b/src/url.c @@ -1348,7 +1348,8 @@ enum { filechr_not_unix = 1, /* unusable on Unix, / and \0 */ filechr_not_vms = 2, /* unusable on VMS (ODS5), 0x00-0x1F * ? */ filechr_not_windows = 4, /* unusable on Windows, one of \|/<>?:*" */ - filechr_control = 8 /* a control character, e.g. 0-31 */ + filechr_control = 8, /* a control character, e.g. 
0-31 */ + filechr_highcontrol = 16 /* a high control character, in 128-159 */ }; #define FILE_CHAR_TEST(c, mask) \ @@ -1360,6 +1361,7 @@ enum { #define V filechr_not_vms #define W filechr_not_windows #define C filechr_control +#define Z filechr_highcontrol #define UVWC U|V|W|C #define UW U|W @@ -1392,8 +1394,8 @@ UVWC, VC, VC, VC, VC, VC, VC, VC, /* NUL SOH STX ETX EOT ENQ ACK BEL */ 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ 0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */ - C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */ - C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */ + Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, /* 128-143 */ + Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, Z, /* 144-159 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1406,6 +1408,7 @@ UVWC, VC, VC, VC, VC, VC, VC, VC, /* NUL SOH STX ETX EOT ENQ ACK BEL */ #undef V #undef W #undef C +#undef Z #undef UW #undef UVWC #undef VC @@ -1448,8 +1451,11 @@ append_uri_pathel (const char *b, const char *e, bool escaped, mask = filechr_not_vms; else mask = filechr_not_windows; + if (opt.restrict_files_ctrl) mask |= filechr_control; + if (opt.restrict_files_highctrl) + mask |= filechr_highcontrol; /* Copy [b, e) to PATHEL and URL-unescape it. */ if (escaped) -- 2.5.0
From 5f58576fb78be7af205ca86ee51362e653ed8772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim Rühsen?= <[email protected]> Date: Mon, 17 Aug 2015 13:03:25 +0200 Subject: [PATCH 2/4] Fix progress bar for multibyte filenames * progress.c (create_image): Fix filename padding Reported-by: "Andries E. Brouwer" <[email protected]> --- src/progress.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/progress.c b/src/progress.c index 61b635d..d97e329 100644 --- a/src/progress.c +++ b/src/progress.c @@ -950,10 +950,8 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done) if (orig_filename_cols <= MAX_FILENAME_COLS) { - int padding = MAX_FILENAME_COLS - orig_filename_cols; - sprintf (p, "%s ", bp->f_download); - p += orig_filename_cols + 1; - for (;padding;padding--) + p += sprintf (p, "%s ", bp->f_download); + while (p < bp->buffer + MAX_FILENAME_COLS) *p++ = ' '; } else -- 2.5.0
From 0f5fa65964bac7a80da6dfd7c32b5503494da76f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim Rühsen?= <[email protected]> Date: Mon, 17 Aug 2015 13:05:53 +0200 Subject: [PATCH 3/4] Test unescaped URL names in python test suite * testenv/server/http/http_server.py (send_head): Check unescaped URLs This patch allows us to use UTF-8 file names in our python tests. --- testenv/server/http/http_server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testenv/server/http/http_server.py b/testenv/server/http/http_server.py index 85769c4..40caddb 100644 --- a/testenv/server/http/http_server.py +++ b/testenv/server/http/http_server.py @@ -8,6 +8,7 @@ from hashlib import md5 import threading import socket import os +import urllib class StoppableHTTPServer(HTTPServer): @@ -387,7 +388,7 @@ class _Handler(BaseHTTPRequestHandler): if self.path == "/": path = "index.html" else: - path = self.path[1:] + path = urllib.parse.unquote(self.path[1:]) self.__log_request(method) -- 2.5.0
From 420b9f3923b4015f233bc22881f6e6e9edde8980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim Rühsen?= <[email protected]> Date: Mon, 17 Aug 2015 13:10:42 +0200 Subject: [PATCH 4/4] Add new test testenv/Test-not-escaping.py --- testenv/Test-not-escaping.py | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100755 testenv/Test-not-escaping.py diff --git a/testenv/Test-not-escaping.py b/testenv/Test-not-escaping.py new file mode 100755 index 0000000..7c48f7f --- /dev/null +++ b/testenv/Test-not-escaping.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile + +""" + This test ensures that Wget correctly handles the -O command for output + filenames. +""" +TEST_NAME = "Output Filename Command" +############# File Definitions ############################################### +File1 = "Test Contents." + +A_File = WgetFile ("Сердце", File1) + +WGET_OPTIONS = "-d" +#WGET_URLS = [["%D0%A1%D0%B5%D1%80%D0%B4%D1%86%D0%B5"]] +WGET_URLS = [["Сердце"]] + +Files = [[A_File]] + +ExpectedReturnCode = 0 +ExpectedDownloadedFiles = [WgetFile ("Сердце", File1)] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode +} + +err = HTTPTest ( + name=TEST_NAME, + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test +).begin () + +exit (err) -- 2.5.0
signature.asc
Description: This is a digitally signed message part.
