I don't know how long this has been true, but I recently noticed that some recursive HTTP fetch operations were failing (on VMS) because the URLs contained a "?", and the code in src/url.c (et al.) assumed that "?" was a problem in file names only on Windows. For example (1.16.3):
ALP $ wgo --user-agent=mozilla "http://www.google.com/search?source=hp&q=fred" --2015-03-31 23:52:13-- http://www.google.com/search?source=hp&q=fred Resolving www.google.com... 74.125.198.99, 74.125.198.103, 74.125.198.104, ... Connecting to www.google.com|74.125.198.99|:80... connected. HTTP request sent, awaiting response... 200 OK Length: unspecified [text/html] search!source=hp&q=fred: i/o error Cannot write to 'search!source=hp&q=fred' (error 0). (Interestingly, in 1.16.1, that last error message was more informative: Cannot write to 'search!source=hp&q=fred' (i/o error). but I haven't investigated.) Adding a VMS option to the restrict_files_os stuff, and treating VMS like Windows for FN_QUERY_SEP and FN_QUERY_SEP_STR seems to solve the problem (at least on an ODS5 volume): ALP $ wgx --user-agent=mozilla "http://www.google.com/search?source=hp&q=fred" --2015-03-31 23:39:35-- http://www.google.com/search?source=hp&q=fred Resolving www.google.com... 74.125.198.147, 74.125.198.99, 74.125.198.103, ... Connecting to www.google.com|74.125.198.147|:80... connected. HTTP request sent, awaiting response... 200 OK Length: unspecified [text/html] Saving to: 'search@source=hp&q=fred' search@source=hp&q= [ <=> ] 37.78K 174KB/s in 0.2s 2015-03-31 23:39:36 (174 KB/s) - 'search@source=hp&q=fred' saved [38691] ALP $ dire search* [...] search^@source^=hp^&q^=fred.;1 I haven't looked at the documentation, but the following code changes seem plausible to me: diff -ru wget-1_16_3a_vms/src/init.c wget-1_16_3/src/init.c --- wget-1_16_3a_vms/src/init.c 2015-01-30 17:25:57 -0600 +++ wget-1_16_3/src/init.c 2015-03-31 22:46:59 -0500 @@ -397,6 +397,8 @@ /* The default for file name restriction defaults to the OS type. 
*/ #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__) opt.restrict_files_os = restrict_windows; +#elif defined(__VMS) + opt.restrict_files_os = restrict_vms; #else opt.restrict_files_os = restrict_unix; #endif @@ -1481,6 +1483,8 @@ if (VAL_IS ("unix")) restrict_os = restrict_unix; + else if (VAL_IS ("vms")) + restrict_os = restrict_vms; else if (VAL_IS ("windows")) restrict_os = restrict_windows; else if (VAL_IS ("lowercase")) @@ -1495,7 +1499,7 @@ { fprintf (stderr, _("\ %s: %s: Invalid restriction %s,\n\ - use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"), + use [unix|vms|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"), exec_name, com, quote (val)); return false; } diff -ru wget-1_16_3a_vms/src/options.h wget-1_16_3/src/options.h --- wget-1_16_3a_vms/src/options.h 2015-01-30 17:25:57 -0600 +++ wget-1_16_3/src/options.h 2015-03-31 22:37:59 -0500 @@ -239,6 +239,7 @@ enum { restrict_unix, + restrict_vms, restrict_windows } restrict_files_os; /* file name restriction ruleset. */ bool restrict_files_ctrl; /* non-zero if control chars in URLs diff -ru wget-1_16_3a_vms/src/url.c wget-1_16_3/src/url.c --- wget-1_16_3a_vms/src/url.c 2015-02-23 09:10:22 -0600 +++ wget-1_16_3/src/url.c 2015-03-31 23:09:48 -0500 @@ -1328,8 +1328,9 @@ enum { filechr_not_unix = 1, /* unusable on Unix, / and \0 */ - filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */ - filechr_control = 4 /* a control character, e.g. 0-31 */ + filechr_not_vms = 2, /* unusable on VMS (ODS5), 0x00-0x1F * ? */ + filechr_not_windows = 4, /* unusable on Windows, one of \|/<>?:*" */ + filechr_control = 8 /* a control character, e.g. 
0-31 */ }; #define FILE_CHAR_TEST(c, mask) \ @@ -1338,11 +1339,14 @@ /* Shorthands for the table: */ #define U filechr_not_unix +#define V filechr_not_vms #define W filechr_not_windows #define C filechr_control +#define UVWC U|V|W|C #define UW U|W -#define UWC U|W|C +#define VC V|C +#define VW V|W /* Table of characters unsafe under various conditions (see above). @@ -1353,22 +1357,22 @@ static const unsigned char filechr_table[256] = { -UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ - C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */ - C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ - C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ - 0, 0, W, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' */ - 0, 0, W, 0, 0, 0, 0, UW, /* ( ) * + , - . / */ - 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */ - 0, 0, W, 0, W, 0, W, W, /* 8 9 : ; < = > ? */ - 0, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */ - 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */ - 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */ - 0, 0, 0, 0, W, 0, 0, 0, /* X Y Z [ \ ] ^ _ */ - 0, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */ - 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */ - 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ - 0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */ +UVWC, VC, VC, VC, VC, VC, VC, VC, /* NUL SOH STX ETX EOT ENQ ACK BEL */ + VC, VC, VC, VC, VC, VC, VC, VC, /* BS HT LF VT FF CR SO SI */ + VC, VC, VC, VC, VC, VC, VC, VC, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + VC, VC, VC, VC, VC, VC, VC, VC, /* CAN EM SUB ESC FS GS RS US */ + 0, 0, W, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' */ + 0, 0, VW, 0, 0, 0, 0, UW, /* ( ) * + , - . / */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */ + 0, 0, W, 0, W, 0, W, VW, /* 8 9 : ; < = > ? 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */ + 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */ + 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */ + 0, 0, 0, 0, W, 0, 0, 0, /* X Y Z [ \ ] ^ _ */ + 0, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */ + 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */ + 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */ + 0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */ C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */ @@ -1381,10 +1385,13 @@ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; #undef U +#undef V #undef W #undef C #undef UW -#undef UWC +#undef UVWC +#undef VC +#undef VW /* FN_PORT_SEP is the separator between host and port in file names for non-standard port numbers. On Unix this is normally ':', as in @@ -1393,10 +1400,14 @@ #define FN_PORT_SEP (opt.restrict_files_os != restrict_windows ? ':' : '+') /* FN_QUERY_SEP is the separator between the file name and the URL - query, normally '?'. Since Windows cannot handle '?' as part of + query, normally '?'. Because VMS and Windows cannot handle '?' in a file name, we use '@' instead there. */ -#define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@') -#define FN_QUERY_SEP_STR (opt.restrict_files_os != restrict_windows ? "?" : "@") +#define FN_QUERY_SEP \ + (((opt.restrict_files_os != restrict_vms) && \ + (opt.restrict_files_os != restrict_windows)) ? '?' : '@') +#define FN_QUERY_SEP_STR \ + (((opt.restrict_files_os != restrict_vms) && \ + (opt.restrict_files_os != restrict_windows)) ? "?" : "@") /* Quote path element, characters in [b, e), as file name, and append the quoted string to DEST. 
Each character is quoted as per @@ -1415,6 +1426,8 @@ int mask; if (opt.restrict_files_os == restrict_unix) mask = filechr_not_unix; + else if (opt.restrict_files_os == restrict_vms) + mask = filechr_not_vms; else mask = filechr_not_windows; if (opt.restrict_files_ctrl) I assume that I've violated some style standard(s), but I'm old and hard to educate. ("git"? What's a "git"?) Please let me know if I need to do more to persuade you to adopt/adapt a change set like this, or if there's a better way. Thanks, as always. ------------------------------------------------------------------------ Steven M. Schweda sms@antinode-info 382 South Warwick Street (+1) 651-699-9818 Saint Paul MN 55105-2547