Hello! We are having some difficulties when using the ns_http command with sites using 8-bit encoding.
The ns_http command does not convert the received data, so we must use the 'encoding convertfrom' command. Sometimes the converted strings become corrupted. For example, take a server with output encoding iso-8859-2: if the server passes 'äöüŁ', then after conversion we get 'äöüŁ' (correct); if the server passes 'ÄÖÜŁ', then after conversion we get 'ÄÖ#' (corrupted). See the attached ns_http.test1 for an example (test 1.2 fails). Such strings can be found in any 8-bit encoding (to see them, run the attached http_charsets.test with the 'pairsTest' constraint enabled). The source for the ns_http command (tclhttp.c) shows that the problem is the use of Tcl_NewStringObj on binary input data (8-bit chars). Two solutions come to mind: 1) Using Tcl_NewByteArrayObj instead of Tcl_NewStringObj; 2) Using Tcl_ExternalToUtf before using Tcl_NewStringObj, i.e. a built-in 'encoding convertfrom'. The attached tclhttp.c.binary-externaltoutf patch modifies the ns_http command: 1) the -binary switch is added to the queue/wait/run sub-commands to force the use of Tcl_NewByteArrayObj on text pages; 2) without -binary the text page will be converted according to the Content-Type header. Note that the second change requires TCLHTTP_USE_EXTERNALTOUTF to be defined at compile time. The fixed ns_http command can be tested with the attached ns_http.test2 (see 1.2.1 and 1.2.2). More intensive testing of the changes can be done with http_charsets.test (note the commented-out pairsTest constraint). Also, I replaced 'nstest::http-0.9 -encoding xxx' with 'ns_http run' in the existing encoding.test (see encoding_ns_http.test). All data transformations are successfully performed without explicit decoding. Automatic data decoding is convenient to use, but it changes the behavior of ns_http on 8-bit inputs. These changes could break existing code if someone uses ns_http to interact with 8-bit sites (with risk of data corruption). To use the patched version of ns_http, either remove the 'encoding convertfrom' or add the -binary switch. 
It should be noted that the -binary switch followed by 'encoding convertfrom' will also be useful for 8-bit sites with a missing or incorrect Content-Type. Regards, Oleg Oleinick. PS. Attached files: ns_http.test1 - tests for the current version, shows corruption of 8-bit text; ns_http.test2 - tests for the patched version, shows the correct receipt of 8-bit text; tclhttp.c.binary-externaltoutf.patch - patch for changing the ns_http command, adds the -binary switch and text data auto-decoding; http_charsets.test - tests for ns_http, suitable for both the current and the patched version; encoding_ns_http.test - like the existing encoding.test, with 'nstest::http-0.9 -encoding xxx' replaced by the new 'ns_http run'.
ns_http.test1
Description: Binary data
ns_http.test2
Description: Binary data
diff --git a/nsd/tclhttp.c b/nsd/tclhttp.c index cef2ee7d..2693fcfe 100644 --- a/nsd/tclhttp.c +++ b/nsd/tclhttp.c @@ -40,6 +40,12 @@ #include <openssl/err.h> #endif +/* + * temporary solution for ns_http wait/queue/run -binary + * FIXME: move this definition to nsd.h + */ +#define NS_HTTP_FLAG_BINARY (1<<4) + /* * The maximum number of bytes we can send to TLS * in one operation is 2^14 => 16384 (see RFC 5246). @@ -658,7 +664,7 @@ HttpWaitObjCmd( NsHttpTask *httpPtr = NULL; char *id = NULL, *outputFileName = NULL; - int result = TCL_OK, decompress = 0; + int result = TCL_OK, decompress = 0, binary = 0; Tcl_WideInt spoolLimit = -1; Tcl_Obj *elapsedVarObj = NULL, *resultVarObj = NULL, @@ -677,6 +683,7 @@ HttpWaitObjCmd( {"-outputfile", Ns_ObjvString, &outputFileName, NULL}, {"-spoolsize", Ns_ObjvMemUnit, &spoolLimit, NULL}, {"-decompress", Ns_ObjvBool, &decompress, INT2PTR(NS_TRUE)}, + {"-binary", Ns_ObjvBool, &binary, INT2PTR(NS_TRUE)}, {NULL, NULL, NULL, NULL} }; Ns_ObjvSpec args[] = { @@ -707,6 +714,10 @@ HttpWaitObjCmd( Ns_Log(Warning, "ns_http_wait: -decompress option is deprecated"); httpPtr->flags |= NS_HTTP_FLAG_DECOMPRESS; } + if (binary != 0) { + Ns_Log(Warning, "ns_http_wait: -binary option is deprecated"); + httpPtr->flags |= NS_HTTP_FLAG_BINARY; + } if (spoolLimit > -1) { Ns_Log(Warning, "ns_http_wait: -spoolsize option is deprecated"); httpPtr->spoolLimit = spoolLimit; @@ -1215,7 +1226,7 @@ HttpQueue( bool run ) { Tcl_Interp *interp; - int result = TCL_OK, decompress = 0; + int result = TCL_OK, decompress = 0, binary = 0; Tcl_WideInt spoolLimit = -1; int verifyCert = 0, keepHostHdr = 0; NsHttpTask *httpPtr = NULL; @@ -1239,6 +1250,7 @@ HttpQueue( Ns_ObjvValueRange sizeRange = {0, LLONG_MAX}; Ns_ObjvSpec opts[] = { + {"-binary", Ns_ObjvBool, &binary, INT2PTR(NS_TRUE)}, {"-body", Ns_ObjvObj, &bodyObj, NULL}, {"-body_size", Ns_ObjvWideInt, &bodySize, &sizeRange}, {"-body_file", Ns_ObjvString, &bodyFileName, NULL}, @@ -1358,7 +1370,6 @@ HttpQueue( 
HttpSpliceChannels(interp, httpPtr); HttpClose(httpPtr); } - } else { /* @@ -1376,6 +1387,9 @@ HttpQueue( if (decompress != 0) { httpPtr->flags |= NS_HTTP_FLAG_DECOMPRESS; } + if (binary != 0) { + httpPtr->flags |= NS_HTTP_FLAG_BINARY; + } httpPtr->servPtr = itPtr->servPtr; httpPtr->task = Ns_TaskTimedCreate(httpPtr->sock, HttpProc, httpPtr, expirePtr); @@ -1483,7 +1497,6 @@ HttpGetResult( NS_NONNULL_ASSERT(interp != NULL); NS_NONNULL_ASSERT(httpPtr != NULL); - if (httpPtr->error != NULL) { if (httpPtr->finalSockState == NS_SOCK_TIMEOUT) { Tcl_SetErrorCode(interp, errorCodeTimeoutString, (char *)0L); @@ -1495,6 +1508,9 @@ HttpGetResult( } if (httpPtr->recvSpoolMode == NS_FALSE) { +#if defined(TCLHTTP_USE_EXTERNALTOUTF) + Tcl_Encoding encoding = NULL; +#endif bool binary = NS_FALSE; int cSize; char *cData; @@ -1517,6 +1533,9 @@ HttpGetResult( binary = NS_TRUE; } } + if ((httpPtr->flags & NS_HTTP_FLAG_BINARY) != 0u) { + binary = NS_TRUE; + } if (binary == NS_FALSE) { char *cType = NULL; @@ -1529,6 +1548,14 @@ HttpGetResult( * completely regular text formats! */ binary = Ns_IsBinaryMimeType(cType); +#if defined(TCLHTTP_USE_EXTERNALTOUTF) + if (binary == NS_FALSE) { + encoding = Ns_GetTypeEncoding(cType); + if (encoding == NULL) { + encoding = NS_utf8Encoding; + } + } +#endif } } @@ -1538,7 +1565,15 @@ HttpGetResult( if (binary == NS_TRUE) { replyBodyObj = Tcl_NewByteArrayObj((unsigned char *)cData, cSize); } else { +#if defined(TCLHTTP_USE_EXTERNALTOUTF) + Tcl_DString ds; + Tcl_DStringInit(&ds); + Tcl_ExternalToUtfDString(encoding, cData, cSize, &ds); + replyBodyObj = Tcl_NewStringObj(Tcl_DStringValue(&ds), -1); + Tcl_DStringFree(&ds); +#else replyBodyObj = Tcl_NewStringObj(cData, cSize); +#endif } }
http_charsets.test
Description: Binary data
encoding_ns_http.test
Description: Binary data
_______________________________________________ naviserver-devel mailing list naviserver-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/naviserver-devel