Hello!

We are having some difficulties when using the ns_http command with
sites using 8-bit encoding.

The ns_http command does not convert the received data, so we must use
the 'encoding convertfrom' command. Sometimes converted strings become
corrupted. For example, there is a server with output encoding
iso-8859-2:
if the server passes 'äöüŁ', then after conversion we get 'äöüŁ'
(correct);
if the server passes 'ÄÖÜŁ', then after conversion we get 'ÄÖ#'
(corrupted).
See attached ns_http.test1 for example (test 1.2 fails).

Such strings can be found in any 8-bit encoding (to see this, run the
attached http_charsets.test with the 'pairsTest' constraint enabled).
The source for the ns_http command (tclhttp.c) shows that the problem is
the use of Tcl_NewStringObj on binary input data (8-bit chars).

Two solutions come up:
1) Using Tcl_NewByteArrayObj instead of Tcl_NewStringObj;
2) Using Tcl_ExternalToUtf before using Tcl_NewStringObj, i.e. built-in
'encoding convertfrom'.

Attached tclhttp.c.binary-externaltoutf patch modifies the ns_http
command:
1) the -binary switch is added to the queue/wait/run sub-commands to
force the use of Tcl_NewByteArrayObj on text pages;
2) without -binary the text page will be converted according to the
Content-Type header.

Note that the second change requires the TCLHTTP_USE_EXTERNALTOUTF to
be defined at compile time.

The fixed ns_http command can be tested with the attached ns_http.test2
(see 1.2.1 and 1.2.2). More intensive testing of the changes can be done
with http_charsets.test (note the commented-out pairsTest
constraint).
Also, I replaced 'nstest::http-0.9 -encoding xxx' with 'ns_http
run' in the existing encoding.test (see encoding_ns_http.test). All data
transformations are performed successfully without explicit decoding.

Automatic data decoding is convenient to use, but it changes the
behavior of ns_http on 8-bit inputs. These changes could break existing
code if someone uses ns_http to interact with 8-bit sites (with risk of
data corruption). To use the patched version of ns_http, either remove
the explicit 'encoding convertfrom' call or add the -binary switch.

It should be noted that the -binary switch followed by 'encoding
convertfrom' will also be useful for 8-bit sites with missing or
incorrect Content-Type.

Regards,
Oleg Oleinick.

PS. Attached files:

ns_http.test1 - tests for the current version, shows corruption of
8-bit text;

ns_http.test2 - tests for the patched version, shows the correct
receipt of 8-bit text;

tclhttp.c.binary-externaltoutf.patch - patch for changing the ns_http
command, adds the -binary switch and text data auto-decoding;

http_charsets.test - tests for ns_http, suitable for both the current
and the patched version;

encoding_ns_http.test - like the existing encoding.test, with
'nstest::http-0.9 -encoding xxx' replaced by the new 'ns_http run';

Attachment: ns_http.test1
Description: Binary data

Attachment: ns_http.test2
Description: Binary data

diff --git a/nsd/tclhttp.c b/nsd/tclhttp.c
index cef2ee7d..2693fcfe 100644
--- a/nsd/tclhttp.c
+++ b/nsd/tclhttp.c
@@ -40,6 +40,12 @@
 #include <openssl/err.h>
 #endif
 
+/*
+ * temporary solution for ns_http wait/queue/run -binary
+ * FIXME: move this definition to nsd.h
+ */
+#define NS_HTTP_FLAG_BINARY    (1<<4)
+
 /*
  * The maximum number of bytes we can send to TLS
  * in one operation is 2^14 => 16384 (see RFC 5246).
@@ -658,7 +664,7 @@ HttpWaitObjCmd(
     NsHttpTask *httpPtr = NULL;
 
     char       *id = NULL, *outputFileName = NULL;
-    int         result = TCL_OK, decompress = 0;
+    int         result = TCL_OK, decompress = 0, binary = 0;
     Tcl_WideInt spoolLimit = -1;
     Tcl_Obj    *elapsedVarObj = NULL,
                *resultVarObj = NULL,
@@ -677,6 +683,7 @@ HttpWaitObjCmd(
         {"-outputfile", Ns_ObjvString,  &outputFileName,  NULL},
         {"-spoolsize",  Ns_ObjvMemUnit, &spoolLimit,      NULL},
         {"-decompress", Ns_ObjvBool,    &decompress,      INT2PTR(NS_TRUE)},
+        {"-binary",     Ns_ObjvBool,    &binary,          INT2PTR(NS_TRUE)},
         {NULL,          NULL,           NULL,             NULL}
     };
     Ns_ObjvSpec args[] = {
@@ -707,6 +714,10 @@ HttpWaitObjCmd(
             Ns_Log(Warning, "ns_http_wait: -decompress option is deprecated");
             httpPtr->flags |= NS_HTTP_FLAG_DECOMPRESS;
         }
+        if (binary != 0) {
+            Ns_Log(Warning, "ns_http_wait: -binary option is deprecated");
+            httpPtr->flags |= NS_HTTP_FLAG_BINARY;
+        }
         if (spoolLimit > -1) {
             Ns_Log(Warning, "ns_http_wait: -spoolsize option is deprecated");
             httpPtr->spoolLimit = spoolLimit;
@@ -1215,7 +1226,7 @@ HttpQueue(
     bool run
 ) {
     Tcl_Interp *interp;
-    int         result = TCL_OK, decompress = 0;
+    int         result = TCL_OK, decompress = 0, binary = 0;
     Tcl_WideInt spoolLimit = -1;
     int         verifyCert = 0, keepHostHdr = 0;
     NsHttpTask *httpPtr = NULL;
@@ -1239,6 +1250,7 @@ HttpQueue(
     Ns_ObjvValueRange sizeRange = {0, LLONG_MAX};
 
     Ns_ObjvSpec opts[] = {
+        {"-binary",           Ns_ObjvBool,    &binary,         INT2PTR(NS_TRUE)},
         {"-body",             Ns_ObjvObj,     &bodyObj,        NULL},
         {"-body_size",        Ns_ObjvWideInt, &bodySize,       &sizeRange},
         {"-body_file",        Ns_ObjvString,  &bodyFileName,   NULL},
@@ -1358,7 +1370,6 @@ HttpQueue(
             HttpSpliceChannels(interp, httpPtr);
             HttpClose(httpPtr);
         }
-
     } else {
 
         /*
@@ -1376,6 +1387,9 @@ HttpQueue(
         if (decompress != 0) {
             httpPtr->flags |= NS_HTTP_FLAG_DECOMPRESS;
         }
+        if (binary != 0) {
+            httpPtr->flags |= NS_HTTP_FLAG_BINARY;
+        }
         httpPtr->servPtr = itPtr->servPtr;
 
         httpPtr->task = Ns_TaskTimedCreate(httpPtr->sock, HttpProc, httpPtr, expirePtr);
@@ -1483,7 +1497,6 @@ HttpGetResult(
 
     NS_NONNULL_ASSERT(interp != NULL);
     NS_NONNULL_ASSERT(httpPtr != NULL);
-
     if (httpPtr->error != NULL) {
         if (httpPtr->finalSockState == NS_SOCK_TIMEOUT) {
             Tcl_SetErrorCode(interp, errorCodeTimeoutString, (char *)0L);
@@ -1495,6 +1508,9 @@ HttpGetResult(
     }
 
     if (httpPtr->recvSpoolMode == NS_FALSE) {
+#if defined(TCLHTTP_USE_EXTERNALTOUTF)
+        Tcl_Encoding encoding = NULL; 
+#endif
         bool   binary = NS_FALSE;
         int    cSize;
         char  *cData;
@@ -1517,6 +1533,9 @@ HttpGetResult(
                 binary = NS_TRUE;
             }
         }
+        if ((httpPtr->flags & NS_HTTP_FLAG_BINARY) != 0u) {
+            binary = NS_TRUE;
+        }
         if (binary == NS_FALSE) {
             char  *cType = NULL;
 
@@ -1529,6 +1548,14 @@ HttpGetResult(
                  * completely regular text formats!
                  */
                 binary = Ns_IsBinaryMimeType(cType);
+#if defined(TCLHTTP_USE_EXTERNALTOUTF)
+                if (binary == NS_FALSE) {
+                    encoding = Ns_GetTypeEncoding(cType);
+                    if (encoding == NULL) {
+                        encoding = NS_utf8Encoding;
+                    }
+                }
+#endif
             }
         }
 
@@ -1538,7 +1565,15 @@ HttpGetResult(
         if (binary == NS_TRUE)  {
             replyBodyObj = Tcl_NewByteArrayObj((unsigned char *)cData, cSize);
         } else {
+#if defined(TCLHTTP_USE_EXTERNALTOUTF)
+            Tcl_DString ds;
+            Tcl_DStringInit(&ds);
+            Tcl_ExternalToUtfDString(encoding, cData, cSize, &ds);
+            replyBodyObj = Tcl_NewStringObj(Tcl_DStringValue(&ds), -1);
+            Tcl_DStringFree(&ds);
+#else
             replyBodyObj = Tcl_NewStringObj(cData, cSize);
+#endif
         }
     }
 

Attachment: http_charsets.test
Description: Binary data

Attachment: encoding_ns_http.test
Description: Binary data

_______________________________________________
naviserver-devel mailing list
naviserver-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/naviserver-devel

Reply via email to