CR LF are 0x0D 0x25 (decimal 13 37) in EBCDIC. Those have protocol-specific meanings.

NL in EBCDIC or ASCII has no specific meaning, it is opaque text. It's not
an HTTP CTRL char.

However, wouldn't we need to escape it in a shell cmd? We might want to
consider escaping many C1 ctrls in the shell.

On Jul 30, 2016 8:25 AM, "Eric Covener" <cove...@gmail.com> wrote:

> What's going on with 0x15 (newline in ebcdic)?  It is a control char
> that we map to 0x85 but it doesn't look right in the generated table.
>
> On Fri, Jul 29, 2016 at 9:37 PM, Eric Covener <cove...@gmail.com> wrote:
> > from rev below:
> > http://people.apache.org/~covener/test_char.h
> >
> >
> >
> > On Fri, Jul 29, 2016 at 6:00 PM,  <wr...@apache.org> wrote:
> >> Author: wrowe
> >> Date: Fri Jul 29 22:00:52 2016
> >> New Revision: 1754579
> >>
> >> URL: http://svn.apache.org/viewvc?rev=1754579&view=rev
> >> Log:
> >> Replacement solution to identify VCHAR/ASCII symbols, even in EBCDIC.
> >>
> >> Looking for someone with an EBCDIC environment to post the output of
> >> the test_char.h generated file for verification.
> >>
> >>
> >> Modified:
> >>     httpd/httpd/trunk/server/gen_test_char.c
> >>
> >> Modified: httpd/httpd/trunk/server/gen_test_char.c
> >> URL:
> http://svn.apache.org/viewvc/httpd/httpd/trunk/server/gen_test_char.c?rev=1754579&r1=1754578&r2=1754579&view=diff
> >>
> ==============================================================================
> >> --- httpd/httpd/trunk/server/gen_test_char.c (original)
> >> +++ httpd/httpd/trunk/server/gen_test_char.c Fri Jul 29 22:00:52 2016
> >> @@ -20,6 +20,7 @@
> >>  #define apr_isalpha(c) (isalpha(((unsigned char)(c))))
> >>  #define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
> >>  #define apr_isprint(c) (isprint(((unsigned char)(c))))
> >> +#define apr_isascii(c) (isascii(((unsigned char)(c))))
> >>  #include <ctype.h>
> >>  #define APR_HAVE_STDIO_H 1
> >>  #define APR_HAVE_STRING_H 1
> >> @@ -31,6 +32,48 @@
> >>
> >>  #endif
> >>
> >> +#if APR_CHARSET_EBCDIC
> >> +/* See util.c for complete explanation of this table */
> >> +static const short ucharmap[] = {
> >> +    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
> >> +    0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
> >> +    0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
> >> +    0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
> >> +    0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B,
> >> +    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
> >> +    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
> >> +    0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
> >> +    0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5,
> >> +    0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
> >> +    0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF,
> >> +    0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
> >> +    0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
> >> +    0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
> >> +    0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
> >> +    0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
> >> +    0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
> >> +    0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
> >> +    0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
> >> +    0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
> >> +    0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
> >> +    0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
> >> +    0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
> >> +    0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
> >> +    0x7B, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
> >> +    0x68, 0x69, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
> >> +    0x7D, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
> >> +    0x71, 0x72, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
> >> +    0x5C, 0xF7, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
> >> +    0x79, 0x7A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
> >> +    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
> >> +    0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
> >> +};
> >> +#define test_isascii_equiv(c) ((ucharmap[(unsigned char)c] & ~0x7f) == 0)
> >> +#else
> >> +#define test_isascii_equiv(c) apr_isascii(c)
> >> +#endif
> >> +
> >> +
> >>  #if defined(WIN32) || defined(OS2)
> >>  #define NEED_ENHANCED_ESCAPES
> >>  #endif
> >> @@ -120,19 +163,20 @@ int main(int argc, char *argv[])
> >>
> >>          /* Stop for any non-'token' character, including ctrls,
> obs-text,
> >>           * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616)
> >> -         * XXX: We need to build a specific table for EBCDIC values
> with
> >> -         * ASCII equivilants here
> >> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our
> EBCDIC table
> >> +         * are captured by apr_iscntrl()
> >>           */
> >> -        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}",
> c)) {
> >> +        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}", c)
> >> +               || !test_isascii_equiv(c)) {
> >>              flags |= T_HTTP_TOKEN_STOP;
> >>          }
> >>
> >>          /* Catch CTRLs other than VCHAR, HT and SP, and obs-text
> (RFC7230 3.2)
> >>           * This includes only the C0 plane, not C1 (which is obs-text
> itself.)
> >> -         * XXX: Need to constrain iscntrl to C0 equivilants in ASCII,
> >> -         * even on EBCDIC architecture
> >> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our
> EBCDIC table
> >> +         * are captured by apr_iscntrl()
> >>           */
> >> -        if (!c || (apr_iscntrl(c) && c != '\t')) {
> >> +        if (!c || (apr_iscntrl(c) && c != '\t' && test_isascii_equiv(c))) {
> >>              flags |= T_HTTP_CTRLS;
> >>          }
> >>
> >>
> >>
> >
> >
> >
> > --
> > Eric Covener
> > cove...@gmail.com
>
>
>
> --
> Eric Covener
> cove...@gmail.com
>

Reply via email to