On 12/20/23 11:46 PM, minf...@apache.org wrote:
> Author: minfrin
> Date: Wed Dec 20 22:46:22 2023
> New Revision: 1914814
>
> URL: http://svn.apache.org/viewvc?rev=1914814&view=rev
> Log:
> apr_escape: Add apr_escape_json() and apr_pescape_json().
>
> Modified:
> apr/apr/trunk/CHANGES
> apr/apr/trunk/encoding/apr_escape.c
> apr/apr/trunk/include/apr_escape.h
> apr/apr/trunk/test/testescape.c
> apr/apr/trunk/tools/gen_test_char.c
>
> Modified: apr/apr/trunk/encoding/apr_escape.c
> URL:
> http://svn.apache.org/viewvc/apr/apr/trunk/encoding/apr_escape.c?rev=1914814&r1=1914813&r2=1914814&view=diff
> ==============================================================================
> --- apr/apr/trunk/encoding/apr_escape.c (original)
> +++ apr/apr/trunk/encoding/apr_escape.c Wed Dec 20 22:46:22 2023
> @@ -36,7 +36,7 @@
> * char in here and get it to work, because if char is signed then it
> * will first be sign extended.
> */
> -#define TEST_CHAR(c, f) (test_char_table[(unsigned)(c)] & (f))
> +#define TEST_CHAR(c, f) (test_char_table[(apr_uint16_t)(c)] & (f))
>
> APR_DECLARE(apr_status_t) apr_escape_shell(char *escaped, const char *str,
> apr_ssize_t slen, apr_size_t *len)
> @@ -1209,6 +1209,271 @@ APR_DECLARE(const char *) apr_pescape_ld
> }
> }
>
> + return src;
> +}
> +
> +APR_DECLARE(apr_status_t) apr_escape_json(char *escaped, const char *str,
> + apr_ssize_t slen, int quote, apr_size_t *len)
Shouldn't we provide a length for the escaped buffer to be able to do some
sanity checking against overflows?
We could ignore this parameter in case that escaped is NULL.
Otherwise I think we should have a big fat warning in the doxygen documentation
of this function that it
is extremely dangerous to call this function without providing a buffer of
sufficient length and that
apr_escape_json should be called with escaped equal NULL before to determine
this length, just like as
we do in apr_pescape_json.
> +{
> + apr_size_t size = 1;
> + int found = quote;
> + int error = 0;
> + const unsigned char *s = (const unsigned char *) str;
> + unsigned char *d = (unsigned char *) escaped;
> + unsigned c;
> + const char invalid[3] = { 0xEF, 0xBF, 0xBD };
> +
> + if (s) {
> + if (d) {
> + if (quote) {
> + *d++ = '"';
> + size += 1;
> + }
> + while ((c = *s) && slen) {
> + if (TEST_CHAR(c, T_ESCAPE_JSON)) {
> + switch (c) {
> + case '\b':
> + *d++ = '\\';
> + *d++ = 'b';
> + size += 2;
> + found = 1;
> + break;
> + case '\f':
> + *d++ = '\\';
> + *d++ = 'f';
> + size += 2;
> + found = 1;
> + break;
> + case '\n':
> + *d++ = '\\';
> + *d++ = 'n';
> + size += 2;
> + found = 1;
> + break;
> + case '\r':
> + *d++ = '\\';
> + *d++ = 'r';
> + size += 2;
> + found = 1;
> + break;
> + case '\t':
> + *d++ = '\\';
> + *d++ = 't';
> + size += 2;
> + found = 1;
> + break;
> + case '\\':
> + *d++ = '\\';
> + *d++ = '\\';
> + size += 2;
> + found = 1;
> + break;
> + case '"':
> + *d++ = '\\';
> + *d++ = '"';
> + size += 2;
> + found = 1;
> + break;
> + default:
> + if (c < 0x20) {
> + size += apr_snprintf((char *)d, 6, "\\u%04x", c);
> + d += 5;
> + found = 1;
> + }
> + else if (((c >> 7) == 0x00)) {
How can this case happen? c's like this should fail the TEST_CHAR(c,
T_ESCAPE_JSON) test.
> + /* 1 byte */
> + memcpy(d, s, 1);
> + d += 1;
> + size += 1;
> + }
> + else if ((slen < 0 || slen > 1) && ((c >> 5) ==
> 0x06) && s[1]) {
> + /* 2 bytes */
> + if ((slen > 0 && slen < 2) || (s[1] >> 6) !=
> 0x02) {
> + memcpy(d, invalid, sizeof(invalid));
> + d += sizeof(invalid);
> + size += sizeof(invalid);
> + found = error = 1;
> + }
> + else {
> + memcpy(d, s, 2);
> + d += 2;
> + size += 2;
> + s += 1;
> + slen -= 1;
> + }
> + }
> + else if (((c >> 4) == 0x0E)) {
> + /* 3 bytes */
> + if ((slen > 0 && slen < 3) || (s[1] >> 6) !=
> 0x02 || (s[2] >> 6) != 0x02) {
> + memcpy(d, invalid, sizeof(invalid));
> + size += sizeof(invalid);
> + d += sizeof(invalid);
> + found = error = 1;
> + }
> + else {
> + memcpy(d, s, 3);
> + d += 3;
> + size += 3;
> + s += 2;
> + slen -= 2;
> + }
> + }
> + else if ((c >> 3) == 0x1E) {
> + /* 4 bytes */
> + if ((slen > 0 && slen < 4) || (s[1] >> 6) !=
> 0x02 || (s[2] >> 6) != 0x02 || (s[3] >> 6) != 0x02) {
> + memcpy(d, invalid, sizeof(invalid));
> + d += sizeof(invalid);
> + size += sizeof(invalid);
> + found = error = 1;
> + }
> + else {
> + memcpy(d, s, 4);
> + d += 4;
> + size += 4;
> + s += 3;
> + slen -= 3;
> + }
> + }
> + else {
> + memcpy(d, invalid, sizeof(invalid));
> + d += sizeof(invalid);
> + size += sizeof(invalid);
> + found = error = 1;
> + }
> + break;
> + }
> + }
> + else {
> + *d++ = c;
> + size++;
> + }
> + ++s;
> + slen--;
> + }
> + if (quote) {
> + *d++ = '"';
> + size += 1;
> + }
> + *d = '\0';
> + }
> + else {
> + if (quote) {
> + size += 1;
> + }
> + while ((c = *s) && slen) {
> + if (TEST_CHAR(c, T_ESCAPE_JSON)) {
> + switch (c) {
> + case '\b':
> + case '\f':
> + case '\n':
> + case '\r':
> + case '\t':
> + case '\\':
> + case '"':
> + size += 2;
> + found = 1;
> + break;
> + default:
> + if (c < 0x20) {
> + size += 5;
> + found = 1;
> + }
> + else if (((c >> 7) == 0x00)) {
How can this case happen? c's like this should fail the TEST_CHAR(c,
T_ESCAPE_JSON) test.
> + /* 1 byte */
> + size += 1;
> + }
> + else if ((slen < 0 || slen > 1) && ((c >> 5) ==
> 0x06) && s[1]) {
> + /* 2 bytes */
> + if ((slen > 0 && slen < 2) || (s[1] >> 6) !=
> 0x02) {
> + size += sizeof(invalid);
> + found = error = 1;
> + }
> + else {
> + size += 2;
> + s += 1;
> + slen -= 1;
> + }
> + }
> + else if (((c >> 4) == 0x0E)) {
> + /* 3 bytes */
> + if ((slen > 0 && slen < 3) || (s[1] >> 6) !=
> 0x02 || (s[2] >> 6) != 0x02) {
> + size += sizeof(invalid);
> + found = error = 1;
> + }
> + else {
> + size += 3;
> + s += 2;
> + slen -= 2;
> + }
> + }
> + else if ((c >> 3) == 0x1E) {
> + /* 4 bytes */
> + if ((slen > 0 && slen < 4) || (s[1] >> 6) !=
> 0x02 || (s[2] >> 6) != 0x02 || (s[3] >> 6) != 0x02) {
> + size += sizeof(invalid);
> + found = error = 1;
> + }
> + else {
> + size += 4;
> + s += 3;
> + slen -= 3;
> + }
> + }
> + else {
> + size += sizeof(invalid);
> + found = error = 1;
> + }
> + }
> + }
> + else {
> + size++;
> + }
> + ++s;
> + slen--;
> + }
> + if (quote) {
> + size += 1;
> + }
> + }
> + }
> +
> + else if (quote) {
> + if (d) {
> + memcpy(d, "NULL", 5);
> + }
> + else {
> + size += 4;
> + }
> + }
> +
> + if (len) {
> + *len = size;
> + }
> + if (error) {
> + return APR_EINVAL;
> + }
> + if (!found) {
> + return APR_NOTFOUND;
> + }
> +
> + return APR_SUCCESS;
> +}
> +
> +APR_DECLARE(const char *) apr_pescape_json(apr_pool_t *p, const char *src,
> + apr_ssize_t srclen, int quote)
The caller cannot easily determine if the src contained invalid data or was
already
properly escaped.
Can't we do a
APR_DECLARE(apr_status_t) pr_pescape_json(apr_pool_t *p, char **dest, const
char *src,
apr_ssize_t srclen, int quote)
instead which returns the status of apr_escape_json and the escaped JSON in
dest?
> +{
> + apr_size_t len;
> +
> + switch (apr_escape_json(NULL, src, srclen, quote, &len)) {
> + case APR_NOTFOUND: {
> + break;
> + }
> + default: {
> + char *encoded = apr_palloc(p, len);
> + apr_escape_json(encoded, src, srclen, quote, NULL);
> + return encoded;
> + }
> + }
> +
> return src;
> }
>