On Tue, Oct 19, 2021 at 4:31 PM <[email protected]> wrote:
>
> +APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade
> *bbOut,
> + apr_bucket_brigade
> *bbIn,
> + apr_read_type_e block,
> + const char *boundary,
> + apr_size_t boundary_len,
> + apr_off_t maxbytes)
> +{
[]
> + /*
> + * Fast path.
> + *
> + * If we have at least one boundary worth of data, do an optimised
> + * substring search for the boundary, and split quickly if found.
> + */
> + if (len >= boundary_len) {
> +
> + apr_size_t off;
> + apr_size_t leftover;
> +
> + pos = strnstr(str, boundary, len);
apr_strnstr() maybe, strnstr() is non-standard AFAICT and possibly not
available on all platforms (Windows)?
> +
> + /* definitely found it, we leave */
> + if (pos != NULL) {
> +
> + off = pos - str;
> +
> + /* everything up to the boundary */
> + if (off) {
> +
> + apr_bucket_split(e, off);
> + APR_BUCKET_REMOVE(e);
> + APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> + e = APR_BRIGADE_FIRST(bbIn);
> + }
> +
> + /* cut out the boundary */
> + apr_bucket_split(e, boundary_len);
> + apr_bucket_delete(e);
> +
> + return APR_SUCCESS;
> + }
> +
> + /* any partial matches at the end? */
> + leftover = boundary_len - 1;
> + off = (len - leftover);
Can't we fall through the below "else" slow path here? The code looks
quite similar.
> +
> + while (leftover) {
> + if (!strncmp(str + off, boundary, leftover)) {
> +
> + if (off) {
> +
> + apr_bucket_split(e, off);
> + APR_BUCKET_REMOVE(e);
> + APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> + e = APR_BRIGADE_FIRST(bbIn);
> + }
> +
> + outbytes += off;
> + inbytes -= off;
> +
> + goto skip;
> + }
> + off++;
> + leftover--;
> + }
> +
> + APR_BUCKET_REMOVE(e);
> + APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> + outbytes += len;
> +
> + continue;
> +
> + }
> +
> + /*
> + * Slow path.
> + *
> + * We need to read ahead at least one boundary worth of data so
> + * we can search across the bucket edges.
> + */
> + else {
> +
> + apr_size_t off = 0;
> +
> + /* find all definite non matches */
> + while (len) {
> + if (!strncmp(str + off, boundary, len)) {
> +
> + if (off) {
> +
> + apr_bucket_split(e, off);
> + APR_BUCKET_REMOVE(e);
> + APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> + e = APR_BRIGADE_FIRST(bbIn);
> + }
> +
> + inbytes -= off;
> +
> + goto skip;
> + }
> + off++;
> + len--;
> + }
> +
> + APR_BUCKET_REMOVE(e);
> + APR_BRIGADE_INSERT_TAIL(bbOut, e);
> + continue;
"outbytes" not bumped?
> +
> + }
> +
> + /*
> + * If we reach skip, it means the bucket in e is:
> + *
> + * - shorter than the boundary
> + * - matches the boundary up to the bucket length
> + * - might match more buckets
> + *
> + * Read further buckets and check whether the boundary matches all
> + * the way to the end. If so, we have a match. If no match, shave off
> + * one byte and continue round to try again.
> + */
> +skip:
Below you read ahead to try to match the end of the boundary, and if
not split one byte into bbOut before restarting the loop, which could
result in a quite fragmented bbOut and is also what you almost did
above already.
Couldn't we instead save the partially matching bucket(s) in a
temporary brigade (provided by the caller) and restart the loop with
the more effective [apr_]strnstr() match (at boundary+off)?
> +
> + for (next = APR_BUCKET_NEXT(e);
> + inbytes < boundary_len && next != APR_BRIGADE_SENTINEL(bbIn);
> + next = APR_BUCKET_NEXT(next)) {
> +
> + const char *str;
> + apr_size_t off;
> + apr_size_t len;
> +
> + rv = apr_bucket_read(next, &str, &len, block);
> +
> + if (rv != APR_SUCCESS) {
> + return rv;
> + }
> +
> + off = boundary_len - inbytes;
> +
> + if (len > off) {
> +
> + /* not a match, bail out */
> + if (strncmp(str, boundary + inbytes, off)) {
> + break;
> + }
> +
> + /* a match! remove the boundary and return */
> + apr_bucket_split(next, off);
> +
> + e = APR_BUCKET_NEXT(next);
> +
> + for (prev = APR_BRIGADE_FIRST(bbIn);
> + prev != e;
> + prev = APR_BRIGADE_FIRST(bbIn)) {
> +
> + apr_bucket_delete(prev);
> +
> + }
> +
> + return APR_SUCCESS;
> +
> + }
> + if (len == off) {
> +
> + /* not a match, bail out */
> + if (strncmp(str, boundary + inbytes, off)) {
> + break;
> + }
> +
> + /* a match! remove the boundary and return */
> + e = APR_BUCKET_NEXT(next);
> +
> + for (prev = APR_BRIGADE_FIRST(bbIn);
> + prev != e;
> + prev = APR_BRIGADE_FIRST(bbIn)) {
> +
> + apr_bucket_delete(prev);
> +
> + }
> +
> + return APR_SUCCESS;
> +
> + }
> + else if (len) {
> +
> + /* not a match, bail out */
> + if (strncmp(str, boundary + inbytes, len)) {
> + break;
> + }
> +
> + /* still hope for a match */
> + inbytes += len;
> + }
> +
> + }
> +
> + /*
> + * If we reach this point, the bucket e did not match the boundary
> + * in the subsequent buckets.
> + *
> + * Bump one byte off, and loop round to search again.
> + */
> + apr_bucket_split(e, 1);
> + APR_BUCKET_REMOVE(e);
> + APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> + outbytes++;
> +
> + }
> +
> + return APR_INCOMPLETE;
> +}
Regards;
Yann.