Hi all,
The current parser for the Cache-Control header doesn't take into
account quoted-string extensions to the header.
To fix this, I have created a modified implementation of apr_strtok()
called cache_strqtok(), that tokenises strings, but ignores the quoted
part of the strings, leaving them intact.
My question is, does cache_strqtok() belong in cache_util.c, as per
the patch below, or would it be a better idea to add it to apr
alongside apr_strtok() as apr_strqtok()?
Index: modules/cache/cache_util.c
===================================================================
--- modules/cache/cache_util.c (revision 1069969)
+++ modules/cache/cache_util.c (working copy)
@@ -27,6 +27,8 @@
extern module AP_MODULE_DECLARE_DATA cache_module;
+#define CACHE_SEPARATOR ", "
+
/* Determine if "url" matches the hostname, scheme and port and path
* in "filter". All but the path comparisons are case-insensitive.
*/
@@ -1022,6 +1024,74 @@
}
/**
+ * String tokenizer that ignores separator characters within quoted
strings
+ * and escaped characters, as per RFC2616 section 2.2.
+ */
+static char *cache_strqtok(char *str, const char *sep, char **last)
+{
+ char *token;
+ int quoted = 0;
+
+ if (!str) { /* subsequent call */
+ str = *last; /* start where we left off */
+ }
+
+ /* skip characters in sep (will terminate at '\0') */
+ while (*str && strchr(sep, *str)) {
+ ++str;
+ }
+
+ if (!*str) { /* no more tokens */
+ return NULL;
+ }
+
+ token = str;
+
+ /* skip valid token characters to terminate token and
+ * prepare for the next call (will terminate at '\0)
+ * on the way, ignore all quoted strings, and within
+ * quoted strings, escaped characters.
+ */
+ *last = token + 1;
+ while (**last) {
+ if (!quoted) {
+ if (**last == '\"') {
+ quoted = 1;
+ ++*last;
+ }
+ else if (!strchr(sep, **last)) {
+ ++*last;
+ }
+ else {
+ break;
+ }
+ }
+ else {
+ if (**last == '\"') {
+ quoted = 0;
+ ++*last;
+ }
+ else if (**last == '\\') {
+ ++*last;
+ if (**last) {
+ ++*last;
+ }
+ }
+ else {
+ ++*last;
+ }
+ }
+ }
+
+ if (**last) {
+ **last = '\0';
+ ++*last;
+ }
+
+ return token;
+}
+
+/**
* Parse the Cache-Control and Pragma headers in one go, marking
* which tokens appear within the header. Populate the structure
* passed in.
@@ -1043,7 +1113,7 @@
if (pragma_header) {
char *header = apr_pstrdup(r->pool, pragma_header);
- const char *token = apr_strtok(header, ", ", &last);
+ const char *token = cache_strqtok(header, CACHE_SEPARATOR,
&last);
while (token) {
/* handle most common quickest case... */
if (!strcmp(token, "no-cache")) {
@@ -1053,14 +1123,14 @@
else if (!strcasecmp(token, "no-cache")) {
cc->no_cache = 1;
}
- token = apr_strtok(NULL, ", ", &last);
+ token = cache_strqtok(NULL, CACHE_SEPARATOR, &last);
}
cc->pragma = 1;
}
if (cc_header) {
char *header = apr_pstrdup(r->pool, cc_header);
- const char *token = apr_strtok(header, ", ", &last);
+ const char *token = cache_strqtok(header, CACHE_SEPARATOR,
&last);
while (token) {
switch (token[0]) {
case 'n':
@@ -1178,7 +1248,7 @@
break;
}
}
- token = apr_strtok(NULL, ", ", &last);
+ token = cache_strqtok(NULL, CACHE_SEPARATOR, &last);
}
cc->cache_control = 1;
}
Regards,
Graham
--