pcs 96/10/29 07:23:05
Modified: src mod_negotiation.c
Log:
Reviewed By: Brian Behlendorf, Jim Jagielski
Update mod_negotiation.c to support variants with multiple language
types. Languages can either be obtained from file extensions
(eg foo.fr.en.html) or on the Content-Language line in a type-map
file (eg Content-Language: fr, en).
This patch:
* Allows multiple comma separated languages on the Content-Language:
header in a type-map file
* Updates the function which sets the language quality factor for
each variant to select the best (highest q) match from the tags
of the variant, using the algorithm in HTTP/1.1 14.4
* If the new (HTTP/1.1) language negotiation results in a tie between
variants, falls back onto the Apache 1.1 language negotiation
algorithm, using _just_ the first language of the variant
* Updates the 406 text and Alternates header to list multiple languages
if necessary
Revision Changes Path
1.23 +174 -89 apache/src/mod_negotiation.c
Index: mod_negotiation.c
===================================================================
RCS file: /export/home/cvs/apache/src/mod_negotiation.c,v
retrieving revision 1.22
retrieving revision 1.23
diff -C3 -r1.22 -r1.23
*** mod_negotiation.c 1996/10/28 16:28:17 1.22
--- mod_negotiation.c 1996/10/29 15:23:03 1.23
***************
*** 82,87 ****
--- 82,98 ----
module negotiation_module;
+ char *merge_string_array (pool *p, array_header *arr, char *sep)
+ {
+ int i;
+ char *t = "";
+
+ for (i = 0; i < arr->nelts; i++) {
+ t = pstrcat(p, t, i ? sep : "", ((char**)arr->elts)[i], NULL);
+ }
+ return t;
+ }
+
void *create_neg_dir_config (pool *p, char *dummy)
{
neg_dir_config *new =
***************
*** 162,168 ****
char *type_name;
char *file_name;
char *content_encoding;
! char *content_language;
char *content_charset;
char *description;
--- 173,179 ----
char *type_name;
char *file_name;
char *content_encoding;
! array_header *content_languages; /* list of languages for this variant */
char *content_charset;
char *description;
***************
*** 230,236 ****
mime_info->type_name = "";
mime_info->file_name = "";
mime_info->content_encoding = "";
! mime_info->content_language = "";
mime_info->content_charset = "";
mime_info->description = "";
--- 241,247 ----
mime_info->type_name = "";
mime_info->file_name = "";
mime_info->content_encoding = "";
! mime_info->content_languages = NULL;
mime_info->content_charset = "";
mime_info->description = "";
***************
*** 392,397 ****
--- 403,429 ----
return accept_recs;
}
+ /* Given the text of the Content-Language: line from the var map file,
+ * return an array containing the languages of this variant
+ */
+
+ array_header *do_languages_line (pool *p, char **lang_line)
+ {
+ array_header *lang_recs = make_array (p, 2, sizeof (char *));
+
+ if (!lang_line) return lang_recs;
+
+ while (**lang_line) {
+ char **new = (char **)push_array (lang_recs);
+ *new = get_token (p, lang_line, 0);
+ str_tolower (*new);
+ if (**lang_line == ',')
+ ++(*lang_line);
+ }
+
+ return lang_recs;
+ }
+
/*****************************************************************
*
* Handling header lines from clients...
***************
*** 648,655 ****
mime_info.bytes = atoi(body);
}
else if (!strncmp (buffer, "content-language:", 17)) {
! mime_info.content_language = get_token (neg->pool, &body, 0);
! str_tolower (mime_info.content_language);
}
else if (!strncmp (buffer, "content-encoding:", 17)) {
mime_info.content_encoding = get_token (neg->pool, &body, 0);
--- 680,687 ----
mime_info.bytes = atoi(body);
}
else if (!strncmp (buffer, "content-language:", 17)) {
! mime_info.content_languages =
! do_languages_line(neg->pool, &body);
}
else if (!strncmp (buffer, "content-encoding:", 17)) {
mime_info.content_encoding = get_token (neg->pool, &body, 0);
***************
*** 756,764 ****
mime_info.content_encoding = sub_req->content_encoding;
str_tolower(mime_info.content_encoding);
}
! if (sub_req->content_language) {
! mime_info.content_language = sub_req->content_language;
! str_tolower(mime_info.content_language);
}
get_entry (neg->pool, &accept_info, sub_req->content_type);
--- 788,800 ----
mime_info.content_encoding = sub_req->content_encoding;
str_tolower(mime_info.content_encoding);
}
! if (sub_req->content_languages) {
! int i;
! mime_info.content_languages = sub_req->content_languages;
! if (mime_info.content_languages)
! for (i = 0; i < mime_info.content_languages->nelts; ++i)
! str_tolower(((char**)
! (mime_info.content_languages->elts))[i]);
}
get_entry (neg->pool, &accept_info, sub_req->content_type);
***************
*** 958,964 ****
if (!neg->use_transparent_neg)
for (j = 0; j < neg->avail_vars->nelts; ++j) {
var_rec *variant = &avail_recs[j];
! if (variant->content_language && *variant->content_language) {
neg->default_lang_quality = 0.001;
return;
}
--- 994,1001 ----
if (!neg->use_transparent_neg)
for (j = 0; j < neg->avail_vars->nelts; ++j) {
var_rec *variant = &avail_recs[j];
! if (variant->content_languages &&
! variant->content_languages->nelts) {
neg->default_lang_quality = 0.001;
return;
}
***************
*** 978,983 ****
--- 1015,1026 ----
* match, use the longest string from the Accept-Language: header
* (see HTTP/1.1 [14.4])
*
+ * When a variant has multiple languages, we find the 'best'
+ * match for each variant language tag as above, then select the
+ * one with the highest q value. Because both the accept-header
+ * and variant can have multiple languages, we now have a hairy
+ * loop-within-a-loop here.
+ *
* If the variant has no language and we have no Accept-Language
* items, leave the quality at 1.0 and return.
*
***************
*** 994,1089 ****
void set_language_quality(negotiation_state *neg, var_rec *variant)
{
- accept_rec *accs, *best = NULL, *star = NULL;
int i;
- char *lang = variant->content_language;
- int prefixlen = 0;
- char *p;
int naccept = neg->accept_langs->nelts;
int index;
neg_dir_config *conf = NULL;
! int longest_lang_range_len = 0;
! int len;
if (naccept == 0)
conf = (neg_dir_config *) get_module_config (neg->r->per_dir_config,
&negotiation_module);
! if (naccept == 0 && (!lang || !*lang))
! return; /* variant has no assigned language */
!
! p = strchr(lang, '-'); /* find prefix part (if any) */
! if (p)
! prefixlen = p - lang;
! if (!lang || !*lang) {
/* This variant has no content-language, so use the default
* quality factor for variants with no content-language
* (previously set by set_default_lang_quality()). */
variant->lang_quality = neg->default_lang_quality;
}
else if (naccept) {
float fiddle_q = 0.0;
!
! accs = (accept_rec *)neg->accept_langs->elts;
!
! for (i = 0; i < neg->accept_langs->nelts; ++i) {
! if (!strcmp(accs[i].type_name, "*")) {
! star = &accs[i];
! continue;
! }
!
! /* Find language. We match if either the variant language
! * tag exactly matches, or the prefix of the tag up to the
! * '-' character matches the whole of the language in the
! * Accept-Language header */
! if ((!strcmp (lang, accs[i].type_name) ||
! (prefixlen &&
! !strncmp(lang, accs[i].type_name, prefixlen) &&
! (accs[i].type_name[prefixlen] == '\0'))) &&
! ((len = strlen(accs[i].type_name)) >
! longest_lang_range_len)) {
! longest_lang_range_len = len;
! best = &accs[i];
! }
!
! if (! best) {
! /* The next bit is a fiddle. Some browsers might be
! * configured to send more specific language ranges
! * than desirable. For example, an Accept-Language of
! * en-US should never match variants with languages en
! * or en-GB. But US English speakers might pick en-US
! * as their language choice. So this fiddle checks if
! * the language range has a prefix, and if so, it
! * matches variants which match that prefix with a
! * priority of 0.001. So a request for en-US would
! * match variants of types en and en-GB, but at much
! * lower priority than matches of en-US directly, or
! * of any other language listed on the Accept-Language
! * header
! */
! if ((p = strchr(accs[i].type_name, '-'))) {
! int plen = p - accs[i].type_name;
! if (!strncmp(lang, accs[i].type_name, plen))
! fiddle_q = 0.001;
! }
! }
!
! }
! variant->lang_quality = best ? best->quality :
(star ? star->quality : fiddle_q);
- variant->definite = variant->definite && best;
}
! /* Now set the old lang_index field */
index = 0;
if (naccept == 0) /* Client doesn't care */
! index = find_default_index (conf,
! variant->content_language);
else /* Client has Accept-Language */
! index = find_lang_index (neg->accept_langs,
! variant->content_language);
variant->lang_index = index;
return;
--- 1037,1172 ----
void set_language_quality(negotiation_state *neg, var_rec *variant)
{
int i;
int naccept = neg->accept_langs->nelts;
int index;
neg_dir_config *conf = NULL;
! char *firstlang;
if (naccept == 0)
conf = (neg_dir_config *) get_module_config (neg->r->per_dir_config,
&negotiation_module);
! if (naccept == 0 && (!variant->content_languages ||
! !variant->content_languages->nelts))
! return; /* no accept-language and no variant lang */
! if (!variant->content_languages || !variant->content_languages->nelts) {
/* This variant has no content-language, so use the default
* quality factor for variants with no content-language
* (previously set by set_default_lang_quality()). */
variant->lang_quality = neg->default_lang_quality;
+
+ if (naccept == 0)
+ return; /* no accept-language items */
+
}
else if (naccept) {
+ /* Variant has one (or more) languages, and we have one (or more)
+ * language ranges on the Accept-Language header. Look for
+ * the best match. We do this by going through each language
+ * on the variant description looking for a match on the
+ * Accept-Language header. The best match is the longest matching
+ * language on the header. The final result is the best q value
+ * from all the languages on the variant description.
+ */
+ int j;
float fiddle_q = 0.0;
! accept_rec *accs = (accept_rec *)neg->accept_langs->elts;
! accept_rec *best = NULL, *star = NULL;
! char *p;
! for (j = 0; j < variant->content_languages->nelts; ++j) {
! char *lang; /* language from variant description */
! accept_rec *bestthistag = NULL;
! int prefixlen = 0;
! int longest_lang_range_len = 0;
! int len;
! /* lang is the variant's language-tag, which is the one
! * we are allowed to use the prefix of in HTTP/1.1
! */
! lang = ((char **)(variant->content_languages->elts))[j];
! p = strchr(lang, '-'); /* find prefix part (if any) */
! if (p)
! prefixlen = p - lang;
!
! /* now find the best (i.e. longest) matching Accept-Language
! * header language. We put the best match for this tag in
! * bestthistag. We cannot update the overall best (based on
! * q value) because the best match for this tag is the longest
! * language item on the accept header, not necessarily the
! * highest q.
! */
! for (i = 0; i < neg->accept_langs->nelts; ++i) {
! if (!strcmp(accs[i].type_name, "*")) {
! if (!star)
! star = &accs[i];
! continue;
! }
!
! /* Find language. We match if either the variant language
! * tag exactly matches, or the prefix of the tag up to the
! * '-' character matches the whole of the language in the
! * Accept-Language header. We only use this accept-language
! * item as the best match for the current tag if it
! * is longer than the previous best match */
! if ((!strcmp (lang, accs[i].type_name) ||
! (prefixlen &&
! !strncmp(lang, accs[i].type_name, prefixlen) &&
! (accs[i].type_name[prefixlen] == '\0'))) &&
! ((len = strlen(accs[i].type_name)) >
! longest_lang_range_len)) {
! longest_lang_range_len = len;
! bestthistag = &accs[i];
! }
!
! if (! bestthistag) {
! /* The next bit is a fiddle. Some browsers might be
! * configured to send more specific language ranges
! * than desirable. For example, an Accept-Language of
! * en-US should never match variants with languages en
! * or en-GB. But US English speakers might pick en-US
! * as their language choice. So this fiddle checks if
! * the language range has a prefix, and if so, it
! * matches variants which match that prefix with a
! * priority of 0.001. So a request for en-US would
! * match variants of types en and en-GB, but at much
! * lower priority than matches of en-US directly, or
! * of any other language listed on the Accept-Language
! * header
! */
! if ((p = strchr(accs[i].type_name, '-'))) {
! int plen = p - accs[i].type_name;
! if (!strncmp(lang, accs[i].type_name, plen))
! fiddle_q = 0.001;
! }
! }
! }
! /* Finished looking at Accept-Language headers, the best
! * (longest) match is in bestthistag, or NULL if no match
! */
! if (!best ||
! (bestthistag && bestthistag->quality > best->quality))
! best = bestthistag;
! }
!
! variant->lang_quality = best ? best->quality :
(star ? star->quality : fiddle_q);
}
! /* Now set the old lang_index field. Since this is old
! * stuff anyway, don't bother with handling multiple languages
! * per variant, just use the first one assigned to it
! */
index = 0;
+ if (variant->content_languages && variant->content_languages->nelts)
+ firstlang = ((char**)variant->content_languages->elts)[0];
+ else
+ firstlang = "";
if (naccept == 0) /* Client doesn't care */
! index = find_default_index (conf, firstlang);
else /* Client has Accept-Language */
! index = find_lang_index (neg->accept_langs, firstlang);
variant->lang_index = index;
return;
***************
*** 1326,1332 ****
fprintf(stderr, "Variant: file=%s type=%s lang=%s acceptq=%1.3f
langq=%1.3f typeq=%1.3f q=%1.3f definite=%d\n",
variant->file_name ? variant->file_name : "",
variant->type_name ? variant->type_name : "",
! variant->content_language ? variant->content_language : "",
variant->accept_type_quality,
variant->lang_quality,
variant->type_quality,
--- 1409,1415 ----
fprintf(stderr, "Variant: file=%s type=%s lang=%s acceptq=%1.3f
langq=%1.3f typeq=%1.3f q=%1.3f definite=%d\n",
variant->file_name ? variant->file_name : "",
variant->type_name ? variant->type_name : "",
! variant->content_languages ? merge_string_array(neg->pool,
variant->content_languages, ",") : "",
variant->accept_type_quality,
variant->lang_quality,
variant->type_quality,
***************
*** 1594,1605 ****
else if (strcmp(sample_type, variant->type_name))
vary_by_type = 1;
}
! if (variant->content_language) {
! if (*variant->content_language)
! rec = pstrcat(r->pool, rec, " {language ",
! variant->content_language, "}", NULL);
! if (!sample_language) sample_language =
variant->content_language;
! else if (strcmp(sample_language, variant->content_language))
vary_by_language = 1;
}
if (variant->content_encoding) {
--- 1677,1688 ----
else if (strcmp(sample_type, variant->type_name))
vary_by_type = 1;
}
! if (variant->content_languages &&
variant->content_languages->nelts) {
! char *langs =
! merge_string_array (r->pool, variant->content_languages, ",");
! rec = pstrcat(r->pool, rec, " {language ", langs, "}", NULL);
! if (!sample_language) sample_language = langs;
! else if (strcmp(sample_language, langs))
vary_by_language = 1;
}
if (variant->content_encoding) {
***************
*** 1654,1672 ****
var_rec *variant = &((var_rec *)neg->avail_vars->elts)[i];
char *filename = variant->file_name ? variant->file_name : "";
char *content_type = variant->type_name ? variant->type_name : "";
! char *content_language =
! variant->content_language ? variant->content_language : "";
char *description = variant->description ? variant->description :
"";
/* The format isn't very neat, and it would be nice to make
* the tags human readable (eg replace 'language en' with
* 'English'). */
t = pstrcat(r->pool, t, "<li><a href=\"", filename, "\">",
! filename, "</a> ", description,
! " type ", content_type,
! *content_language ? " language " : "", content_language,
! "\n",
! NULL);
}
t = pstrcat(r->pool, t, "</ul>\n", NULL);
--- 1737,1757 ----
var_rec *variant = &((var_rec *)neg->avail_vars->elts)[i];
char *filename = variant->file_name ? variant->file_name : "";
char *content_type = variant->type_name ? variant->type_name : "";
! array_header *languages = variant->content_languages;
char *description = variant->description ? variant->description :
"";
/* The format isn't very neat, and it would be nice to make
* the tags human readable (eg replace 'language en' with
* 'English'). */
t = pstrcat(r->pool, t, "<li><a href=\"", filename, "\">",
! filename, "</a> ", description, NULL);
! if (content_type)
! t = pstrcat(r->pool, t, " type ", content_type, NULL);
! if (languages && languages->nelts)
! t = pstrcat(r->pool, t, " language ",
! merge_string_array(r->pool, languages, ", "),
! NULL);
! t = pstrcat(r->pool, t, "\n", NULL);
}
t = pstrcat(r->pool, t, "</ul>\n", NULL);
***************
*** 1853,1859 ****
r->handler = sub_req->handler;
r->content_type = sub_req->content_type;
r->content_encoding = sub_req->content_encoding;
! r->content_language = sub_req->content_language;
r->finfo = sub_req->finfo;
return OK;
--- 1938,1944 ----
r->handler = sub_req->handler;
r->content_type = sub_req->content_type;
r->content_encoding = sub_req->content_encoding;
! r->content_languages = sub_req->content_languages;
r->finfo = sub_req->finfo;
return OK;