coar        99/12/09 10:10:42

  Modified:    src      CHANGES
               .        STATUS
               src/modules/standard mod_mime.c
               htdocs/manual/mod mod_mime.html
  Log:
        Add the AddCharset functionality to mod_mime.
  
  Submitted by: Youichirou Koga <[EMAIL PROTECTED]>
  Reviewed by:  Ken Coar, Martin Kraemer
  
  Revision  Changes    Path
  1.1478    +3 -0      apache-1.3/src/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /home/cvs/apache-1.3/src/CHANGES,v
  retrieving revision 1.1477
  retrieving revision 1.1478
  diff -u -r1.1477 -r1.1478
  --- CHANGES   1999/12/09 17:19:28     1.1477
  +++ CHANGES   1999/12/09 18:10:24     1.1478
  @@ -1,4 +1,7 @@
   Changes with Apache 1.3.10
  +  *) Enhance mod_mime with an AddCharset directive to properly handle
  +     that negotiation dimension.
  +     [Youichirou Koga <[EMAIL PROTECTED]>]
   
     *) OS: Added first cut at support for IBM's OS/390.
        [Ovies Brabson <[EMAIL PROTECTED]>]
  
  
  
  1.764     +1 -4      apache-1.3/STATUS
  
  Index: STATUS
  ===================================================================
  RCS file: /home/cvs/apache-1.3/STATUS,v
  retrieving revision 1.763
  retrieving revision 1.764
  diff -u -r1.763 -r1.764
  --- STATUS    1999/12/09 17:21:35     1.763
  +++ STATUS    1999/12/09 18:10:30     1.764
  @@ -1,5 +1,5 @@
     1.3 STATUS:
  -  Last modified at [$Date: 1999/12/09 17:21:35 $]
  +  Last modified at [$Date: 1999/12/09 18:10:30 $]
   
   Release:
   
  @@ -42,9 +42,6 @@
   
   
   Available Patches:
  -    * Youichirou Koga's patch to add AddCharset to mod_mime.
  -     Message-ID: <384E91DF.D92FABC2.Golux.Com>
  -     Status: Ken +1, Martin +1(after minor fix)
   
       * Andrew Ford's patch (1999/12/05) to add absolute times to mod_expires
        Message-ID: <[EMAIL PROTECTED]>
  
  
  
  1.50      +336 -11   apache-1.3/src/modules/standard/mod_mime.c
  
  Index: mod_mime.c
  ===================================================================
  RCS file: /home/cvs/apache-1.3/src/modules/standard/mod_mime.c,v
  retrieving revision 1.49
  retrieving revision 1.50
  diff -u -r1.49 -r1.50
  --- mod_mime.c        1999/04/20 17:27:51     1.49
  +++ mod_mime.c        1999/12/09 18:10:34     1.50
  @@ -75,6 +75,7 @@
   typedef struct {
       table *forced_types;        /* Additional AddTyped stuff */
       table *encoding_types;      /* Added with AddEncoding... */
  +    table *charset_types;    /* Added with AddCharset... */
       table *language_types;      /* Added with AddLanguage... */
       table *handlers;            /* Added with AddHandler...  */
       array_header *handlers_remove;     /* List of handlers to remove */
  @@ -84,6 +85,24 @@
       char *default_language;     /* Language if no AddLanguage ext found */
   } mime_dir_config;
   
  +typedef struct param_s {        /* one "attr=val" parameter, as split by analyze_ct() */
  +    char *attr;                 /* text left of the '=' */
  +    char *val;                  /* text right of the '=' (quotes, if any, are kept) */
  +    struct param_s *next;       /* next parameter in the list; NULL on the last one */
  +} param;
  +/* A Content-Type string broken into its parts by analyze_ct(). */
  +typedef struct {
  +    char *type;                 /* text before the '/' */
  +    char *subtype;              /* text between the '/' and the first ';' (or end) */
  +    param *param;               /* head of the parameter list; NULL when there is none */
  +} content_type;
  +
  +/*
  + * Characters that may not appear in a token (see is_token()); kept as
  + * a NUL-terminated string so that it can be searched with strchr().
  + */
  +static char tspecial[] = "()<>@,;:\\\"/[]?=";
  +
   module MODULE_VAR_EXPORT mime_module;
   
   static void *create_mime_dir_config(pool *p, char *dummy)
  @@ -93,6 +112,7 @@
   
       new->forced_types = ap_make_table(p, 4);
       new->encoding_types = ap_make_table(p, 4);
  +    new->charset_types = ap_make_table(p, 4);
       new->language_types = ap_make_table(p, 4);
       new->handlers = ap_make_table(p, 4);
       new->handlers_remove = ap_make_array(p, 4, sizeof(handlers_info));
  @@ -119,9 +139,11 @@
       }
   
       new->forced_types = ap_overlay_tables(p, add->forced_types,
  -                                       base->forced_types);
  +                                      base->forced_types);
       new->encoding_types = ap_overlay_tables(p, add->encoding_types,
                                            base->encoding_types);
  +    new->charset_types = ap_overlay_tables(p, add->charset_types,
  +                                        base->charset_types);
       new->language_types = ap_overlay_tables(p, add->language_types,
                                            base->language_types);
       new->handlers = ap_overlay_tables(p, add->handlers,
  @@ -135,17 +157,18 @@
       return new;
   }
   
  -static const char *add_type(cmd_parms *cmd, mime_dir_config * m, char *ct,
  +static const char *add_type(cmd_parms *cmd, mime_dir_config *m, char *ct,
                               char *ext)
   {
       if (*ext == '.')
  -        ++ext;
  +     ++ext;
  +     
       ap_str_tolower(ct);
       ap_table_setn(m->forced_types, ext, ct);
       return NULL;
   }
   
  -static const char *add_encoding(cmd_parms *cmd, mime_dir_config * m, char *enc,
  +static const char *add_encoding(cmd_parms *cmd, mime_dir_config *m, char *enc,
                                   char *ext)
   {
       if (*ext == '.')
  @@ -155,17 +178,29 @@
       return NULL;
   }
   
  -static const char *add_language(cmd_parms *cmd, mime_dir_config * m, char *lang,
  +static const char *add_charset(cmd_parms *cmd, mime_dir_config *m,
  +                               char *charset, char *ext)
  +{
  +    /* The extension is stored without its optional leading dot. */
  +    char *key = (*ext == '.') ? ext + 1 : ext;
  +    /* Map the extension to the lowercased charset name. */
  +    ap_str_tolower(charset);
  +    ap_table_setn(m->charset_types, key, charset);
  +    return NULL;
  +}
  +
  +static const char *add_language(cmd_parms *cmd, mime_dir_config *m, char *lang,
                                   char *ext)
   {
  -    if (*ext == '.')
  -        ++ext;
  +    if (*ext == '.') {
  +     ++ext;
  +    }
       ap_str_tolower(lang);
       ap_table_setn(m->language_types, ext, lang);
       return NULL;
   }
   
  -static const char *add_handler(cmd_parms *cmd, mime_dir_config * m, char *hdlr,
  +static const char *add_handler(cmd_parms *cmd, mime_dir_config *m, char *hdlr,
                                  char *ext)
   {
       if (*ext == '.')
  @@ -209,6 +244,8 @@
        "a mime type followed by one or more file extensions"},
       {"AddEncoding", add_encoding, NULL, OR_FILEINFO, ITERATE2,
        "an encoding (e.g., gzip), followed by one or more file extensions"},
  +    {"AddCharset", add_charset, NULL, OR_FILEINFO, ITERATE2,
  +     "a charset (e.g., iso-2022-jp), followed by one or more file 
extensions"},
       {"AddLanguage", add_language, NULL, OR_FILEINFO, ITERATE2,
        "a language (e.g., fr), followed by one or more file extensions"},
       {"AddHandler", add_handler, NULL, OR_FILEINFO, ITERATE2,
  @@ -275,6 +312,248 @@
       ap_cfg_closefile(f);
   }
   
  +/* Trim ' ', '\t' and '\n' from both ends of s in place and return the
  + * first kept character; a NULL s is returned as NULL. */
  +static char *zap_sp(char *s)
  +{
  +    char *tp;
  +
  +    if (s == NULL) {
  +        return (NULL);
  +    }
  +
  +    /* delete prefixed white space */
  +    for (; *s == ' ' || *s == '\t' || *s == '\n'; s++);
  +
  +    /* delete postfixed white space; test tp[-1] only while tp > s so an
  +     * all-blank string can no longer be walked off its front */
  +    for (tp = s; *tp != '\0'; tp++);
  +    for (; tp > s && (tp[-1] == ' ' || tp[-1] == '\t' || tp[-1] == '\n'); tp--) {
  +        tp[-1] = '\0';
  +    }
  +    return (s);
  +}
  +
  +/* 1 if c is a graphic ASCII char outside tspecial, else -1. */
  +static int is_token(char c)
  +{
  +    if (!isascii(c) || !isgraph(c) || strchr(tspecial, c) != NULL) {
  +        return -1;
  +    }
  +    return 1;
  +}
  +
  +/* 1 if c is ASCII other than '"', '\\' and '\n'; else -1. */
  +static int is_qtext(char c)
  +{
  +    int plain = (c != '"') && (c != '\\') && (c != '\n');
  +
  +    return (isascii(c) && plain) ? 1 : -1;
  +}
  +
  +/*
  + * 1 if s starts a quoted-pair: a backslash followed by an ASCII
  + * character other than the terminating NUL; -1 otherwise.  The old
  + * test "(s + 1) != NULL" could never fail, so a trailing backslash
  + * counted as a pair and made the caller step over the terminator.
  + */
  +static int is_quoted_pair(char *s)
  +{
  +    if (*s != '\\' || *(s + 1) == '\0') {
  +        return (-1);
  +    }
  +    return isascii((int) *(s + 1)) ? 1 : -1;
  +}
  +
  +/*
  + * Split the Content-Type string s into type, subtype and a list of
  + * attr=val parameters, all copied into pool p.  Returns NULL, after
  + * logging a warning, when s cannot be parsed; that includes a backslash
  + * directly before the end of s, which must not be skipped as a pair.
  + */
  +static content_type *analyze_ct(pool *p, char *s)
  +{
  +    char *tp, *mp, *cp;
  +    char *attribute, *value;
  +    int quoted = 0;
  +    content_type *ctp;
  +    param *pp, *npp;
  +
  +    /* initialize ctp */
  +    ctp = (content_type *) ap_palloc(p, sizeof(content_type));
  +    ctp->type = NULL;
  +    ctp->subtype = NULL;
  +    ctp->param = NULL;
  +
  +    /* parse a private copy; zap_sp() writes into the buffer it trims */
  +    tp = ap_pstrdup(p, s);
  +    mp = tp;
  +
  +    /* getting a type */
  +    if (!(cp = strchr(mp, '/'))) {
  +        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                     "mod_mime: analyze_ct: cannot get media type from '%s'",
  +                     mp);
  +        return (NULL);
  +    }
  +    ctp->type = zap_sp(ap_pstrndup(p, mp, cp - mp));
  +    if (ctp->type == NULL || *(ctp->type) == '\0' ||
  +        strchr(ctp->type, ';') || strchr(ctp->type, ' ') ||
  +        strchr(ctp->type, '\t')) {
  +        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                     "Cannot get media type.");
  +        return (NULL);
  +    }
  +
  +    /* getting a subtype */
  +    cp++;
  +    mp = cp;
  +    for (; *cp != ';' && *cp != '\0'; cp++);
  +    ctp->subtype = zap_sp(ap_pstrndup(p, mp, cp - mp));
  +    if ((ctp->subtype == NULL) || (*(ctp->subtype) == '\0') ||
  +        strchr(ctp->subtype, ' ') || strchr(ctp->subtype, '\t')) {
  +        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                     "Cannot get media subtype.");
  +        return (NULL);
  +    }
  +    cp = zap_sp(cp);
  +    if (cp == NULL || *cp == '\0') {
  +        return (ctp);
  +    }
  +
  +    /* getting parameters */
  +    cp++;
  +    cp = zap_sp(cp);
  +    if (cp == NULL || *cp == '\0') {
  +        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                     "Cannot get media parameter.");
  +        return (NULL);
  +    }
  +    mp = cp;
  +    attribute = NULL;
  +    value = NULL;
  +
  +    while (cp != NULL && *cp != '\0') {
  +        if (attribute == NULL) {
  +            if (is_token((int) *cp) > 0) {
  +                cp++;
  +                continue;
  +            }
  +            else if (*cp == ' ' || *cp == '\t' || *cp == '\n') {
  +                cp++;
  +                continue;
  +            }
  +            else if (*cp == '=') {
  +                attribute = zap_sp(ap_pstrndup(p, mp, cp - mp));
  +                if (attribute == NULL || *attribute == '\0') {
  +                    ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                                 "Cannot get media parameter.");
  +                    return (NULL);
  +                }
  +                cp++;
  +                cp = zap_sp(cp);
  +                if (cp == NULL || *cp == '\0') {
  +                    ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                                 "Cannot get media parameter.");
  +                    return (NULL);
  +                }
  +                mp = cp;
  +                continue;
  +            }
  +            else {
  +                ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                             "Cannot get media parameter.");
  +                return (NULL);
  +            }
  +        }
  +        else {
  +            if (mp == cp) {
  +                if (*cp == '"') {
  +                    quoted = 1;
  +                    cp++;
  +                }
  +                else {
  +                    quoted = 0;
  +                }
  +            }
  +            if (quoted > 0) {
  +                while (quoted && *cp != '\0') {
  +                    if (is_qtext((int) *cp) > 0) {
  +                        cp++;
  +                    }
  +                    /* a pair needs a real second character to skip */
  +                    else if (is_quoted_pair(cp) > 0 && cp[1] != '\0') {
  +                        cp += 2;
  +                    }
  +                    else if (*cp == '"') {
  +                        cp++;
  +                        while (*cp == ' ' || *cp == '\t' || *cp == '\n') {
  +                            cp++;
  +                        }
  +                        if (*cp != ';' && *cp != '\0') {
  +                            ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                                         "Cannot get media parameter.");
  +                            return(NULL);
  +                        }
  +                        quoted = 0;
  +                    }
  +                    else {
  +                        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                                     "Cannot get media parameter.");
  +                        return (NULL);
  +                    }
  +                }
  +            }
  +            else {
  +                while (1) {
  +                    if (is_token((int) *cp) > 0) {
  +                        cp++;
  +                    }
  +                    else if (*cp == '\0' || *cp == ';') {
  +                        break;
  +                    }
  +                    else {
  +                        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                                     "Cannot get media parameter.");
  +                        return (NULL);
  +                    }
  +                }
  +            }
  +            value = zap_sp(ap_pstrndup(p, mp, cp - mp));
  +            if (value == NULL || *value == '\0') {
  +                ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +                             "Cannot get media parameter.");
  +                return (NULL);
  +            }
  +
  +            pp = ap_palloc(p, sizeof(param));
  +            pp->attr = attribute;
  +            pp->val = value;
  +            pp->next = NULL;
  +
  +            if (ctp->param == NULL) {
  +                ctp->param = pp;
  +            }
  +            else {
  +                npp = ctp->param;
  +                while (npp->next) {
  +                    npp = npp->next;
  +                }
  +                npp->next = pp;
  +            }
  +            quoted = 0;
  +            attribute = NULL;
  +            value = NULL;
  +            if (*cp == '\0') {
  +                break;
  +            }
  +            cp++;
  +            mp = cp;
  +        }
  +    }
  +    return (ctp);
  +}
  +
   static int find_ct(request_rec *r)
   {
       const char *fn = strrchr(r->filename, '/');
  @@ -283,6 +562,7 @@
       char *ext;
       const char *orighandler = r->handler;
       const char *type;
  +    const char *charset = NULL;
   
       if (S_ISDIR(r->finfo.st_mode)) {
           r->content_type = DIR_MAGIC_TYPE;
  @@ -294,8 +574,9 @@
        * pointer to getword, causing a SEGV ..
        */
   
  -    if (fn == NULL)
  -        fn = r->filename;
  +    if (fn == NULL) {
  +     fn = r->filename;
  +    }
   
       /* Parse filename extensions, which can be in any order */
       while ((ext = ap_getword(r->pool, &fn, '.')) && *ext) {
  @@ -308,6 +589,12 @@
               found = 1;
           }
   
  +     /* Add charset to Content-Type */
  +     if ((type = ap_table_get(conf->charset_types, ext))) {
  +         charset = type;
  +         found = 1;
  +     }
  +
           /* Check for Content-Language */
           if ((type = ap_table_get(conf->language_types, ext))) {
               const char **new;
  @@ -347,8 +634,46 @@
               r->content_languages = NULL;
               r->content_encoding = NULL;
               r->handler = orighandler;
  -        }
  +         charset = NULL;
  +     }
  +    }
   
  +    if (r->content_type) {
  +     content_type *ctp;
  +     char *ct;
  +     int override = 0;
  +
  +     ct = (char *) ap_palloc(r->pool,
  +                             sizeof(char) * (strlen(r->content_type) + 1));
  +     strcpy(ct, r->content_type);
  +
  +     if ((ctp = analyze_ct(r->pool, ct))) {
  +         param *pp = ctp->param;
  +         r->content_type = ap_pstrcat(r->pool, ctp->type, "/",
  +                                      ctp->subtype, NULL);
  +         while (pp != NULL) {
  +             if (charset && !strcmp(pp->attr, "charset")) {
  +                 if (!override) {
  +                     r->content_type = ap_pstrcat(r->pool, r->content_type,
  +                                                  "; charset=", charset,
  +                                                  NULL);
  +                     override = 1;
  +                 }
  +             }
  +             else {
  +                 r->content_type = ap_pstrcat(r->pool, r->content_type,
  +                                              "; ", pp->attr,
  +                                              "=", pp->val,
  +                                              NULL);
  +             }
  +             pp = pp->next;
  +         }
  +         if (charset && !override) {
  +             r->content_type = ap_pstrcat(r->pool, r->content_type,
  +                                          "; charset=", charset,
  +                                          NULL);
  +         }
  +     }
       }
   
       /* Set default language, if none was specified by the extensions
  
  
  
  1.33      +60 -5     apache-1.3/htdocs/manual/mod/mod_mime.html
  
  Index: mod_mime.html
  ===================================================================
  RCS file: /home/cvs/apache-1.3/htdocs/manual/mod/mod_mime.html,v
  retrieving revision 1.32
  retrieving revision 1.33
  diff -u -r1.32 -r1.33
  --- mod_mime.html     1999/04/29 19:55:53     1.32
  +++ mod_mime.html     1999/12/09 18:10:39     1.33
  @@ -29,12 +29,12 @@
   
   <P>
   
  -The directives <A HREF="#addencoding">AddEncoding</A>, <A
  -HREF="#addhandler">AddHandler</A>, <A
  -HREF="#addlanguage">AddLanguage</A> and <A HREF="#addtype">AddType</A>
  +The directives <a href="#addcharset">AddCharset</a>,
  +<A HREF="#addencoding">AddEncoding</A>, <A HREF="#addhandler">AddHandler</A>,
  +<A HREF="#addlanguage">AddLanguage</A> and <A HREF="#addtype">AddType</A>
   are all used to map file extensions onto the meta-information for that
  -file.  Respectively they set the content-encoding, handler,
  -content-language and MIME-type (content-type) of documents.  The
  +file.  Respectively they set the character set, content-encoding, handler,
  +content-language, and MIME-type (content-type) of documents.  The
   directive <A HREF="#typesconfig">TypesConfig</A> is used to specify a
   file which also maps extensions onto MIME types. The directives <A
   HREF="#forcetype">ForceType</A> and <A
  @@ -86,6 +86,7 @@
   
   <H2>Directives</H2>
   <UL>
  +<li><a href="#addcharset">AddCharset</a></li>
   <LI><A HREF="#addencoding">AddEncoding</A>
   <LI><A HREF="#addhandler">AddHandler</A>
   <LI><A HREF="#addlanguage">AddLanguage</A>
  @@ -98,6 +99,60 @@
   </UL>
   <HR>
   
  +<H2><A NAME="addcharset">AddCharset</A></H2>
  +<A HREF="directive-dict.html#Syntax" REL="Help"
  +><STRONG>Syntax:</STRONG></A> AddCharset <i>charset extension
  + [extension...]</i><br>
  +<A HREF="directive-dict.html#Context" REL="Help"
  +><STRONG>Context:</STRONG></A> server config, virtual host, directory, 
.htaccess<BR>
  +<A
  + HREF="directive-dict.html#Override"
  + REL="Help"
  +><STRONG>Override:</STRONG></A> FileInfo<BR>
  +<A
  + HREF="directive-dict.html#Status"
  + REL="Help"
  +><STRONG>Status:</STRONG></A> Base<BR>
  +<A
  + HREF="directive-dict.html#Module"
  + REL="Help"
  +><STRONG>Module:</STRONG></A> mod_mime
  +<br>
  +<A HREF="directive-dict.html#Compatibility" REL="Help"
  +><STRONG>Compatibility:</STRONG></A> AddCharset is only available in Apache
  +1.3.10 and later
  +
  +<P>
  +The AddCharset directive maps the given filename extensions to the
  +specified content charset. <i>charset</i> is the MIME charset
  +parameter for filenames containing <i>extension</i>.  This mapping is
  +added to any already in force, overriding any mappings that already
  +exist for the same <i>extension</i>.
  +</P>
  +<P>
  +Example:
  +<pre>
  +    AddLanguage ja .ja
  +    AddCharset EUC-JP .euc
  +    AddCharset ISO-2022-JP .jis
  +    AddCharset SHIFT_JIS .sjis
  +</pre>
  +
  +<P>
  +Then the document <samp>xxxx.ja.jis</samp> will be treated as being a
  +Japanese document whose charset is ISO-2022-JP (as will the document
  +<samp>xxxx.jis.ja</samp>). Although the content charset is reported to
  +the client, the browser is unlikely to use this information. The
  +AddCharset directive is more useful for
  +<A HREF="../content-negotiation.html">content negotiation</A>, where
  +the server returns one of several documents based on the client's
  +charset preference.
  +</P>
  +<P>
  +<STRONG>See also</STRONG>: <A HREF="mod_negotiation.html">mod_negotiation</A>
  +</P>
  +
  +<hr>
   
   <H2><A NAME="addencoding">AddEncoding</A></H2>
   <!--%plaintext &lt;?INDEX {\tt AddEncoding} directive&gt; -->
  
  
  

Reply via email to