I didn't catch that the util_uri functions returned HTTP_OK (200) rather
than APR_SUCCESS.  Fixed.  Some other cleanups and stuff.

I've compiled, tested, and confirmed this on FreeBSD and Solaris.  I
tested it with --enable-modules=all (some of the auth modules don't
compile due to external dependencies I don't have).  This should be good
to go (or so I say).

If anyone else can check this out and provide feedback, I'd appreciate 
it.  You'll have to delete the util_uri.h, gen_uri_delims, and 
util_uri.c files in httpd-2.0 for this to work.  -- justin
Index: include/httpd.h
===================================================================
RCS file: /home/cvspublic/httpd-2.0/include/httpd.h,v
retrieving revision 1.150
diff -u -r1.150 httpd.h
--- include/httpd.h     2001/04/26 00:33:12     1.150
+++ include/httpd.h     2001/05/18 08:48:06
@@ -578,7 +578,7 @@
 
 /* ### would be nice to not include this from httpd.h ... */
 /* This comes after we have defined the request_rec type */
-#include "util_uri.h"
+#include "apr_uri.h"
 
 /** A structure that represents one process */
 struct process_rec {
@@ -777,7 +777,7 @@
     /** ST_MODE set to zero if no such file */
     apr_finfo_t finfo;
     /** components of uri, dismantled */
-    uri_components parsed_uri;
+    apr_uri_components parsed_uri;
 
     /* Various other config info which may change with .htaccess files
      * These are config vectors, with one void* pointer for each module
Index: modules/loggers/mod_log_config.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/loggers/mod_log_config.c,v
retrieving revision 1.59
diff -u -r1.59 mod_log_config.c
--- modules/loggers/mod_log_config.c    2001/05/16 13:49:40     1.59
+++ modules/loggers/mod_log_config.c    2001/05/18 08:48:45
@@ -344,7 +344,7 @@
             * (note also that r->the_request contains the unmodified request)
             */
     return (r->parsed_uri.password) ? apr_pstrcat(r->pool, r->method, " ",
-                                        ap_unparse_uri_components(r->pool, 
&r->parsed_uri, 0),
+                                        apr_uri_unparse_components(r->pool, 
&r->parsed_uri, 0),
                                         r->assbackwards ? NULL : " ", 
r->protocol, NULL)
                                        : r->the_request;
 }
Index: modules/mappers/mod_alias.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/mappers/mod_alias.c,v
retrieving revision 1.28
diff -u -r1.28 mod_alias.c
--- modules/mappers/mod_alias.c 2001/05/06 23:27:12     1.28
+++ modules/mappers/mod_alias.c 2001/05/18 08:48:47
@@ -335,8 +335,8 @@
                    found = ap_pregsub(r->pool, p->real, r->uri,
                                    p->regexp->re_nsub + 1, regm);
                    if (found && doesc) {
-                        uri_components uri;
-                        ap_parse_uri_components(r->pool, found, &uri);
+                        apr_uri_components uri;
+                        apr_uri_parse_components(r->pool, found, &uri);
                        found = ap_escape_uri(r->pool, uri.path);
                         if (uri.query) {
                             found = apr_pstrcat(r->pool, found, "?", 
uri.query, NULL);
Index: server/main.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/server/main.c,v
retrieving revision 1.98
diff -u -r1.98 main.c
--- server/main.c       2001/05/18 11:42:10     1.98
+++ server/main.c       2001/05/20 15:57:29
@@ -72,7 +72,7 @@
 #include "http_log.h" 
 #include "http_config.h"
 #include "http_vhost.h"
-#include "util_uri.h" 
+#include "apr_uri.h" 
 #include "util_ebcdic.h"
 #include "ap_mpm.h"
 
Index: server/scoreboard.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/server/scoreboard.c,v
retrieving revision 1.23
diff -u -r1.23 scoreboard.c
--- server/scoreboard.c 2001/05/15 02:38:13     1.23
+++ server/scoreboard.c 2001/05/18 08:49:21
@@ -310,7 +310,7 @@
            } else {
                /* Don't reveal the password in the server-status view */
                    apr_cpystrn(ws->request, apr_pstrcat(r->pool, r->method, " 
",
-                                              
ap_unparse_uri_components(r->pool, &r->parsed_uri, UNP_OMITPASSWORD),
+                                              
apr_uri_unparse_components(r->pool, &r->parsed_uri, UNP_OMITPASSWORD),
                                               r->assbackwards ? NULL : " ", 
r->protocol, NULL),
                                       sizeof(ws->request));
            }
Index: srclib/apr-util/uri/Makefile.in
===================================================================
RCS file: /home/cvspublic/apr-util/uri/Makefile.in,v
retrieving revision 1.3
diff -u -r1.3 Makefile.in
--- srclib/apr-util/uri/Makefile.in     2001/01/07 01:35:51     1.3
+++ srclib/apr-util/uri/Makefile.in     2001/05/18 09:18:41
@@ -1,2 +1,12 @@
+TARGETS = uri_delims.h apr_uri.lo
+CLEAN_TARGETS = gen_uri_delims uri_delims.h
+
 # bring in rules.mk for standard functionality
 @INCLUDE_RULES@
+
+gen_uri_delims_OBJECTS = gen_uri_delims.lo
+gen_uri_delims: $(gen_uri_delims_OBJECTS)
+       $(LINK) $(EXTRA_LDFLAGS) $(gen_uri_delims_OBJECTS) $(EXTRA_LIBS)
+
+uri_delims.h: gen_uri_delims
+       ./gen_uri_delims > uri_delims.h

Index: server/protocol.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/server/protocol.c,v
retrieving revision 1.20
diff -u -r1.20 protocol.c
--- server/protocol.c   2001/05/07 10:01:27     1.20
+++ server/protocol.c   2001/05/20 17:00:49
@@ -326,14 +326,14 @@
     r->unparsed_uri = apr_pstrdup(r->pool, uri);
 
     if (r->method_number == M_CONNECT) {
-       status = ap_parse_hostinfo_components(r->pool, uri, &r->parsed_uri);
+       status = apr_uri_parse_hostinfo_components(r->pool, uri, 
&r->parsed_uri);
     }
     else {
        /* Simple syntax Errors in URLs are trapped by parse_uri_components(). 
*/
-       status = ap_parse_uri_components(r->pool, uri, &r->parsed_uri);
+       status = apr_uri_parse_components(r->pool, uri, &r->parsed_uri);
     }
 
-    if (ap_is_HTTP_SUCCESS(status)) {
+    if (status == APR_SUCCESS) {
        /* if it has a scheme we may need to do absoluteURI vhost stuff */
        if (r->parsed_uri.scheme
            && !strcasecmp(r->parsed_uri.scheme, ap_http_method(r))) {
@@ -361,7 +361,7 @@
     else {
        r->args = NULL;
        r->hostname = NULL;
-       r->status = status;             /* set error status */
+       r->status = HTTP_BAD_REQUEST;             /* set error status */
        r->uri = apr_pstrdup(r->pool, uri);
     }
 }
Index: modules/test/mod_test_util_uri.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/test/mod_test_util_uri.c,v
retrieving revision 1.9
diff -u -r1.9 mod_test_util_uri.c
--- modules/test/mod_test_util_uri.c    2001/02/28 15:24:08     1.9
+++ modules/test/mod_test_util_uri.c    2001/05/20 17:01:02
@@ -142,7 +142,7 @@
     apr_pool_t *sub;
     char *input_uri;
     char *strp;
-    uri_components result;
+    apr_uri_components result;
     unsigned expect;
     int status;
     unsigned failures;
@@ -208,8 +208,8 @@
        *strp = 0;
 
        sub = apr_pool_sub_make(r->pool);
-       status = ap_parse_uri_components(sub, input_uri, &result);
-       if (status == HTTP_OK) {
+       status = apr_uri_parse_components(sub, input_uri, &result);
+       if (status == APR_SUCCESS) {
 #define CHECK(f)                                                       \
            if ((expect & T_##f)                                        \
                && (result.f == NULL || strcmp(result.f, pieces->f))) { \
@@ -228,7 +228,7 @@
            CHECK(fragment)
 #undef CHECK
        }
-       if (status != HTTP_OK) {
+       if (status != APR_SUCCESS) {
            ap_rprintf(r, 
"<tr><td>%d</td><td>0x%02x</td><td>0x%02x</td><td>%d</td><td>\"%s\"</td>", row, 
u, expect, status, input_uri);
 #define DUMP(f)                                                        \
            if (result.f) {                                             \
Index: modules/aaa/mod_auth_digest.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/aaa/mod_auth_digest.c,v
retrieving revision 1.46
diff -u -r1.46 mod_auth_digest.c
--- modules/aaa/mod_auth_digest.c       2001/04/14 23:00:28     1.46
+++ modules/aaa/mod_auth_digest.c       2001/05/20 17:09:16
@@ -114,7 +114,7 @@
 #include "http_request.h"
 #include "http_log.h"
 #include "http_protocol.h"
-#include "util_uri.h"
+#include "apr_uri.h"
 #include "util_md5.h"
 
 #if APR_HAS_SHARED_MEMORY
@@ -226,7 +226,7 @@
     apr_time_t             nonce_time;
     enum hdr_sts          auth_hdr_sts;
     const char           *raw_request_uri;
-    uri_components       *psd_request_uri;
+    apr_uri_components    *psd_request_uri;
     int                   needed_auth;
     client_entry         *client;
 } digest_header_rec;
@@ -1508,8 +1508,8 @@
 }
 
 
-static void copy_uri_components(uri_components *dst, uri_components *src,
-                               request_rec *r) {
+static void copy_uri_components(apr_uri_components *dst, 
+                                apr_uri_components *src, request_rec *r) {
     if (src->scheme && src->scheme[0] != '\0')
        dst->scheme = src->scheme;
     else
@@ -1618,10 +1618,10 @@
        /* Hmm, the simple match didn't work (probably a proxy modified the
         * request-uri), so lets do a more sophisticated match
         */
-       uri_components r_uri, d_uri;
+       apr_uri_components r_uri, d_uri;
 
        copy_uri_components(&r_uri, resp->psd_request_uri, r);
-       if (ap_parse_uri_components(r->pool, resp->uri, &d_uri) != HTTP_OK) {
+       if (apr_uri_parse_components(r->pool, resp->uri, &d_uri) != 
APR_SUCCESS) {
            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, 0, r,
                          "Digest: invalid uri <%s> in Authorization header",
                          resp->uri);
Index: modules/dav/main/util.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/modules/dav/main/util.c,v
retrieving revision 1.30
diff -u -r1.30 util.c
--- modules/dav/main/util.c     2001/05/01 11:17:07     1.30
+++ modules/dav/main/util.c     2001/05/20 17:10:53
@@ -189,12 +189,12 @@
     dav_lookup_result result = { 0 };
     const char *scheme;
     apr_port_t port;
-    uri_components comp;
+    apr_uri_components comp;
     char *new_file;
     const char *domain;
 
     /* first thing to do is parse the URI into various components */
-    if (ap_parse_uri_components(r->pool, uri, &comp) != HTTP_OK) {
+    if (apr_uri_parse_components(r->pool, uri, &comp) != APR_SUCCESS) {
        result.err.status = HTTP_BAD_REQUEST;
        result.err.desc = "Invalid syntax in Destination URI.";
        return result;
@@ -233,7 +233,7 @@
 
         /* insert a port if the URI did not contain one */
         if (comp.port == 0)
-            comp.port = ap_default_port_for_scheme(comp.scheme);
+            comp.port = apr_uri_default_port_for_scheme(comp.scheme);
 
         /* now, verify that the URI uses the same scheme as the current.
            request. the port must match our port.
@@ -288,7 +288,7 @@
        the current request. Therefore, we can use ap_sub_req_lookup_uri() */
 
     /* reconstruct a URI as just the path */
-    new_file = ap_unparse_uri_components(r->pool, &comp, UNP_OMITSITEPART);
+    new_file = apr_uri_unparse_components(r->pool, &comp, UNP_OMITSITEPART);
 
     /*
      * Lookup the URI and return the sub-request. Note that we use the
@@ -542,7 +542,7 @@
     const char *uri = NULL;    /* scope of current production; NULL=no-tag */
     size_t uri_len = 0;
     dav_if_header *ih = NULL;
-    uri_components parsed_uri;
+    apr_uri_components parsed_uri;
     const dav_hooks_locks *locks_hooks = DAV_GET_HOOKS_LOCKS(r);
     enum {no_tagged, tagged, unknown} list_type = unknown;
     int condition;
@@ -566,7 +566,7 @@
             
             /* 2518 specifies this must be an absolute URI; just take the
              * relative part for later comparison against r->uri */
-            if (ap_parse_uri_components(r->pool, uri, &parsed_uri) != HTTP_OK) 
{
+            if (apr_uri_parse_components(r->pool, uri, &parsed_uri) != 
APR_SUCCESS) {
                 return dav_new_error(r->pool, HTTP_BAD_REQUEST,
                                      DAV_ERR_IF_TAGGED,
                                      "Invalid URI in tagged If-header.");
Index: server/Makefile.in
===================================================================
RCS file: /home/cvspublic/httpd-2.0/server/Makefile.in,v
retrieving revision 1.43
diff -u -r1.43 Makefile.in
--- server/Makefile.in  2001/05/18 00:48:57     1.43
+++ server/Makefile.in  2001/05/20 17:13:07
@@ -1,6 +1,6 @@
 
 TARGET_EXPORTS    = apache.exports
-CLEAN_TARGETS = gen_test_char gen_uri_delims test_char.h uri_delims.h \
+CLEAN_TARGETS = gen_test_char test_char.h \
        $(TARGET_EXPORTS) ApacheCoreOS2.def
 EXTRACLEAN_TARGETS = exports.c
 
@@ -8,9 +8,9 @@
 
 LTLIBRARY_NAME    = libmain.la
 LTLIBRARY_SOURCES = \
-    uri_delims.h test_char.h \
+    test_char.h \
        config.c log.c main.c vhost.c util.c util_date.c \
-       util_script.c util_uri.c util_md5.c util_cfgtree.c util_ebcdic.c \
+       util_script.c util_md5.c util_cfgtree.c util_ebcdic.c \
        rfc1413.c connection.c listen.c \
         mpm_common.c util_charset.c util_debug.c util_xml.c \
        util_filter.c exports.c buildmark.c scoreboard.c \
@@ -21,21 +21,13 @@
 include $(top_srcdir)/build/rules.mk
 include $(top_srcdir)/build/library.mk
 
-gen_uri_delims_OBJECTS = gen_uri_delims.lo
-gen_uri_delims: $(gen_uri_delims_OBJECTS)
-       $(LINK) $(EXTRA_LDFLAGS) $(gen_uri_delims_OBJECTS) $(EXTRA_LIBS)
-
 gen_test_char_OBJECTS = gen_test_char.lo util_debug.lo
 gen_test_char: $(gen_test_char_OBJECTS)
        $(LINK) $(EXTRA_LDFLAGS) $(gen_test_char_OBJECTS) $(EXTRA_LIBS)
 
-uri_delims.h: gen_uri_delims
-       ./gen_uri_delims > uri_delims.h
-
 test_char.h: gen_test_char
        ./gen_test_char > test_char.h
 
-util_uri.lo: uri_delims.h
 util.lo: test_char.h
 
 EXPORT_FILES = ../srclib/apr/apr.exports ../srclib/apr-util/aprutil.exports \
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon public domain software
 * originally written at the National Center for Supercomputing Applications,
 * University of Illinois, Urbana-Champaign.
 */

/*
 * apr_uri.h: External Interface of apr_uri.c
 */

#ifndef APR_URI_H
#define APR_URI_H

#include "apu.h"

#include <apr_network_io.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @package Apache URI library
 */

typedef struct schemes_t schemes_t;

/**
 * Structure to store various schemes and their default ports.
 * One entry maps a URI scheme name to the port used when the URI
 * does not name one explicitly.
 */
struct schemes_t {
    /** The name of the scheme */
    const char *name;
    /** The default port for the scheme */
    apr_port_t default_port;
};

/* Default port numbers for the URI schemes known to this library.
 * DEFAULT_HTTP_PORT and DEFAULT_HTTPS_PORT are only defined here when
 * nothing included earlier has already defined them.
 */
#define DEFAULT_FTP_DATA_PORT   20
#define DEFAULT_FTP_PORT        21
#define DEFAULT_GOPHER_PORT     70
#ifndef DEFAULT_HTTP_PORT
#define DEFAULT_HTTP_PORT       80
#endif
#define DEFAULT_NNTP_PORT       119
#define DEFAULT_WAIS_PORT       210
#ifndef DEFAULT_HTTPS_PORT
#define DEFAULT_HTTPS_PORT      443
#endif
#define DEFAULT_SNEWS_PORT      563
#define DEFAULT_PROSPERO_PORT   1525    /* WARNING: conflict w/Oracle */

/* Flags passed to unparse_uri_components(): */
#define UNP_OMITSITEPART        (1U<<0) /* suppress "scheme://[EMAIL 
PROTECTED]:port" */
#define UNP_OMITUSER            (1U<<1) /* Just omit user */
#define UNP_OMITPASSWORD        (1U<<2) /* Just omit password */
#define UNP_OMITUSERINFO        (UNP_OMITUSER|UNP_OMITPASSWORD) /* omit 
"user:password@" part */
#define UNP_REVEALPASSWORD      (1U<<3) /* Show plain text password (default: 
show XXXXXXXX) */
#define UNP_OMITPATHINFO        (1U<<4) /* Show "scheme://[EMAIL 
PROTECTED]:port" only */
#define UNP_OMITQUERY           (1U<<5) /* Omit the "?queryarg" from the path */

typedef struct apr_uri_components apr_uri_components;

/**
 * A structure to encompass all of the fields in a uri
 */
struct apr_uri_components {
    /** scheme ("http"/"ftp"/...) */
    char *scheme;
    /** combined [user[:password]@]host[:port] */
    char *hostinfo;
    /** user name, as in http://user:passwd@host:port/ */
    char *user;
    /** password, as in http://user:passwd@host:port/ */
    char *password;
    /** hostname from URI (or from Host: header) */
    char *hostname;
    /** port string (integer representation is in "port") */
    char *port_str;
    /** the request path (or "/" if only scheme://host was given) */
    char *path;
    /** Everything after a '?' in the path, if present */
    char *query;
    /** Trailing "#fragment" string, if present */
    char *fragment;

    /** structure returned from gethostbyname() 
     *  @defvar struct hostent *hostent */
    struct hostent *hostent;

    /** The port number, numeric, valid only if port_str != NULL */
    apr_port_t port;
    
    /** has the structure been initialized */
    unsigned is_initialized:1;

    /** has the DNS been looked up yet */
    unsigned dns_looked_up:1;
    /** has the dns been resolved yet */
    unsigned dns_resolved:1;
};

/* apr_uri.c */
/**
 * Return the default port for a given scheme.  The schemes recognized are
 * http, ftp, https, gopher, wais, nntp, snews, and prospero.  The scheme
 * name is compared case-insensitively.
 * @param scheme_str The string that contains the current scheme
 * @return The default port for this scheme, or 0 if the scheme is not
 *         one of the recognized ones
 * @deffunc apr_port_t apr_uri_default_port_for_scheme(const char *scheme_str)
 */ 
APU_DECLARE(apr_port_t) apr_uri_default_port_for_scheme(const char *scheme_str);

/**
 * Unparse an apr_uri_components structure to a URI string.  Optionally 
 * suppress the password for security reasons.
 * @param p The pool to allocate out of
 * @param uptr All of the parts of the uri
 * @param flags How to unparse the uri.  A bitwise OR of zero or more of:
 * <PRE>
 *    UNP_OMITSITEPART        suppress "scheme://user@site:port" 
 *    UNP_OMITUSER            Just omit user 
 *    UNP_OMITPASSWORD        Just omit password 
 *    UNP_OMITUSERINFO        omit "user:password@" part 
 *    UNP_REVEALPASSWORD      Show plain text password (default: show XXXXXXXX) 
 *    UNP_OMITPATHINFO        Show "scheme://user@site:port" only 
 *    UNP_OMITQUERY           Omit the "?queryarg" from the path 
 * </PRE>
 * @return The uri as a string
 * @deffunc char * apr_uri_unparse_components(apr_pool_t *p, const apr_uri_components *uptr, unsigned flags)
 */
APU_DECLARE(char *) apr_uri_unparse_components(apr_pool_t *p, 
                                               const apr_uri_components *uptr,
                                               unsigned flags);

/**
 * Parse a given URI, fill in all supplied fields of a apr_uri_components
 * structure. This eliminates the necessity of extracting host, port,
 * path, query info repeatedly in the modules.
 * @param p The pool to allocate out of
 * @param uri The uri to parse
 * @param uptr The apr_uri_components to fill out
 * @return APR_SUCCESS if the URI was parsed, or APR_EGENERAL if the port
 *         part contains invalid characters.  This is an APR status value,
 *         not an HTTP status code.
 * @deffunc int apr_uri_parse_components(apr_pool_t *p, const char *uri, apr_uri_components *uptr)
 */
APU_DECLARE(int) apr_uri_parse_components(apr_pool_t *p, const char *uri, 
                                          apr_uri_components *uptr);

/**
 * Special case for CONNECT parsing: it comes with the hostinfo part only
 * @param p The pool to allocate out of
 * @param hostinfo The hostinfo string to parse
 * @param uptr The apr_uri_components to fill out
 * @return APR_SUCCESS on success; any other value is a failure (an APR
 *         status value, not an HTTP status code -- TODO confirm the exact
 *         error value against the implementation)
 * @deffunc int apr_uri_parse_hostinfo_components(apr_pool_t *p, const char *hostinfo, apr_uri_components *uptr)
 */
APU_DECLARE(int) apr_uri_parse_hostinfo_components(apr_pool_t *p, 
                                                   const char *hostinfo, 
                                                   apr_uri_components *uptr);

#ifdef __cplusplus
}
#endif

#endif /*APR_URI_H*/
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon public domain software
 * originally written at the National Center for Supercomputing Applications,
 * University of Illinois, Urbana-Champaign.
 */

/*
 * apr_uri.c: URI related utility things
 * 
 */

#include <stdlib.h>

#include "apu.h"
#include "apr.h"
#include "apr_strings.h"

#define APR_WANT_STRFUNC
#include "apr_want.h"

#include "apr_uri.h"

/* Some WWW schemes and their default ports; this is basically /etc/services */
/* This will become global when the protocol abstraction comes */
/* As the schemes are searched by a linear search, */
/* they are sorted by their expected frequency */
/* The table ends with an entry whose name is NULL; the lookup loop stops
 * there and reports 0 for unknown schemes, so the 0xFFFF stored in that
 * terminating entry is never handed back to a caller.
 */
static schemes_t schemes[] =
{
    {"http",   DEFAULT_HTTP_PORT},
    {"ftp",    DEFAULT_FTP_PORT},
    {"https",  DEFAULT_HTTPS_PORT},
    {"gopher", DEFAULT_GOPHER_PORT},
    {"wais",   DEFAULT_WAIS_PORT},
    {"nntp",   DEFAULT_NNTP_PORT},
    {"snews",  DEFAULT_SNEWS_PORT},
    {"prospero", DEFAULT_PROSPERO_PORT},
    { NULL, 0xFFFF }                    /* unknown port */
};

/* Look up the default port for a URI scheme.
 *
 * scheme_str is compared case-insensitively against each entry of the
 * schemes[] table, in table order.
 *
 * Returns the matching entry's default port, or 0 when the scheme is
 * unknown or when scheme_str is NULL.
 */
APU_DECLARE(apr_port_t) apr_uri_default_port_for_scheme(const char *scheme_str)
{
    schemes_t *scheme;

    /* The parser leaves the scheme NULL for URIs that carry none (e.g. a
     * bare "/path"); treat that as "unknown" instead of handing NULL to
     * strcasecmp().
     */
    if (scheme_str == NULL) {
        return 0;
    }

    for (scheme = schemes; scheme->name != NULL; ++scheme) {
        if (strcasecmp(scheme_str, scheme->name) == 0) {
            return scheme->default_port;
        }
    }

    return 0;
}

/* Unparse an apr_uri_components structure to a URI string.
 * Optionally suppress the password for security reasons.
 *
 *   p      pool the result (and intermediate strings) are allocated from
 *   uptr   the parsed components; NULL members are treated as absent
 *   flags  bitwise OR of UNP_* flags selecting which parts to emit
 *
 * Returns the assembled string.  When nothing is emitted the result is
 * the empty string, never NULL.
 *
 * Notes:
 *  - Unless UNP_REVEALPASSWORD is given, a present password is written
 *    as "XXXXXXXX".
 *  - The "@" separator is only written when a user or password is
 *    actually emitted, so UNP_OMITUSERINFO does not leave a stray "@"
 *    in front of the hostname.
 *  - A hostname without a scheme is written as "//host[:port]" rather
 *    than being dropped.
 *  - UNP_OMITQUERY suppresses the "#fragment" as well as the "?query".
 */
APU_DECLARE(char *) apr_uri_unparse_components(apr_pool_t *p, 
                                               const apr_uri_components *uptr, 
                                               unsigned flags)
{
    char *ret = "";

    /* If suppressing the site part, omit both user name & scheme://hostname */
    if (!(flags & UNP_OMITSITEPART)) {

        /* Construct a "user:password@" string, honoring the passed UNP_ flags: */
        if (uptr->user || uptr->password) {
            int show_user = (uptr->user != NULL) && !(flags & UNP_OMITUSER);
            int show_password = (uptr->password != NULL)
                                && !(flags & UNP_OMITPASSWORD);

            ret = apr_pstrcat(p,
                        show_user ? uptr->user : "",
                        show_password ? ":" : "",
                        show_password
                            ? ((flags & UNP_REVEALPASSWORD) ? uptr->password
                                                            : "XXXXXXXX")
                            : "",
                        /* no userinfo emitted => no separator either */
                        (show_user || show_password) ? "@" : "",
                        NULL);
        }

        /* Construct scheme://site string */
        if (uptr->hostname) {
            int is_default_port;

            /* Only consult the scheme table when there is a scheme;
             * without one the port can never be "the default" and the
             * lookup must not be handed a NULL string.
             */
            is_default_port =
                (uptr->port_str == NULL ||
                 uptr->port == 0 ||
                 (uptr->scheme != NULL &&
                  uptr->port == apr_uri_default_port_for_scheme(uptr->scheme)));

            /* A NULL argument terminates apr_pstrcat()'s argument list,
             * so a missing scheme must be replaced by "" or everything
             * after it would silently be lost.
             */
            ret = apr_pstrcat(p,
                        uptr->scheme ? uptr->scheme : "",
                        uptr->scheme ? ":" : "",
                        "//", ret,
                        uptr->hostname,
                        is_default_port ? "" : ":",
                        is_default_port ? "" : uptr->port_str,
                        NULL);
        }
    }

    /* Should we suppress all path info? */
    if (!(flags & UNP_OMITPATHINFO)) {
        int show_query = (uptr->query != NULL) && !(flags & UNP_OMITQUERY);
        int show_fragment = (uptr->fragment != NULL)
                            && !(flags & UNP_OMITQUERY);

        /* Append path, query and fragment strings: */
        ret = apr_pstrcat(p,
                ret,
                uptr->path ? uptr->path : "",
                show_query ? "?" : "",
                show_query ? uptr->query : "",
                show_fragment ? "#" : "",
                show_fragment ? uptr->fragment : "",
                NULL);
    }
    return ret;
}

/* Here is the hand-optimized parse_uri_components().  There are some wild
 * tricks we could pull in assembly language that we don't pull here... like we
 * can do word-at-time scans for delimiter characters using the same technique
 * that fast memchr()s use.  But that would be way non-portable. -djg
 */

/* We have a table that we can index by character and it tells us if the
 * character is one of the interesting delimiters.  Note that we even get
 * compares for NUL for free -- it's just another delimiter.
 */

#define T_COLON         0x01    /* ':' */
#define T_SLASH         0x02    /* '/' */
#define T_QUESTION      0x04    /* '?' */
#define T_HASH          0x08    /* '#' */
#define T_NUL           0x80    /* '\0' */

/* the uri_delims.h file is autogenerated by gen_uri_delims.c */
#include "uri_delims.h"

/* it works like this:
    while ((uri_delims[ch] & NOTEND_foobar) == 0) {
        ch is not a delimiter that ends foobar, keep scanning
    }
   i.e. a non-zero result means ch terminates the foobar component.
   (presumably each uri_delims[] entry holds the T_* bits for that
   character -- the generated header is not visible here)
*/

/* Note that we optimize the scheme scanning here, we cheat and let the
 * compiler know that it doesn't have to do the & masking.
 */
#define NOTEND_SCHEME   (0xff)
#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL)
#define NOTEND_PATH     (T_QUESTION | T_HASH | T_NUL)

/* apr_uri_parse_components():
 * Parse a given URI, fill in all supplied fields of a apr_uri_components
 * structure. This eliminates the necessity of extracting host, port,
 * path, query info repeatedly in the modules.
 * Side effects:
 *  - fills in fields of apr_uri_components *uptr
 *  - none on any of the r->* fields
 * Returns:
 *  - APR_SUCCESS when the URI was split into its components
 *  - APR_EGENERAL when the text after the ':' in the host part is not a
 *    plain decimal number; the fields parsed up to that point stay set
 * All component strings are copies allocated from pool p.
 */
APU_DECLARE(int) apr_uri_parse_components(apr_pool_t *p, const char *uri, 
                                          apr_uri_components *uptr)
{
    const char *s;
    const char *s1;
    const char *hostinfo;
    char *endstr;
    int port;

    /* Initialize the structure. parse_uri() and parse_uri_components()
     * can be called more than once per request.
     */
    memset (uptr, '\0', sizeof(*uptr));
    uptr->is_initialized = 1;

    /* We assume the processor has a branch predictor like most --
     * it assumes forward branches are untaken and backwards are taken.  That's
     * the reason for the gotos.  -djg
     */
    if (uri[0] == '/') {
deal_with_path:
        /* we expect uri to point to first character of path ... remember
         * that the path could be empty -- http://foobar?query for example
         */
        s = uri;
        while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
            ++s;
        }
        /* an empty path leaves uptr->path NULL */
        if (s != uri) {
            uptr->path = apr_pstrndup(p, uri, s - uri);
        }
        if (*s == 0) {
            return APR_SUCCESS;
        }
        if (*s == '?') {
            ++s;
            /* the query runs up to the first '#'; the rest is the fragment */
            s1 = strchr(s, '#');
            if (s1) {
                uptr->fragment = apr_pstrdup(p, s1 + 1);
                uptr->query = apr_pstrndup(p, s, s1 - s);
            }
            else {
                uptr->query = apr_pstrdup(p, s);
            }
            return APR_SUCCESS;
        }
        /* otherwise it's a fragment */
        uptr->fragment = apr_pstrdup(p, s + 1);
        return APR_SUCCESS;
    }

    /* find the scheme: */
    s = uri;
    while ((uri_delims[*(unsigned char *)s] & NOTEND_SCHEME) == 0) {
        ++s;
    }
    /* scheme must be non-empty and followed by :// */
    if (s == uri || s[0] != ':' || s[1] != '/' || s[2] != '/') {
        /* no scheme: the whole input is handled as path[?query][#fragment] */
        goto deal_with_path;    /* backwards predicted taken! */
    }

    uptr->scheme = apr_pstrndup(p, uri, s - uri);
    s += 3;
    hostinfo = s;
    while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
        ++s;
    }
    uri = s;    /* whatever follows hostinfo is start of uri */
    uptr->hostinfo = apr_pstrndup(p, hostinfo, uri - hostinfo);

    /* If there's a username:password@host:port, the @ we want is the last @...
     * too bad there's no memrchr()... For the C purists, note that hostinfo
     * is definitely not the first character of the original uri so therefore
     * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
     */
    do {
        --s;
    } while (s >= hostinfo && *s != '@');
    if (s < hostinfo) {
        /* again we want the common case to be fall through */
deal_with_host:
        /* We expect hostinfo to point to the first character of
         * the hostname.  If there's a port it is the first colon.
         */
        s = memchr(hostinfo, ':', uri - hostinfo);
        if (s == NULL) {
            /* we expect the common case to have no port */
            uptr->hostname = apr_pstrndup(p, hostinfo, uri - hostinfo);
            goto deal_with_path;
        }
        uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo);
        ++s;
        uptr->port_str = apr_pstrndup(p, s, uri - s);
        if (uri != s) {
            /* NOTE(review): the strtol() result is stored without a range
             * check, and uptr->port is assigned even when the string turns
             * out to be invalid below.
             */
            port = strtol(uptr->port_str, &endstr, 10);
            uptr->port = port;
            if (*endstr == '\0') {
                goto deal_with_path;
            }
            /* Invalid characters after ':' found */
            return APR_EGENERAL;
        }
        /* "host:" with nothing after the colon: port_str is the empty
         * string and the scheme's default port is used
         */
        uptr->port = apr_uri_default_port_for_scheme(uptr->scheme);
        goto deal_with_path;
    }

    /* s points at the last '@' in hostinfo: everything before it is the
     * user info, everything after it is host[:port]
     */
    /* first colon delimits username:password */
    s1 = memchr(hostinfo, ':', s - hostinfo);
    if (s1) {
        uptr->user = apr_pstrndup(p, hostinfo, s1 - hostinfo);
        ++s1;
        uptr->password = apr_pstrndup(p, s1, s - s1);
    }
    else {
        uptr->user = apr_pstrndup(p, hostinfo, s - hostinfo);
    }
    hostinfo = s + 1;
    goto deal_with_host;
}

/* Special case for CONNECT parsing: it comes with the hostinfo part only */
/* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
 * for the format of the "CONNECT host:port HTTP/1.0" request
 *
 * Splits a "host:port" string at its first colon and stores the pieces
 * in *uptr.
 *
 *   p         pool handed to apr_pstrdup()/apr_pstrndup() for the copies
 *   hostinfo  NUL-terminated "host:port" text
 *   uptr      result structure; it is zeroed and refilled on every call
 *
 * uptr->hostinfo always receives a copy of the whole input.  When a colon
 * is present, uptr->hostname and uptr->port_str are filled in as well.
 * uptr->port is only written when the port text is a complete decimal
 * number in the range 0..65535.
 *
 * Returns APR_SUCCESS when a valid port was stored, APR_EGENERAL when the
 * colon is missing, the port is empty, has trailing garbage, or is out of
 * range.
 */
APU_DECLARE(int) apr_uri_parse_hostinfo_components(apr_pool_t *p,
                                                   const char *hostinfo,
                                                   apr_uri_components *uptr)
{
    const char *s;
    char *endstr;
    long port;

    /* Initialize the structure. parse_uri() and parse_uri_components()
     * can be called more than once per request.
     */
    memset (uptr, '\0', sizeof(*uptr));
    uptr->is_initialized = 1;
    uptr->hostinfo = apr_pstrdup(p, hostinfo);

    /* We expect hostinfo to point to the first character of
     * the hostname.  There must be a port, separated by a colon
     */
    s = strchr(hostinfo, ':');
    if (s == NULL) {
        return APR_EGENERAL;
    }
    uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo);
    ++s;
    uptr->port_str = apr_pstrdup(p, s);
    if (*s == '\0') {
        /* "host:" with nothing after the colon */
        return APR_EGENERAL;
    }

    port = strtol(uptr->port_str, &endstr, 10);
    if (*endstr != '\0') {
        /* Invalid characters after ':' found */
        return APR_EGENERAL;
    }
    /* Reject values that would not survive the narrowing to unsigned
     * short; strtol() reports overflow as LONG_MAX/LONG_MIN, which this
     * range check catches too.
     */
    if (port < 0 || port > 65535) {
        return APR_EGENERAL;
    }
    uptr->port = (unsigned short) port;
    return APR_SUCCESS;
}
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 * Portions of this software are based upon public domain software
 * originally written at the National Center for Supercomputing Applications,
 * University of Illinois, Urbana-Champaign.
 */

#include <stdio.h>

/* generate a table of 256 values, where certain characters are
 * marked "interesting"... for the uri parsing process.
 */

/* Return the text emitted for byte value ch: a T_* token name for the
 * characters singled out by the generator, "0" for every other value.
 */
static const char *token_for(int ch)
{
    if (ch == '\0') {
        return "T_NUL";
    }
    if (ch == '#') {
        return "T_HASH";
    }
    if (ch == '/') {
        return "T_SLASH";
    }
    if (ch == ':') {
        return "T_COLON";
    }
    if (ch == '?') {
        return "T_QUESTION";
    }
    return "0";
}

/* Write the uri_delims[] lookup table, as C source text, to stdout.
 * Entries are laid out twenty per line, comma-separated, with a single
 * space after the last one.  argc and argv are not used.
 */
int main(int argc, char *argv[])
{
    enum { TABLE_SIZE = 256, PER_LINE = 20 };
    int ch = 0;

    printf("/* this file is automatically generated by "
            "gen_uri_delims, do not edit */\n");
    printf("static const unsigned char uri_delims[256] = {");

    while (ch < TABLE_SIZE) {
        const char *sep = (ch == TABLE_SIZE - 1) ? " " : ",";

        /* start a fresh, indented row before every twentieth entry */
        if (ch % PER_LINE == 0) {
            printf("\n    ");
        }
        printf("%s%s", token_for(ch), sep);
        ++ch;
    }

    printf("\n};\n");

    return 0;
}

Reply via email to