From ebbcc923b776fd3fbfe28c5c050ee8df7b71529d Mon Sep 17 00:00:00 2001
From: Daniel Hartwig <mandyke@gmail.com>
Date: Thu, 8 Nov 2012 13:11:52 +0800
Subject: [PATCH 3/3] uri-reference support

* module/web/uri.scm (build-uri, validate-uri): Also build
  relative-refs, which have no scheme.
  (string->uri, uri->string): Also support URI objects with no scheme.
  (uri-reference?, relative-ref?, absolute-uri?): New predicates to
  distinguish various URI-like objects.
  (uri?): Redefine so that semantics are unchanged; only return #t for
  objects previously built and validated by build-uri?.  Such objects
  always had a uri-scheme.
* module/web/http.scm (declare-uri-reference-header!): New header type
  accepting any URI-reference.
  ("Content-Location", "Referer"): Change to URI-reference type.
* doc/ref/web.texi (URIs): Document other URI-like syntaxes defined in
  RFC 3986.  Include brief discussion.  Update functions
* test-suite/tests/web-http.test:
* test-suite/tests/web-uri.test: Add relevant tests.
---
 doc/ref/web.texi               |   39 ++++++++++---
 module/web/http.scm            |   13 ++++-
 module/web/uri.scm             |   59 ++++++++++++++-----
 test-suite/tests/web-http.test |    4 ++
 test-suite/tests/web-uri.test  |  125 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 214 insertions(+), 26 deletions(-)

diff --git a/doc/ref/web.texi b/doc/ref/web.texi
index 3e93bea..d247080 100644
--- a/doc/ref/web.texi
+++ b/doc/ref/web.texi
@@ -173,8 +173,9 @@ Guile provides a standard data type for Universal Resource Identifiers
 The generic URI syntax is as follows:
 
 @example
-URI := scheme ":" ["//" [userinfo "@@"] host [":" port]] path \
-       [ "?" query ] [ "#" fragment ]
+URI := scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+
+hier-part := ["//" [userinfo "@@"] host [":" port]] path
 @end example
 
 For example, in the URI, @indicateurl{http://www.gnu.org/help/}, the
@@ -198,6 +199,25 @@ fragment identifies a part of a resource, not the resource itself.  But
 it is useful to have a fragment field in the URI record itself, so we
 hope you will forgive the inconsistency.
 
+There are some additional URI-like syntaxes:
+
+@example
+URI-reference := URI / relative-ref
+
+relative-ref := hier-part [ "?" query ] [ "#" fragment ]
+
+absolute-URI := scheme ":" hier-part [ "?" query ]
+@end example
+
+These extra forms are useful in various situations.  For example,
+relative-refs are convenient in documents to refer to other parts of the
+same document, or resources located on the same site.
+
+A relative-ref must be considered in relation to a given base URI to
+correctly identify a resource.  The base URI is determined according to
+the context and properties of the document in which the relative-ref is
+located.  See section 5 of RFC 3986 for details.
+
 @example
 (use-modules (web uri))
 @end example
@@ -211,12 +231,14 @@ access to them.
        [#:fragment=@code{#f}] [#:validate?=@code{#t}]
 Construct a URI object.  @var{scheme} should be a symbol, @var{port}
 either a positive, exact integer or @code{#f}, and the rest of the
-fields are either strings or @code{#f}.  If @var{validate?} is true,
-also run some consistency checks to make sure that the constructed URI
-is valid.
+fields are strings.  If @var{validate?} is true, also run some
+consistency checks to make sure that the constructed URI is valid.
 @end deffn
 
 @deffn {Scheme Procedure} uri? x
+@deffnx {Scheme Procedure} uri-reference? x
+@deffnx {Scheme Procedure} relative-ref? x
+@deffnx {Scheme Procedure} absolute-uri? x
 @deffnx {Scheme Procedure} uri-scheme uri
 @deffnx {Scheme Procedure} uri-userinfo uri
 @deffnx {Scheme Procedure} uri-host uri
@@ -224,9 +246,10 @@ is valid.
 @deffnx {Scheme Procedure} uri-path uri
 @deffnx {Scheme Procedure} uri-query uri
 @deffnx {Scheme Procedure} uri-fragment uri
-A predicate and field accessors for the URI record type.  The URI scheme
-will be a symbol, the port either a positive, exact integer or @code{#f},
-and the rest either strings or @code{#f} if not present.
+Predicates and field accessors for the URI record type.  The URI scheme
+will be a symbol, the port a positive, exact integer, and the rest
+strings.  Any field other than @code{uri-path} may also be @code{#f} if
+not present.
 @end deffn
 
 @deffn {Scheme Procedure} string->uri string
diff --git a/module/web/http.scm b/module/web/http.scm
index 3b78d08..389880b 100644
--- a/module/web/http.scm
+++ b/module/web/http.scm
@@ -1179,12 +1179,19 @@ treated specially, and is just returned as a plain string."
     parse-non-negative-integer non-negative-integer? display))
 
 ;; emacs: (put 'declare-uri-header! 'scheme-indent-function 1)
-(define (declare-uri-header! name)
+(define* (declare-uri-header! name #:optional)
   (declare-header! name
     (lambda (str) (or (string->uri str) (bad-header-component 'uri str)))
     uri?
     write-uri))
 
+;; emacs: (put 'declare-uri-reference-header! 'scheme-indent-function 1)
+(define (declare-uri-reference-header! name)
+  (declare-header! name
+    (lambda (str) (or (string->uri str) (bad-header-component 'uri-reference str)))
+    uri-reference?
+    write-uri))
+
 ;; emacs: (put 'declare-quality-list-header! 'scheme-indent-function 1)
 (define (declare-quality-list-header! name)
   (declare-header! name
@@ -1437,7 +1444,7 @@ treated specially, and is just returned as a plain string."
 
 ;; Content-Location = ( absoluteURI | relativeURI )
 ;;
-(declare-uri-header! "Content-Location")
+(declare-uri-reference-header! "Content-Location")
 
 ;; Content-MD5 = <base64 of 128 bit MD5 digest as per RFC 1864>
 ;;
@@ -1726,7 +1733,7 @@ treated specially, and is just returned as a plain string."
 
 ;; Referer = ( absoluteURI | relativeURI )
 ;;
-(declare-uri-header! "Referer")
+(declare-uri-reference-header! "Referer")
 
 ;; TE = #( t-codings )
 ;; t-codings = "trailers" | ( transfer-extension [ accept-params ] )
diff --git a/module/web/uri.scm b/module/web/uri.scm
index e84bc03..e7990ad 100644
--- a/module/web/uri.scm
+++ b/module/web/uri.scm
@@ -31,7 +31,7 @@
   #:use-module (ice-9 control)
   #:use-module (rnrs bytevectors)
   #:use-module (ice-9 binary-ports)
-  #:export (uri?
+  #:export (uri? uri-reference? relative-ref? absolute-uri?
             uri-scheme uri-userinfo uri-host uri-port
             uri-path uri-query uri-fragment
 
@@ -44,7 +44,7 @@
 
 (define-record-type <uri>
   (make-uri scheme userinfo host port path query fragment)
-  uri?
+  uri-reference?
   (scheme uri-scheme)
   (userinfo uri-userinfo)
   (host uri-host)
@@ -53,15 +53,32 @@
   (query uri-query)
   (fragment uri-fragment))
 
+(define (uri? x)
+  (and (uri-reference? x)
+       (uri-scheme x)
+       #t))
+
+(define (relative-ref? x)
+  (and (uri-reference? x)
+       (not (uri-scheme x))
+       #t))
+
+(define (absolute-uri? x)
+  (and (uri-reference? x)
+       (uri-scheme x)
+       (not (uri-fragment x))
+       #t))
+
 (define (uri-error message . args)
   (throw 'uri-error message args))
 
 (define (positive-exact-integer? port)
   (and (number? port) (exact? port) (integer? port) (positive? port)))
 
-(define (validate-uri scheme userinfo host port path query fragment)
+(define (validate-uri scheme userinfo host port path query fragment reference?)
   (cond
-   ((not (symbol? scheme))
+   ((and (not (symbol? scheme))
+         (or (not reference?) scheme))
     (uri-error "Expected a symbol for the URI scheme: ~s" scheme))
    ((and (or userinfo port) (not host))
     (uri-error "Expected a host, given userinfo or port"))
@@ -85,7 +102,7 @@ fields are either strings or @code{#f}.  If @var{validator?} is true,
 also run some consistency checks to make sure that the constructed URI
 is valid."
   (if validate?
-      (validate-uri scheme userinfo host port path query fragment))
+      (validate-uri scheme userinfo host port path query fragment #t))
   (make-uri scheme userinfo host port path query fragment))
 
 ;; See RFC 3986 #3.2.2 for comments on percent-encodings, IDNA (RFC
@@ -153,6 +170,17 @@ is valid."
 ;;;               / path-absolute
 ;;;               / path-rootless
 ;;;               / path-empty
+;;;
+;;; RFC 3986, #4.
+;;;
+;;;   URI-reference = URI / relative-ref
+;;;
+;;;   relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
+;;;
+;;;   relative-part = "//" authority path-abempty
+;;;                 / path-absolute
+;;;                 / path-noscheme
+;;;                 / path-empty
 
 (define scheme-pat
   "[a-zA-Z][a-zA-Z0-9+.-]*")
@@ -165,7 +193,7 @@ is valid."
 (define fragment-pat
   ".*")
 (define uri-pat
-  (format #f "^(~a):(//~a)?(~a)(\\?(~a))?(#(~a))?$"
+  (format #f "^((~a):)?(//~a)?(~a)(\\?(~a))?(#(~a))?$"
           scheme-pat authority-pat path-pat query-pat fragment-pat))
 (define uri-regexp
   (make-regexp uri-pat))
@@ -175,12 +203,12 @@ is valid."
 could not be parsed."
   (% (let ((m (regexp-exec uri-regexp string)))
        (if (not m) (abort))
-       (let ((scheme (string->symbol
-                      (string-downcase (match:substring m 1))))
-             (authority (match:substring m 2))
-             (path (match:substring m 3))
-             (query (match:substring m 5))
-             (fragment (match:substring m 7)))
+       (let ((scheme (let ((s (match:substring m 2)))
+                       (and s (string->symbol (string-downcase s)))))
+             (authority (match:substring m 3))
+             (path (match:substring m 4))
+             (query (match:substring m 6))
+             (fragment (match:substring m 8)))
          (call-with-values
              (lambda ()
                (if authority
@@ -208,8 +236,7 @@ could not be parsed."
   "Serialize @var{uri} to a string.  If the URI has a port that is the
 default port for its scheme, the port is not included in the
 serialization."
-  (let* ((scheme-str (string-append
-                      (symbol->string (uri-scheme uri)) ":"))
+  (let* ((scheme (uri-scheme uri))
          (userinfo (uri-userinfo uri))
          (host (uri-host uri))
          (port (uri-port uri))
@@ -217,7 +244,9 @@ serialization."
          (query (uri-query uri))
          (fragment (uri-fragment uri)))
     (string-append
-     scheme-str
+     (if scheme
+         (string-append (symbol->string scheme) ":")
+         "")
      (if host
          (string-append "//"
                         (if userinfo (string-append userinfo "@")
diff --git a/test-suite/tests/web-http.test b/test-suite/tests/web-http.test
index 97f5559..5d80fde 100644
--- a/test-suite/tests/web-http.test
+++ b/test-suite/tests/web-http.test
@@ -145,6 +145,8 @@
   (pass-if-parse content-length "010" 10)
   (pass-if-parse content-location "http://foo/"
                  (build-uri 'http #:host "foo" #:path "/"))
+  (pass-if-parse content-location "//foo"
+                 (build-uri #f #:host "foo"))
   (pass-if-parse content-range "bytes 10-20/*" '(bytes (10 . 20) *))
   (pass-if-parse content-range "bytes */*" '(bytes * *))
   (pass-if-parse content-range "bytes */30" '(bytes * 30))
@@ -208,6 +210,8 @@
   (pass-if-parse range "bytes=-20,-30" '(bytes (#f . 20) (#f . 30)))
   (pass-if-parse referer "http://foo/bar?baz"
                  (build-uri 'http #:host "foo" #:path "/bar" #:query "baz"))
+  (pass-if-parse referer "//foo/bar?baz"
+                 (build-uri #f #:host "foo" #:path "/bar" #:query "baz"))
   (pass-if-parse te "trailers" '((trailers)))
   (pass-if-parse te "trailers,foo" '((trailers) (foo)))
   (pass-if-parse user-agent "guile" "guile"))
diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test
index 3f6e7e3..d4d44d6 100644
--- a/test-suite/tests/web-uri.test
+++ b/test-suite/tests/web-uri.test
@@ -78,6 +78,24 @@
            #:port 22
            #:path "/baz"))
 
+  (pass-if "foo"
+    (uri=? (build-uri #f #:path "foo")
+           #:path "foo"))
+
+  (pass-if "/foo"
+    (uri=? (build-uri #f #:path "/foo")
+           #:path "/foo"))
+
+  (pass-if "//foo/bar"
+    (uri=? (build-uri #f #:host "foo" #:path "/bar")
+           #:host "foo"
+           #:path "/bar"))
+
+  (pass-if "?foo"
+    (uri=? (build-uri #f #:query "foo")
+           #:path ""
+           #:query "foo"))
+
   (pass-if-uri-exception "non-symbol scheme"
                          "Expected.*symbol"
                          (build-uri "nonsym"))
@@ -123,6 +141,80 @@
                          "Expected.*host"
                          (build-uri 'http #:userinfo "foo")))
 
+(with-test-prefix "absolute-uri?"
+  (pass-if "ftp:"
+    (absolute-uri? (build-uri 'ftp)))
+
+  (pass-if "ftp:foo"
+    (absolute-uri? (build-uri 'ftp #:path "foo")))
+
+  (pass-if "ftp://foo/bar"
+    (absolute-uri? (build-uri 'ftp #:host "foo" #:path "/bar")))
+
+  (pass-if "ftp://foo@bar:22/baz"
+    (absolute-uri? (build-uri 'ftp #:userinfo "foo" #:host "bar" #:port 22 #:path "/baz")))
+
+  (expect-fail "foo"
+    (absolute-uri? (build-uri #f #:path "foo")))
+
+  (expect-fail "/foo"
+    (absolute-uri? (build-uri #f #:path "/foo")))
+
+  (expect-fail "//foo/bar"
+    (absolute-uri? (build-uri #f #:host "foo" #:path "/bar")))
+
+  (expect-fail "?foo"
+    (absolute-uri? (build-uri #f #:query "foo"))))
+
+(with-test-prefix "relative-ref?"
+  (expect-fail "ftp:"
+    (relative-ref? (build-uri 'ftp)))
+
+  (expect-fail "ftp:foo"
+    (relative-ref? (build-uri 'ftp #:path "foo")))
+
+  (expect-fail "ftp://foo/bar"
+    (relative-ref? (build-uri 'ftp #:host "foo" #:path "/bar")))
+
+  (expect-fail "ftp://foo@bar:22/baz"
+    (relative-ref? (build-uri 'ftp #:userinfo "foo" #:host "bar" #:port 22 #:path "/baz")))
+
+  (pass-if "foo"
+    (relative-ref? (build-uri #f #:path "foo")))
+
+  (pass-if "/foo"
+    (relative-ref? (build-uri #f #:path "/foo")))
+
+  (pass-if "//foo/bar"
+    (relative-ref? (build-uri #f #:host "foo" #:path "/bar")))
+
+  (pass-if "?foo"
+    (relative-ref? (build-uri #f #:query "foo"))))
+
+(with-test-prefix "uri-reference?"
+  (pass-if "ftp:"
+    (uri-reference? (build-uri 'ftp)))
+
+  (pass-if "ftp:foo"
+    (uri-reference? (build-uri 'ftp #:path "foo")))
+
+  (pass-if "ftp://foo/bar"
+    (uri-reference? (build-uri 'ftp #:host "foo" #:path "/bar")))
+
+  (pass-if "ftp://foo@bar:22/baz"
+    (uri-reference? (build-uri 'ftp #:userinfo "foo" #:host "bar" #:port 22 #:path "/baz")))
+
+  (pass-if "foo"
+    (uri-reference? (build-uri #f #:path "foo")))
+
+  (pass-if "/foo"
+    (uri-reference? (build-uri #f #:path "/foo")))
+
+  (pass-if "//foo/bar"
+    (uri-reference? (build-uri #f #:host "foo" #:path "/bar")))
+
+  (pass-if "?foo"
+    (uri-reference? (build-uri #f #:query "foo"))))
 
 (with-test-prefix "string->uri"
   (pass-if "ftp:"
@@ -149,6 +241,24 @@
            #:port 22
            #:path "/baz"))
 
+  (pass-if "foo"
+    (uri=? (string->uri "foo")
+           #:path "foo"))
+
+  (pass-if "/foo"
+    (uri=? (string->uri "/foo")
+           #:path "/foo"))
+
+  (pass-if "//foo/bar"
+    (uri=? (string->uri "//foo/bar")
+           #:host "foo"
+           #:path "/bar"))
+
+  (pass-if "?foo"
+    (uri=? (string->uri "?foo")
+           #:path ""
+           #:query "foo"))
+
   (pass-if "http://bad.host.1"
     (not (string->uri "http://bad.host.1")))
 
@@ -229,6 +339,21 @@
     (equal? "ftp://foo@bar:22/baz"
             (uri->string (string->uri "ftp://foo@bar:22/baz"))))
   
+  (pass-if "foo"
+    (equal? "foo"
+            (uri->string (string->uri "foo"))))
+
+  (pass-if "/foo"
+    (equal? "/foo" (uri->string (string->uri "/foo"))))
+
+  (pass-if "//foo/bar"
+    (equal? "//foo/bar"
+            (uri->string (string->uri "//foo/bar"))))
+
+  (pass-if "?foo"
+    (equal? "?foo"
+            (uri->string (string->uri "?foo"))))
+
   (when (memq 'socket *features*)
     (pass-if "http://192.0.2.1"
       (equal? "http://192.0.2.1"
-- 
1.7.10.4

