#!/usr/bin/env python

"""
Url.py   $Id: Url.py,v 1.16 2001/06/14 18:59:41 heiserd Exp $

Utility class to encapsulate information about an URL and the useful
operations thereon.


Copyright 1999, 2000 by Holger Duerer <holly@starship.python.net>

Distributable under the GNU General Public License Version 2 or newer.

"""

import urlparse, urllib, string, sys, os, re

# Add the 'plucker' scheme to each of urlparse's per-feature scheme lists
# (relative references, netloc, params, query, fragment).
for _scheme_list in (urlparse.uses_relative,
                     urlparse.uses_netloc,
                     urlparse.uses_params,
                     urlparse.uses_query,
                     urlparse.uses_fragment):
    _scheme_list.append ('plucker')
# Do not leave the loop variable behind in the module namespace.
del _scheme_list


######################################################################
# Replacement for the urlparse lib, because this is buggy on Windows #
######################################################################
def windows_file_url_parse (url):
    """Split a Windows 'file:' URL into a urlparse-style 6-tuple.

    Returns (protocol, host, path, params, query, fragment) where
    protocol is always 'file' and host, params and query are always
    empty strings.

    - Everything after the last '#' in `url` is the fragment.
    - One leading 'file://' or 'file:' prefix (matched case-insensitively)
      is removed from the path.
    - A slash directly in front of a drive specification, as in the
      'file:///C:/dir/x.html' form, is removed so that the path starts
      with the drive letter.
    - A leading drive letter is converted to upper case.
    """
    fragment = ''
    hash_pos = url.rfind ('#')
    if hash_pos >= 0:
        fragment = url[hash_pos+1:]
        url = url[:hash_pos]

    path = url
    # Strip the scheme prefix once; 'elif' keeps a path that itself
    # happens to start with 'file:' from being stripped a second time.
    if path[0:7].lower () == 'file://':
        path = path[7:]
    elif path[0:5].lower () == 'file:':
        path = path[5:]

    # 'file:///C:/x' leaves '/C:/x' at this point: drop the slash so the
    # drive letter is where the check below expects it.
    if (path[0:1] in ('/', '\\')) and (path[2:3] == ':') \
       and ('A' <= path[1:2].upper () <= 'Z'):
        path = path[1:]

    # Normalise the drive letter to upper case
    if (path[1:2] == ':') and ('A' <= path[0:1].upper () <= 'Z'):
        path = path[0:1].upper () + path[1:]

    return 'file', '', path, '', '', fragment


######################################################################
# Replacement for the urlparse lib, because this is buggy on Windows #
######################################################################
def windows_file_urljoin(base, url):
    """Resolve `url` against the Windows 'file:' URL `base`.

    `url` is returned unchanged when its first ':' sits at index 3 to 10,
    which is how an explicit scheme ('ftp:', 'http:', 'file:', ...) is
    recognised here.  Otherwise `url` is parsed with
    windows_file_url_parse and handled by the form of its path:

    - empty path (empty or fragment-only reference): `base` with its own
      fragment replaced by the fragment of `url`
    - 'X:...' (absolute path with drive letter): 'file:' + that path
    - './' or '.\\' prefix: relative to the directory of `base`
    - '../' or '..\\' prefix: relative to the parent of that directory
    - '.../' or '...\\' prefix: relative to the grandparent
    - leading '/' or '\\': relative to the root of the drive of `base`
    - anything else: relative to the directory of `base`

    Returns the joined URL as a string.
    """
    def add_fragment(path, frag):
        # Append '#frag' only when there is a fragment to append
        if frag != '':
            return path + '#' + frag
        return path

    base = str (base)

    # a new http:// file:// not based on the source is returned as is
    colon = url.find (':')
    if 3 <= colon <= 10:
        return url

    (prot, host, path, params, query, fragment) = windows_file_url_parse (url)

    if path == '':
        # Only a fragment (or nothing at all) was given: stay on the base
        # document, replacing any fragment the base already carries
        # instead of producing 'base#old#new' or a dangling 'base#'.
        hash_pos = base.rfind ('#')
        if hash_pos >= 0:
            base = base[:hash_pos]
        return add_fragment (base, fragment)

    if (path[1:2] == ':') and ('A' <= path[0:1].upper () <= 'Z'):
        # Absolute path with its own drive: nothing of the base applies,
        # but the result must keep the 'file:' scheme.
        return add_fragment ('file:' + path, fragment)

    ######################################
    # FIX ME!!!!                         #
    # paths like .\test\..\images\       #
    # do not work yet!                   #
    ######################################
    base_dir = os.path.dirname (base)

    # .\file.ext == file.ext
    if path[0:2] in ('.\\', './'):
        return os.path.join (base_dir, add_fragment (path[2:], fragment))

    # one dir up
    if path[0:3] in ('..\\', '../'):
        return os.path.join (os.path.dirname (base_dir),
                             add_fragment (path[3:], fragment))

    # two dirs up
    if path[0:4] in ('...\\', '.../'):
        return os.path.join (os.path.dirname (os.path.dirname (base_dir)),
                             add_fragment (path[4:], fragment))

    # Root dir of the drive the base lives on
    if path[0:1] in ('\\', '/'):
        # Take the drive from the parsed base path rather than from a
        # fixed string index, so 'file://C:...' bases work and a short
        # base cannot raise IndexError.
        base_path = windows_file_url_parse (base)[2]
        if base_path[1:2] == ':':
            root = 'file:' + base_path[0:1] + ':'
        else:
            root = 'file:'
        # Keep the leading separator of `path`: joining 'file:C:' and the
        # rest through os.path.join can yield the drive-relative
        # 'file:C:name' instead of a path below the drive root.
        return add_fragment (root + path, fragment)

    # normal case
    return os.path.join (base_dir, add_fragment (path, fragment))


######################################################################
# Replacement for the urlparse lib, because this is buggy on Windows #
######################################################################
def windows_file_urlunparse(protocol, host, path, params, query, fragment):
    """Build the string form of a Windows file URL from its components.

    The result is '<protocol>:<path>#<fragment>'.  The '<protocol>:' part
    is left out when `protocol` is empty and the '#<fragment>' part is
    left out when `fragment` is empty.  `host`, `params` and `query` are
    accepted to mirror the urlparse 6-tuple but are not used.
    """
    text = ''
    if protocol != '':
        text = protocol + ':'
    # The path is always part of the result: callers asking for the path
    # alone pass an empty protocol and must still get the path back.
    text = text + path
    if fragment != '':
        text = text + '#' + fragment
    return text


class URL:
    """Encapsulate some useful things from urllib and urlparse

    An instance stores the six components of a parsed URL (protocol,
    host, path, params, query, fragment).  On win32, 'file:' URLs are
    parsed, joined and rebuilt with the windows_file_* functions of this
    module; everything else goes through urlparse.
    """

    def __init__ (self, url, base = None):
        """Create an URL from `url`, optionally resolved against `base`.

        `url` and `base` may be strings or URL instances (anything else
        is converted with str()).  A URL instance given without a base is
        copied component by component.  The host is stored in lower case.
        """
        if isinstance (url, URL) and base is None:
            # Simple copy constructor: make it more efficient
            self._protocol = url._protocol
            self._host = url._host
            self._path = url._path
            self._params = url._params
            self._query = url._query
            self._fragment = url._fragment
        else:
            url = str (url)
            if base is not None:
                base = str (base)
                if sys.platform == 'win32' and base[0:5].lower () == 'file:':
                    url = windows_file_urljoin (base, url)
                else:
                    url = urlparse.urljoin (base, url)
            # according to RFC 2396, this 'unquote' is inappropriate
            # according to the HTML 4.01 spec, this 'unquote' is unnecessary
            # url = urllib.unquote (url)
            if sys.platform == 'win32' and url[0:5].lower () == 'file:':
                parts = windows_file_url_parse (url)
            else:
                parts = urlparse.urlparse (url)
            (prot, host, path, params, query, fragment) = parts
            self._protocol = prot
            self._host = host.lower ()
            self._path = path
            self._params = params
            self._query = query
            self._fragment = fragment

    def _unparse (self, protocol, host, with_fragment):
        """Rebuild a string from the stored path, params and query plus
        the given protocol and host; the fragment is included only when
        `with_fragment` is true."""
        if with_fragment:
            fragment = self._fragment
        else:
            fragment = ""
        # The choice of unparse function depends on the stored protocol,
        # not on the `protocol` argument, which may be empty.
        if sys.platform == 'win32' and self._protocol == 'file':
            return windows_file_urlunparse (protocol,
                                            host,
                                            self._path,
                                            self._params,
                                            self._query,
                                            fragment)
        return urlparse.urlunparse ((protocol,
                                     host,
                                     self._path,
                                     self._params,
                                     self._query,
                                     fragment))

    def as_string (self, with_fragment):
        """Return the complete URL as a string, with or without the
        fragment."""
        return self._unparse (self._protocol, self._host, with_fragment)

    def __str__ (self):
        return self.as_string (with_fragment=1)

    def __repr__ (self):
        return "URL (%s)" % repr (self.as_string (with_fragment=1))

    def get_protocol (self):
        """Return the protocol (scheme) component."""
        return self._protocol

    def get_host (self):
        """Return the host component (stored in lower case)."""
        return self._host

    def get_path (self):
        """Return the path component."""
        return self._path

    def get_fragment (self):
        """Return the fragment component (without the '#')."""
        return self._fragment

    def get_full_path (self, with_fragment):
        """Return the URL rebuilt with an empty protocol and host, i.e.
        from path, params, query and (optionally) fragment only."""
        return self._unparse ("", "", with_fragment)

    def remove_fragment (self):
        """Drop the fragment from this URL (modifies the instance)."""
        self._fragment = ""


def CleanURL (url, base=None):
    """Remove leading and trailing white space and generally clean up
    this URL

    `url` may be a string or an URL instance; `base`, when given, is the
    URL that `url` is resolved against.  For a string, surrounding white
    space is stripped and every remaining blank is escaped as '%20'
    before the URL is parsed.  Returns the cleaned URL as a string,
    including its fragment.
    """
    if isinstance (url, URL):
        # This branch is currently never taken, we get always called
        # with a string as 'url'
        if base is not None:
            # FIXME!!  Does this make sense at all?  URLs should always be
            # absolute, so giving a base is moot...
            result = URL (url, base).as_string (with_fragment=1)
        else:
            result = url.as_string (with_fragment=1)
    else:
        # Strip before escaping: otherwise leading and trailing blanks
        # would be turned into '%20' and survive the strip.
        text = str (url).strip ()
        # Fix for spaces inside the URL
        text = text.replace (' ', '%20')
        result = URL (text, base).as_string (with_fragment=1)
    return result