#!/usr/bin/env python

import sys, os, re
import htmllib, urllib, urllib2, urlparse, formatter

# Matches the first <ref href="..."> entry of an ASX playlist; group(1) is the
# quoted URL.  The capture is limited to non-quote characters so that it stops
# at the closing quote of the href attribute instead of running on to the last
# quote found on the same line.
asx_re = re.compile(r"""<ref\s+href\s*=\s*"([^"]*)"\s*""", re.IGNORECASE | re.MULTILINE)

class EmbedParser(htmllib.HTMLParser):
    def __init__(self, baseurl):
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        self.objects = []
        self.baseurl = baseurl

    def do_embed(self, attrs):
        for attr in attrs:
            if attr[0] == 'src':
                self.objects.append(urlparse.urljoin(self.baseurl, attr[1]))

def getEmbeddedObjects(s, baseurl):
    """Return the URLs of the <embed> sources found in the HTML text s.

    baseurl is handed to EmbedParser, which uses it to turn relative src
    values into absolute URLs.  The result is the parser's list of URLs, in
    the order the tags were encountered.
    """
    parser = EmbedParser(baseurl)
    parser.feed(s)
    found = parser.objects
    return found


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print "Usage: getvideo [URL]..."
        sys.exit(2)

    objects = []
    for url in sys.argv[1:]:
        page = urllib.urlopen(url).read()
        objects.extend(getEmbeddedObjects(page, url))

    for obj in objects:
        name = urlparse.urlparse(obj)[2].split('/')[-1]
        if os.path.exists(name):
            print "%s exists. Not overwriting" % name
            continue

        print "Saving %s..." % name
        f = open(name, 'w')

        # Check to see if there's some intermediate file...
        try:
            data = urllib2.urlopen(obj).read(1024)
        except HTTPError, e:
            sys.stdout.write("Could not retrieve URL %s: %s\n" % (obj, e))
            continue

        if data.find('<?quicktime type="application/x-quicktime-media-link"?>') > -1:
            obj = data.split('src="')[1].split('"')[0]
        elif data.lower().find('<asx') == 0:
            res = asx_re.search(data)
            if res:
                obj = res.group(1)
        elif data.lower().find('rtsp:') == 0:
            obj = data

        f.write(urllib.urlopen(obj).read())
        f.close()


# vim: ts=4 sw=4 et
