import urllib2

c = 0
for line in open("big5urls.txt", "r").readlines():
    c += 1
    url = line[:-1]
    try:
        fetch = urllib2.urlopen("http://" + url)
    except:
        print "fetching", c, url
        continue
    if fetch.getcode() != 200:
        print "code", c, url
        continue
    type = fetch.headers.getheader('content-type').lower()
    if "utf-8" in type or "iso-8859-1" in type:
        print "charset", c, url, type
        continue

    try:
        open("fetch/fetch-" + str(c).zfill(3) + ".html", "wb").write(fetch.read())
    except:
        print "writing", c, url
