#!/usr/bin/python
"""Drive newsparser over a list of newspaper article URLs.

Reads a whitespace-delimited two-column input file of URLs and output
filenames (see usage()), classifies each URL by news source, and
records pages that could not be processed in an error log.
"""
import newsparser, sys, os
# Basename of this script; used for the usage message and the log-file name.
cmdname=os.path.basename(sys.argv[0])

# Source identifiers returned by distinguish().
OTHER = 0
GLOBE = 1
HERALD = 2
# File recording URL/filename pairs whose pages could not be processed.
ERRLOG = "pages-not-processed.txt"

def usage():
    """Write a usage synopsis to stderr and exit with status 1."""
    # sys.stderr.write behaves identically under Python 2 and 3,
    # unlike the Python-2-only "print >>sys.stderr" statement.
    sys.stderr.write(
        "Usage: %s <filename>\n"
        "            filename -- filename with whitespace delimited two columns\n"
        "            of URLs and filenames to be created in the current directory.\n"
        % cmdname)
    sys.exit(1)

def distinguish(url):
    """Classify *url* by news source.

    Parameters:
        url -- article URL string.

    Returns GLOBE for boston.com URLs, HERALD for Boston Herald URLs,
    and OTHER for everything else.
    """
    # Substring membership instead of find(...) > 0: find() returns the
    # match *index*, so a match at position 0 (a URL without a scheme
    # prefix, e.g. "www.boston.com/...") was previously misclassified
    # as OTHER.
    if "www.boston.com" in url:
        return GLOBE
    elif "bostonherald" in url:
        return HERALD
    else:
        return OTHER

if len(sys.argv) != 2:
    usage()
# Per-run log file named after the script, removed so every run starts
# fresh.  NOTE(review): nothing in this script writes to it -- presumably
# the newsparser classes do; confirm before removing this logic.
logfile = os.path.splitext(cmdname)[0] + '.log'

if os.path.isfile(logfile):
    os.remove(logfile)
listfilename = sys.argv[1]
try:
    listfile = open(listfilename, "r")
except IOError:
    # raise E(msg) works under both Python 2 and 3; the original
    # "raise IOError, msg" form is Python-2-only syntax.
    raise IOError("Cannot open the input filename %s!" % listfilename)
try:
    errfile = open(ERRLOG, "w")
except IOError:
    raise IOError("Cannot open the error log %s!" % ERRLOG)

# http://www.boston.com/news/local/articles/2004/11/11/\
# seeking_a_path_to_mcas_success/?rss_id=Boston+Globe+--+City+/+Region+News
# We have to convert URLs given from RSS to the script with "Print Only" etc.
# pages.

for line in listfile:
    # Skip blank lines (e.g. a trailing newline in the list file) instead
    # of crashing on the two-value unpack below.
    if not line.strip():
        continue
    url, filename = line.split()
    newstype = distinguish(url)
    try:
        if newstype == GLOBE:
            # page = newsparser.Globe(url, filename)
            sys.stderr.write("Globe, %s\n" % url)
        elif newstype == HERALD:
            sys.stderr.write("Herald, %s\n" % url)
            # page = newsparser.Herald(url, filename)
        elif newstype == OTHER:
            # page = newsparser.OtherNewspage(url, filename)
            sys.stderr.write("Other, %s\n" % url)
    except newsparser.PageNotAvailable:
        # Record the failed page so it can be retried later.
        errfile.write("%s\t%s" % (url, filename) + "\n")

# Close both files so the error log is actually flushed to disk -- the
# original script never closed either handle.
listfile.close()
errfile.close()

sys.stderr.write("OK\n")

#      | tidy -f tmplog -asxhtml -utf8 \
#         --doctype transitional --write-back no \
#         --new-inline-tags org \
#         --new-blocklevel-tags text,copyrite \
#         > $FILE.xhtml