I am in the middle of writing a BitTorrent search scraper. I know there has
been much discussion about the legality of this issue [and I understand that
it will probably prevent it from being included in SVN], but that's not
the point of this e-mail. I've successfully written a module that represents
a BitTorrent site, which has a filter and a list of torrents that match that
filter. I am now in the process of writing a periodic that will download any
"new" results that match that filter. I was looking into using
"bittorrent-console", but I need a way to know when the torrent is finished,
so I can close that download process in the periodic. If anyone has any
hints, they would be greatly appreciated.

I've attached my current source, if interested.
freevo/src/torrentsite.py
freevo/src/torrentperiodic.py
freevo/src/helpers/torrentserver.py

It is structured and started the same way as the rssserver.
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------
# torrentperiodic.py - This is the Freevo torrent periodic module
# -----------------------------------------------------------------------
# $Id: torrentperiodic.py 9743 2007-07-02 19:28:22Z duncan $
#
# Notes:
# Todo:
#
# -----------------------------------------------------------------------
# Freevo - A Home Theater PC framework
# Copyright (C) 2002 Krister Lagerstrom, et al.
# Please see the file freevo/Docs/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------

import re,os,sys,glob,urllib,datetime,time,shutil
from subprocess import Popen
import cPickle, pickle
import config
import torrentsite

# Name of the running script without directory or extension, and its
# upper-cased form, which prefixes the per-application config settings.
appname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
appconf = appname.upper()
# Use the per-application <APPCONF>_DEBUG setting when it exists and is
# truthy, otherwise fall back to the global config.DEBUG.  getattr() gives
# the same result as the former eval() without evaluating a string that is
# built from argv[0].
DEBUG = getattr(config, appconf + '_DEBUG', None) or config.DEBUG

def _debug_(text, level=1):
    """Emit a debug message when the module DEBUG value is at least `level`.

    text  -- object to report; it is converted with str().
    level -- minimum DEBUG value required for the message to be emitted.

    The message is passed to log.debug(); if that call fails for any
    reason the message is printed to stdout instead.
    """
    if DEBUG >= level:
        try:
            log.debug(str(text))
        except Exception:
            # NOTE(review): the imports visible for this module do not
            # define `log`, so this fallback looks like the path normally
            # taken -- confirm.
            # Exception (not a bare except) keeps KeyboardInterrupt and
            # SystemExit from being swallowed here.
            print(str(text))

def checkForDup(string):
    """Return True when `string` occurs in an entry of the download cache.

    The cache is the pickled list stored in
    config.FREEVO_CACHEDIR + "/torrent.pickle".  Each entry is tested with
    the `in` operator, so for string entries a substring match counts as a
    duplicate.

    Returns False when the cache file cannot be opened or is empty
    (IOError / EOFError).  Other unpickling errors propagate to the caller.
    """
    cacheFile=config.FREEVO_CACHEDIR+"/torrent.pickle"
    try:
        # NOTE: unpickling can execute arbitrary code; the cache file must
        # only be writable by trusted users.
        handle = open(cacheFile,"r")
        try:
            try:
                downloadedUrls = cPickle.load(handle)
            except Exception:
                # Same fallback as before (cPickle first, then pickle), but
                # re-reading the already open file instead of opening it a
                # second time.
                handle.seek(0)
                downloadedUrls = pickle.load(handle)
        finally:
            # The handle used to be left for the garbage collector.
            handle.close()
    except (IOError, EOFError):
        return False
    for line in downloadedUrls:
        if string in line:
            # The first hit decides; no need to scan the rest of the cache.
            return True
    return False

def addFileToCache(string):
    """Append `string` to the pickled download cache and write it back.

    The cache is the list stored in
    config.FREEVO_CACHEDIR + "/torrent.pickle".  A missing, unreadable or
    empty cache file is treated as an empty list.  Other unpickling errors,
    and errors while opening the file for writing, propagate to the caller.
    """
    cacheFile=config.FREEVO_CACHEDIR+"/torrent.pickle"
    downloadedUrls=[]
    try:
        # NOTE: unpickling can execute arbitrary code; the cache file must
        # only be writable by trusted users.
        handle = open(cacheFile,"r")
        try:
            try:
                downloadedUrls = cPickle.load(handle)
            except Exception:
                # cPickle first, pickle as fallback, reading the same open
                # file again from the start.
                handle.seek(0)
                downloadedUrls = pickle.load(handle)
        finally:
            handle.close()
    except (IOError, EOFError):
        # EOFError (empty or truncated cache) used to escape from here even
        # though checkForDup() tolerates it; start from an empty list.
        downloadedUrls = []
    downloadedUrls.append(string)
    handle = open(cacheFile,"w")
    try:
        try:
            cPickle.dump(downloadedUrls, handle)
        except Exception:
            # Discard whatever was partially written before retrying with
            # the pure-Python pickler.
            handle.seek(0)
            handle.truncate()
            pickle.dump(downloadedUrls, handle)
    finally:
        # Close explicitly so the data is flushed to disk deterministically.
        handle.close()

def _runLogged(argv, logPath):
    """Run `argv` without a shell, appending its stdout/stderr to `logPath`.

    Returns the exit status of the process, or 127 (the status a shell
    reports for "command not found") when the program cannot be started.
    An IOError from opening the log file propagates to the caller.
    """
    cmdlog = open(logPath, 'a')
    try:
        try:
            # An argument list with no shell: the URL is scraped from a web
            # page, and characters such as '&' or ';' in it must not be
            # interpreted as shell syntax.
            return Popen(argv, stderr=cmdlog, stdout=cmdlog).wait()
        except OSError:
            _debug_("ERROR: Could not run %s: %s" % (argv[0], sys.exc_info()[1]), 5)
            return 127
    finally:
        cmdlog.close()


def _fetchTorrent(torrent):
    """Download one torrent unless its URL is already in the cache."""
    # NOTE(review): the title is used as a file name relative to the current
    # directory; whether bittorrent-console writes its output under that
    # name cannot be told from this module -- confirm.
    filename = torrent.title
    url = torrent.url
    if checkForDup(url):
        _debug_("Duplicate, not getting %s" % url,5)
        return

    logPath = os.path.join(config.LOGDIR, 'torrent-bittorrent.out')
    _debug_("Getting torrent file: %s"%str(url))
    exitStatus = _runLogged(['wget', str(url)], logPath)
    if exitStatus:
        _debug_("Download failed - exit status %s." % exitStatus,5)
        return

    _debug_("Running bittorrent download from %s" % str(url),5)
    exitStatus = _runLogged(['bittorrent-console', str(url)], logPath)
    if exitStatus:
        _debug_("Download failed - exit status %s." % exitStatus,5)
        try:
            os.remove(filename)
        except OSError:
            # Nothing (or nothing removable) was left behind; this must not
            # abort the remaining torrents.
            _debug_('failed to remove %s' % filename,5)
    else:
        try:
            _debug_("Download completed (%s bytes)" % os.path.getsize(filename),5)
            shutil.move(filename, config.TV_RECORD_DIR)
        except EnvironmentError:
            _debug_('failed to move %s to %s' % (filename, config.TV_RECORD_DIR),5)
    # As before, the URL is cached once the .torrent file was fetched,
    # whether or not the bittorrent download itself succeeded.
    addFileToCache(url)


def checkForUpdates():
    """Scan every site listed in config.TORRENT_SITES and fetch new torrents.

    Each line of the sites file holds six ';'-separated fields:
    url;query;downloadLinkSignal;torrentLinkSignal;limitResults;filterText
    Blank lines, lines starting with '#', lines without exactly six fields
    and lines whose limitResults is not an integer are skipped.

    For every torrent a site reports whose URL is not yet cached, the
    torrent is fetched with wget and bittorrent-console.  Returns None.
    """
    try:
        sitesFile = open(config.TORRENT_SITES,"r")
    except IOError:
        _debug_("ERROR: Could not open configuration file %s" % (config.TORRENT_SITES),5)
        return

    try:
        for line in sitesFile:
            if not line.strip() or line.startswith('#'):
                continue
            try:
                (url,query,downloadLinkSignal,torrentLinkSignal,limitResults,filterText)=line.split(";")
                # Converted here so that a non-numeric limit skips the line
                # like any other malformed entry instead of raising.
                limitResults = int(limitResults)
            except ValueError:
                continue
            _debug_("Check %s for updates" % url,5)
            try:
                site = torrentsite.TorrentSite()
                site.url=url
                site.query=query
                site.downloadLinkSignal=downloadLinkSignal
                site.torrentLinkSignal=torrentLinkSignal
                site.limitResults=limitResults
                # Passed through unchanged apart from the line ending:
                # TorrentSite.setPatterns() does its own case handling, and
                # upper-casing a regular expression here would turn escapes
                # such as \d into \D.
                site.filterText=filterText.rstrip('\r\n')
                site.setPatterns()
                site.parseSite()
                for torrent in site.torrents:
                    _debug_("Torrent %s" % torrent,5)
                    _fetchTorrent(torrent)
            except IOError:
                _debug_("ERROR: Unable to download %s. Connection may be down." % (url),5)
    finally:
        sitesFile.close()
            _debug_("ERROR: Unable to download %s. Connection may be down." % (url),5)
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------
# torrentserver.py - This is the Freevo torrent retriever server
# -----------------------------------------------------------------------
# $Id: torrentserver.py 9710 2007-06-16 19:32:37Z phishman $
#
# Notes:
# Todo:
#
# -----------------------------------------------------------------------
# Freevo - A Home Theater PC framework
# Copyright (C) 2002 Krister Lagerstrom, et al.
# Please see the file freevo/Docs/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------

'''
Torrent server helper: calls torrentperiodic.checkForUpdates() in an endless
loop, sleeping 60 seconds between passes, and logs to
<LOGDIR>/<appname>-<uid>.log.

Settings read from the Freevo configuration by this helper and by
torrentperiodic (set them in local_conf.py):

# File defining the torrent sites to monitor (see format below),
# e.g. '/etc/freevo/torrent.sites'.
TORRENT_SITES
# Directory for the log files.
LOGDIR
# Directory that finished downloads are moved to.
TV_RECORD_DIR
# Directory holding torrent.pickle, the cache of URLs already handled.
FREEVO_CACHEDIR
# Optional, where <APPNAME> is the upper-cased script name: user and group
# to switch to when started as root, and a per-application debug level.
<APPNAME>_UID, <APPNAME>_GID, <APPNAME>_DEBUG

The sites file holds one site per line, six fields separated by
semicolons:

url;query;downloadLinkSignal;torrentLinkSignal;limitResults;filterText

Blank lines, lines starting with '#' and lines that do not have exactly six
fields are skipped.  The two "signal" fields and filterText are compiled as
regular expressions by torrentsite.TorrentSite.

# Begin example sites file (the values are TorrentSite's built-in defaults)
www.torrentspy.com;search?query=EPL&submit.x=0&submit.y=0;"Download Torrent";<a name="torrent">;10;United
# End example sites file
'''

import os,sys,threading,time
import torrentperiodic
import config
from twisted.python import log

# Name of the running script without directory or extension, and its
# upper-cased form, which prefixes the per-application config settings.
appname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
appconf = appname.upper()

# change uid
if __name__ == '__main__':
    try:
        # pwd was used below without ever being imported, so USER and HOME
        # were never updated; import it where it is needed.
        import pwd
        # Plain attribute lookups replace eval() of strings built from
        # argv[0].  A missing <APPCONF>_UID simply means "do not switch".
        uid = getattr(config, appconf + '_UID', None)
        if uid and os.getuid() == 0:
            # The group must be changed first: after setuid() the process
            # may no longer be allowed to call setgid().
            os.setgid(getattr(config, appconf + '_GID'))
            os.setuid(uid)
            os.environ['USER'] = pwd.getpwuid(os.getuid())[0]
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
    except Exception:
        print(sys.exc_info()[1])

if len(sys.argv)>1 and sys.argv[1] == '--help':
    print('start or stop the internal torrentserver')
    print('usage freevo torrentserver [ start | stop ]')
    sys.exit(0)


logfile = '%s/%s-%s.log' % (config.LOGDIR, appname, os.getuid())
log.startLogging(open(logfile, 'a'))

# then start the server loop that polls the torrent sites
while True:
    try:
        try:
            # The former threading.Thread(torrentperiodic.checkForUpdates())
            # already ran the check right here in the main thread (the call
            # was evaluated while building the Thread arguments) and then
            # started a thread with no target.  Calling it directly keeps
            # that effective behaviour without the no-op thread.
            torrentperiodic.checkForUpdates()
        except KeyboardInterrupt:
            raise
        except Exception:
            # A failing pass is reported and retried after the usual pause
            # instead of terminating the server.
            print(sys.exc_info()[1])
        time.sleep(60)
    except KeyboardInterrupt:
        print('Goodbye')
        break
    except Exception:
        print(sys.exc_info()[1])
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------
# torrentsite.py is the Freevo torrent site module
# -----------------------------------------------------------------------
# $Id: torrentsite.py 9738 2007-06-30 11:20:30Z phishman $
#
# Notes:
# Todo:
#
# -----------------------------------------------------------------------
# Freevo - A Home Theater PC framework
# Copyright (C) 2002 Krister Lagerstrom, et al.
# Please see the file freevo/Docs/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------

#import config
import os,sys
import re
import urllib2
from array import array
from twisted.python import log

# Script name: strip the directory first, then the extension.
appname = os.path.basename(sys.argv[0])
appname = os.path.splitext(appname)[0]
# Upper-cased script name, the prefix of per-application config settings.
appconf = appname.upper()
# The config-driven debug level is switched off in this module (it does not
# import config); the level is fixed at 5 instead.
#DEBUG = hasattr(config, appconf+'_DEBUG') and eval('config.'+appconf+'_DEBUG') or config.DEBUG
DEBUG = 5

def _debug_(text, level=1):
    """Emit a debug message when the module DEBUG value is at least `level`.

    text  -- object to report; it is converted with str().
    level -- minimum DEBUG value required for the message to be emitted.

    The message is passed to log.msg(); if that call fails for any reason
    the message is printed to stdout instead.
    """
    if DEBUG >= level:
        try:
            log.msg(str(text))
        except Exception:
            # Exception (not a bare except) keeps KeyboardInterrupt and
            # SystemExit from being swallowed here.
            print(str(text))

# Public API: TorrentSite is the only name exported by
# "from torrentsite import *".
__all__ = ["TorrentSite"]

class TorrentSite:
    """One torrent search site: a query, link patterns and a title filter.

    Set the user variables (url, query, downloadLinkSignal,
    torrentLinkSignal, limitResults, filterText), call setPatterns() to
    compile them, then parseSite() to fill `torrents` with Torrent objects
    whose title matches the filter.
    """

    def __init__(self):
        """Initialise the user variables with defaults and compile them."""
        #User variables
        self.url='www.torrentspy.com'
        self.query='search?query=EPL&submit.x=0&submit.y=0'
        self.downloadLinkSignal='"Download Torrent"'
        self.torrentLinkSignal='<a name="torrent">'
        self.limitResults=10
        self.filterText='United'
        self.setPatterns()

    def setPatterns(self):
        """(Re)build the query URL and the compiled patterns; clear `torrents`.

        Must be called again after any of the user variables is changed.
        """
        #Composition variables
        self.fullQuery='http://%s/%s'%(self.url,self.query)
        self.downloadPattern = re.compile(self.downloadLinkSignal,re.DOTALL)
        self.linkPattern = re.compile('<a href=(.*?)>',re.DOTALL)
        self.torrentPattern = re.compile(self.torrentLinkSignal,re.DOTALL)
        # Case-insensitive matching is requested from the regex engine.
        # Upper-casing the expression itself (as was done before) also
        # upper-cases escapes and so turned e.g. \d into \D.
        self.filterPattern = re.compile(self.filterText,re.DOTALL|re.IGNORECASE)
        #Other variables
        self.torrents=[]

    def __str__(self):
        """Return the quoted url and query, then one line per torrent."""
        parts = ['"'+self.url+'"\n', '"'+self.query+'"\n']
        for torrent in self.torrents:
            parts.append('"'+str(torrent.title)+'" "'+str(torrent.url)+'"\n')
        return ''.join(parts)

    class Torrent:
        """A single search result: a title and the URL of its torrent."""

        def __init__(self,title,url):
            """Store `title` and `url`, both converted with str()."""
            self.title=str(title)
            self.url=str(url)

        def __str__(self):
            """Return the quoted title and url, one per line."""
            return '"'+str(self.title)+'"\n' + '"'+str(self.url)+'"\n'


    def getHtmlResults(self,address,pattern):
        """Fetch `address` and return the response lines matching `pattern`.

        address -- URL to request.
        pattern -- compiled regular expression, or None to keep every line.

        Errors raised by urllib2 propagate to the caller.
        """
        _debug_("Getting Query from %s"%(address),3)
        file_request = urllib2.Request(address)
        file_object = urllib2.build_opener().open(file_request)
        try:
            return [line for line in file_object.readlines()
                    if pattern is None or pattern.search(line)]
        finally:
            # Close the response even when reading or matching fails.
            file_object.close()

    def getIndividualLinks(self,input,pattern):
        """Return the <a href=...> contents in `input` that match `pattern`.

        input   -- list of HTML lines.
        pattern -- compiled regular expression applied to the text captured
                   between '<a href=' and the next '>'.
        """
        _debug_("Getting Links to HTML Pages",3)
        output=[]
        for line in input:
            for link in self.linkPattern.findall(line):
                if pattern.search(link):
                    output.append(link)
        return output

    def getUrlFromHtml(self,input):
        """Return the URL part of each link text in `input`.

        The URL is taken to be everything up to the first space, with
        double quotes removed; entries that yield an empty string are
        dropped.
        """
        _debug_("Getting URLs from the HTML Pages",3)
        output=[]
        for html in input:
            downloadLink = html.split(" ")[0]
            if downloadLink:
                output.append(downloadLink.replace('"',''))
        return output

    def getDownloadLinkFromDownloadPage(self,input):
        """Visit the download pages and return (page, torrent URL) pairs.

        input -- list of download page paths relative to self.url.

        Only the first self.limitResults pages are visited; pages on which
        no download link is found contribute nothing.
        """
        _debug_("Getting cached Download Links from the Download Pages (%i total)"%(len(input)),3)
        output=[]
        for index, downloadPage in enumerate(input):
            if index >= self.limitResults:
                # Limit reached: stop instead of walking the remaining pages.
                break
            _debug_("Getting %i of %i"%(index+1,self.limitResults),5)
            downloadQuery="http://%s/%s"%(self.url,downloadPage)
            page=self.getHtmlResults(downloadQuery,self.torrentPattern)
            links=self.getIndividualLinks(page,self.downloadPattern)
            downloadLink=self.getUrlFromHtml(links)
            if downloadLink:
                output.append((downloadPage,downloadLink[0]))
        return output

    def checkFilter(self,input):
        """Return True when `input` matches the filter, ignoring case.

        A filterPattern of None accepts everything.
        """
        _debug_("%s %s"%(self.filterText,input.upper()))
        if self.filterPattern is None:
            return True
        return self.filterPattern.search(input) is not None

    def parseSite(self):
        """Run the query and fill self.torrents with the matching results.

        The title of each result is the last path component of its download
        page plus ".avi".  The list is rebuilt on every call, so parsing the
        same site twice does not duplicate entries.
        """
        self.torrents=[]
        results=self.getHtmlResults(self.fullQuery,None)
        links=self.getIndividualLinks(results,self.downloadPattern)
        downloadPages=self.getUrlFromHtml(links)
        downloadPairs=self.getDownloadLinkFromDownloadPage(downloadPages)
        for (downloadPage, torrentUrl) in downloadPairs:
            title=downloadPage.split("/")[-1]+".avi"
            if self.checkFilter(title):
                self.torrents.append(self.Torrent(title,torrentUrl))
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Freevo-users mailing list
Freevo-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/freevo-users

Reply via email to