I am in the middle of writing a BitTorrent search scraper. I know there has
been much discussion about the legality of this issue [and I understand that
it will probably prevent it from being included in SVN], but that's not
the point of this e-mail. I've successfully written a module that represents
a BitTorrent site, which has a filter and a list of torrents that match that
filter. I am now in the process of writing a periodic that will download any
"new" results that match that filter. I was looking into using
"bittorrent-console", but I need a way to know when the torrent is finished
so I can close that download process in the periodic. If anyone has any
hints, they would be greatly appreciated.
I've attached my current source, if interested.
freevo/src/torrentsite.py
freevo/src/torrentperiodic.py
freevo/src/helpers/torrentserver.py
It is structured and started the same way as the rssserver.
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------
# torrentperiodic.py - This is the Freevo torrent periodic module
# -----------------------------------------------------------------------
# $Id: torrentperiodic.py 9743 2007-07-02 19:28:22Z duncan $
#
# Notes:
# Todo:
#
# -----------------------------------------------------------------------
# Freevo - A Home Theater PC framework
# Copyright (C) 2002 Krister Lagerstrom, et al.
# Please see the file freevo/Docs/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------
import re,os,sys,glob,urllib,datetime,time,shutil
from subprocess import Popen
import cPickle, pickle
import config
import torrentsite
# Name of the running script, without directory or extension, and its
# upper-cased form, which prefixes this application's settings in config.
appname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
appconf = appname.upper()
# Use a truthy <APPCONF>_DEBUG setting when config has one, otherwise fall
# back to config.DEBUG.  getattr() does the lookup without eval()'ing a
# string assembled from sys.argv[0].
DEBUG = getattr(config, appconf + '_DEBUG', None) or config.DEBUG
def _debug_(text, level=1):
    """Emit *text* as a debug message when DEBUG is at least *level*.

    The message is sent to log.debug(); if that fails for any ordinary
    reason it is printed instead.

    NOTE(review): no name `log` is bound by the imports visible at the top
    of torrentperiodic.py, so this looks like it always takes the print
    fallback -- confirm which logger was intended.
    """
    if DEBUG < level:
        return
    message = str(text)
    try:
        log.debug(message)
    except Exception:
        # Not a bare "except:", so KeyboardInterrupt and SystemExit raised
        # while logging are no longer swallowed.
        print(message)
def _cacheFilePath(cacheFile=None):
    """Return *cacheFile*, or the default torrent.pickle path when it is None."""
    if cacheFile is None:
        return config.FREEVO_CACHEDIR + "/torrent.pickle"
    return cacheFile


def _loadDownloadedUrls(cacheFile):
    """Return the list pickled in *cacheFile*, or [] if it is missing or empty.

    cPickle is tried first and pickle second, as before.  The file is opened
    in binary mode and is always closed again.
    """
    try:
        cache = open(cacheFile, "rb")
    except IOError:
        return []
    try:
        try:
            return cPickle.load(cache)
        except Exception:
            # Rewind so the fallback loader sees the whole file.
            cache.seek(0)
            return pickle.load(cache)
    except (IOError, EOFError):
        # Unreadable or empty cache: treat it as "nothing downloaded yet".
        return []
    finally:
        cache.close()


def checkForDup(string, cacheFile=None):
    """Return True if *string* occurs inside any entry of the download cache.

    Parameters:
        string    -- text to look for (substring match against each entry).
        cacheFile -- path of the pickle file; defaults to torrent.pickle in
                     config.FREEVO_CACHEDIR.

    A missing, unreadable or empty cache file yields False.
    """
    for line in _loadDownloadedUrls(_cacheFilePath(cacheFile)):
        if string in line:
            return True
    return False


def addFileToCache(string, cacheFile=None):
    """Append *string* to the download cache and write the cache back.

    Parameters:
        string    -- entry to record (checkForUpdates passes the torrent URL).
        cacheFile -- path of the pickle file; defaults to torrent.pickle in
                     config.FREEVO_CACHEDIR.

    A missing or empty cache file is treated as an empty list, matching
    checkForDup.  Raises IOError if the cache file cannot be opened for
    writing.
    """
    cacheFile = _cacheFilePath(cacheFile)
    downloadedUrls = _loadDownloadedUrls(cacheFile)
    downloadedUrls.append(string)
    out = open(cacheFile, "wb")
    try:
        try:
            cPickle.dump(downloadedUrls, out)
        except Exception:
            # Drop any partial output before retrying with pure-Python pickle.
            out.seek(0)
            out.truncate()
            pickle.dump(downloadedUrls, out)
    finally:
        out.close()
def _runLogged(args):
    """Run the command *args* (an argument list) and return its exit status.

    No shell is involved, so characters such as '&' or ';' in a scraped URL
    reach the program as plain text.  stdout and stderr are appended to
    torrent-bittorrent.out in config.LOGDIR, and the log file is closed
    afterwards.  Raises IOError if the log file cannot be opened.
    """
    cmdlog = open(os.path.join(config.LOGDIR, 'torrent-bittorrent.out'), 'a')
    try:
        try:
            return Popen(args, stderr=cmdlog, stdout=cmdlog).wait()
        except OSError as e:
            # Through the shell a missing program surfaced as exit status
            # 127; keep reporting it as a failed command rather than raising.
            _debug_("Could not run %s: %s" % (args[0], e), 5)
            return 127
    finally:
        cmdlog.close()


def _fetchTorrent(torrent):
    """Download one torrent unless its URL is already in the download cache."""
    # The format string previously had no %s, which raised TypeError.
    _debug_("Torrent %s" % torrent, 5)
    filename = torrent.title
    url = str(torrent.url)
    if checkForDup(url):
        _debug_("Duplicate, not getting %s" % url, 5)
        return

    _debug_("Getting torrent file: %s" % url)
    exitStatus = _runLogged(['wget', url])
    if exitStatus:
        _debug_("Download failed - exit status %s." % exitStatus, 5)
        return

    _debug_("Running bittorrent download from %s" % url, 5)
    exitStatus = _runLogged(['bittorrent-console', url])
    if exitStatus:
        _debug_("Download failed - exit status %s." % exitStatus, 5)
        # Nothing to clean up if the client never created the file.
        if os.path.exists(filename):
            os.remove(filename)
        return

    try:
        size = os.path.getsize(filename)
    except OSError:
        _debug_("Download finished but %s was not found" % filename, 5)
        return
    _debug_("Download completed (%s bytes)" % size, 5)
    try:
        shutil.move(filename, config.TV_RECORD_DIR)
    except Exception:
        _debug_('failed to move %s to %s' % (filename, config.TV_RECORD_DIR), 5)
    # NOTE(review): the paste lost its indentation; recording the URL only
    # after a successful download is the assumed nesting -- confirm.
    addFileToCache(url)


def checkForUpdates():
    """Scan every site listed in config.TORRENT_SITES and fetch new torrents.

    Each line of that file that is neither blank nor starts with '#' must
    hold six ';'-separated fields:

        url;query;downloadLinkSignal;torrentLinkSignal;limitResults;filterText

    Lines with a different number of fields, or a limitResults that is not
    an integer, are skipped.  For every torrent the site returns,
    _fetchTorrent() is called.  An IOError while processing a site is
    logged and the scan continues with the next line.
    """
    try:
        sites = open(config.TORRENT_SITES, "r")
    except IOError:
        _debug_("ERROR: Could not open configuration file %s" % (config.TORRENT_SITES), 5)
        return
    try:
        for line in sites:
            if line == '\n' or line.startswith('#'):
                continue
            try:
                (url, query, downloadLinkSignal, torrentLinkSignal,
                 limitResults, filterText) = line.split(";")
                limit = int(limitResults)
            except ValueError:
                continue
            _debug_("Check %s for updates" % url, 5)
            try:
                site = torrentsite.TorrentSite()
                site.url = url
                site.query = query
                site.downloadLinkSignal = downloadLinkSignal
                site.torrentLinkSignal = torrentLinkSignal
                site.limitResults = limit
                site.filterText = filterText.replace('\n', '').upper()
                site.setPatterns()
                site.parseSite()
                for torrent in site.torrents:
                    _fetchTorrent(torrent)
            except IOError:
                _debug_("ERROR: Unable to download %s. Connection may be down." % (url), 5)
    finally:
        sites.close()
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------
# torrentserver.py - This is the Freevo torrent retriever server
# -----------------------------------------------------------------------
# $Id: torrentserver.py 9710 2007-06-16 19:32:37Z phishman $
#
# Notes:
# Todo:
#
# -----------------------------------------------------------------------
# Freevo - A Home Theater PC framework
# Copyright (C) 2002 Krister Lagerstrom, et al.
# Please see the file freevo/Docs/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------
'''
In local_conf.py add the following:
# File defining RSS feeds to monitor (see format below).
RSS_FEEDS='/etc/freevo/rss.feeds'
# Frequency (in seconds) to check for new downloads. Default is 3600 or 1 hour.
RSS_CHECK_INTERVAL=3600
# Download directory for video files.
RSS_VIDEO='/path/to/video/feeds/'
# Download directory for audio files.
RSS_AUDIO='/path/to/podcasts/'
You will need to create an rss.feeds file. Each line contains a feed URL
followed by a semicolon and a number of days: how long ago an item may have
been published and how long the local copy is kept before it is deleted.
# Begin /etc/freevo/rss.feeds
http://twit.libsyn.com/rss;7
http://leo.am/podcasts/twit;7
http://leo.am/podcasts/itn;7
http://feeds.feedburner.com/TechRenegades;7
http://www.linuxactionshow.com/?feed=rss2&cat=3;30
http://www.thelinuxlink.net/tllts/tllts.rss;30
http://www.linux-games.ca/2006/redneck.xml;360
# End /etc/freevo/rss.feeds
'''
import os,sys,threading,time
import torrentperiodic
import config
from twisted.python import log
# Name of the running script, without directory or extension, and its
# upper-cased form, which prefixes this helper's settings in config.
appname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
appconf = appname.upper()

# NOTE(review): the paste lost its indentation.  Everything below is kept
# under the __main__ guard so that importing this module never starts the
# polling loop -- confirm this matches the intended nesting.
if __name__ == '__main__':
    # pwd is needed for the USER/HOME lookup below; it was used without ever
    # being imported, so those two variables were never set.
    import pwd

    # change uid: when started as root and <APPCONF>_UID is set, drop to the
    # configured group and user.  A missing setting is printed, as before.
    try:
        uid = getattr(config, appconf + '_UID')
        if uid and os.getuid() == 0:
            os.setgid(getattr(config, appconf + '_GID'))
            os.setuid(uid)
            os.environ['USER'] = pwd.getpwuid(os.getuid())[0]
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
    except Exception as e:
        print(e)

    if len(sys.argv) > 1 and sys.argv[1] == '--help':
        print('start or stop the internal torrentserver')
        print('usage freevo torrentserver [ start | stop ]')
        sys.exit(0)

    logfile = '%s/%s-%s.log' % (config.LOGDIR, appname, os.getuid())
    log.startLogging(open(logfile, 'a'))

    # Then poll the torrent sites: one scan, a 60 second pause, repeat.
    while True:
        # Pass the function itself as target=; calling it here ran the scan
        # inline and handed its return value to Thread() as "group".
        t = threading.Thread(target=torrentperiodic.checkForUpdates)
        t.start()
        t.join()
        try:
            time.sleep(60)
        except KeyboardInterrupt:
            print('Goodbye')
            break
        except Exception as e:
            print(e)
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------
# torrentsite.py is the Freevo torrent site module
# -----------------------------------------------------------------------
# $Id: torrentsite.py 9738 2007-06-30 11:20:30Z phishman $
#
# Notes:
# Todo:
#
# -----------------------------------------------------------------------
# Freevo - A Home Theater PC framework
# Copyright (C) 2002 Krister Lagerstrom, et al.
# Please see the file freevo/Docs/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------
#import config
import os,sys
import re
import urllib2
from array import array
from twisted.python import log
# Running script's file name with the directory and then the extension
# stripped; appconf is the same name in upper case.
appname = os.path.basename(sys.argv[0])
appname = os.path.splitext(appname)[0]
appconf = appname.upper()
# The per-application lookup stays disabled because config is not imported
# in this module; the debug level is fixed instead.
#DEBUG = hasattr(config, appconf+'_DEBUG') and eval('config.'+appconf+'_DEBUG') or config.DEBUG
DEBUG = 5
def _debug_(text, level=1):
    """Emit *text* as a debug message when DEBUG is at least *level*.

    The message goes to log.msg(); if that fails for any ordinary reason
    it is printed instead.
    """
    if DEBUG < level:
        return
    message = str(text)
    try:
        log.msg(message)
    except Exception:
        # Not a bare "except:", so KeyboardInterrupt and SystemExit raised
        # while logging are no longer swallowed.
        print(message)
__all__ = ["TorrentSite"]


class TorrentSite:
    """A torrent search site together with the results that pass a filter.

    Set the user variables (url, query, downloadLinkSignal,
    torrentLinkSignal, limitResults, filterText), call setPatterns() to
    compile them, then parseSite() to fill self.torrents with Torrent
    objects.
    """

    def __init__(self):
        """Start from the built-in example settings and compile them."""
        #User variables
        self.url = 'www.torrentspy.com'
        self.query = 'search?query=EPL&submit.x=0&submit.y=0'
        self.downloadLinkSignal = '"Download Torrent"'
        self.torrentLinkSignal = '<a name="torrent">'
        self.limitResults = 10
        self.filterText = 'United'
        self.setPatterns()

    def setPatterns(self):
        """Rebuild the query URL and the compiled patterns; empty self.torrents.

        Must be called again after any user variable is changed.
        """
        #Composition variables
        self.fullQuery = ('http://%s/%s' % (self.url, self.query))
        self.downloadPattern = re.compile(self.downloadLinkSignal, re.DOTALL)
        self.linkPattern = re.compile('<a href=(.*?)>', re.DOTALL)
        self.torrentPattern = re.compile(self.torrentLinkSignal, re.DOTALL)
        # The filter text is upper-cased before it is compiled as a regular
        # expression; checkFilter() upper-cases the title to match.
        self.filterPattern = re.compile(self.filterText.upper(), re.DOTALL)
        #Other variables
        self.torrents = []

    def __str__(self):
        """Return the quoted url and query, then one 'title url' line per torrent."""
        parts = ['"' + self.url + '"\n', '"' + self.query + '"\n']
        for torrent in self.torrents:
            parts.append('"' + str(torrent.title) + '" "' + str(torrent.url) + '"\n')
        return ''.join(parts)

    class Torrent:
        """One search result: a title and the URL of its torrent file."""

        def __init__(self, title, url):
            """Store *title* and *url* as strings."""
            self.title = str(title)
            self.url = str(url)

        def __str__(self):
            """Return the quoted title and the quoted url, one per line."""
            return '"' + str(self.title) + '"\n' + '"' + str(self.url) + '"\n'

    ## Get the search results and put them into a list
    # INPUT: String that represents the HTTP query
    # OUTPUT: List of HTML that matches the pattern(Regular Expression)
    def getHtmlResults(self, address, pattern):
        """Fetch *address* and return its lines that match *pattern*.

        With pattern None every line is returned.  The response is closed
        even when reading it fails.
        """
        _debug_("Getting Query from %s" % (address), 3)
        file_object = urllib2.build_opener().open(urllib2.Request(address))
        try:
            lines = file_object.readlines()
        finally:
            file_object.close()
        if pattern is None:
            return lines
        return [line for line in lines if pattern.search(line)]

    ## Get links from the input
    # INPUT: List of HTML lines to process
    # OUTPUT: List of HTML lines that match the linkPattern(Regular Expression)
    def getIndividualLinks(self, input, pattern):
        """Return the '<a href=...>' contents in *input* that match *pattern*."""
        _debug_("Getting Links to HTML Pages", 3)
        output = []
        for line in input:
            for link in self.linkPattern.findall(line):
                if pattern.search(link):
                    output.append(link)
        return output

    ## Get the download link from the HTML
    # INPUT: List of HTML lines to process
    # OUTPUT: List of Strings that match the downloadPattern(Regular Expression)
    def getUrlFromHtml(self, input):
        """Return the text before the first space of each item, quotes removed.

        Items whose first space-separated field is empty are dropped.
        """
        _debug_("Getting URLs from the HTML Pages", 3)
        output = []
        for html in input:
            downloadLink = html.split(" ")[0]
            if downloadLink:
                output.append(downloadLink.replace('"', ''))
        return output

    ## Get the download link from the download page
    # INPUT: Address of the download page
    # OUTPUT: List of Title,URL pairs
    def getDownloadLinkFromDownloadPage(self, input):
        """Return (downloadPage, downloadLink) pairs for the first pages of *input*.

        At most self.limitResults pages are fetched; pages on which no
        download link is found are left out.
        """
        _debug_("Getting cached Download Links from the Download Pages (%i total)" % (len(input)), 3)
        output = []
        # Slice instead of counting past the limit; max() keeps a negative
        # limit meaning "no pages" rather than "all but the last few".
        wanted = input[:max(self.limitResults, 0)]
        for index, downloadPage in enumerate(wanted):
            _debug_("Getting %i of %i" % (index + 1, self.limitResults), 5)
            downloadQuery = ("http://%s/%s" % (self.url, downloadPage))
            page = self.getHtmlResults(downloadQuery, self.torrentPattern)
            links = self.getIndividualLinks(page, self.downloadPattern)
            downloadLink = self.getUrlFromHtml(links)
            if downloadLink:
                output.append((downloadPage, downloadLink[0]))
        return output

    def checkFilter(self, input):
        """Return True if the upper-cased *input* matches the filter pattern."""
        _debug_("%s %s" % (self.filterText, input.upper()))
        if self.filterPattern is None:
            return True
        return bool(self.filterPattern.search(input.upper()))

    def parseSite(self):
        """Run the search and append every result that passes the filter.

        The title is the last '/'-separated part of the download page
        address with ".avi" appended.  Results are added to self.torrents.
        """
        results = self.getHtmlResults(self.fullQuery, None)
        links = self.getIndividualLinks(results, self.downloadPattern)
        downloadPages = self.getUrlFromHtml(links)
        downloadPairs = self.getDownloadLinkFromDownloadPage(downloadPages)
        for downloadPage, url in downloadPairs:
            title = downloadPage.split("/")[-1] + ".avi"
            if self.checkFilter(title):
                self.torrents.append(self.Torrent(title, url))
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Freevo-devel mailing list
Freevo-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/freevo-devel