I use this script to hide posts by banned authors on Google Groups. You need to create a banned-authors text file with one author per line. For Mozilla browsers you need to compile the script to a .pyc file, associate .pyc files with Python, and create a bookmark pointing at the compiled script. You then use that bookmark to open the Google Groups web page with the banned posts removed.
# remove banned author and authors with mostly caps # to compile to pyc #>>>import py_compile #>>>py_compile.compile("file.py") import urllib2 import webbrowser import os from bs4 import BeautifulSoup PALEMOON = 'Mozilla/5.0 (Windows NT 6.1; WOW64) KHTML/4.11 Gecko/20130308 Firefox/33.0 (PaleMoon/25.2)' WATERFOX = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:40.0) Gecko/20100101 Firefox/51.1.0 Waterfox/51.1.0' USERAGENTBASE = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:40.0) Gecko/20100101 ' BROWSERPATH = 'C:\\"Program Files"\\Waterfox\\waterfox.exe' FILENAME = 'C:\\PyStuff\\pygroup.htm' WEBPAGE = "https://groups.google.com/forum/?_escaped_fragment_=forum/comp.lang.python%5B1-50%5D" BANNED_AUTHORS_FILE = 'C:\\PyStuff\\bannedAuthors.txt' def getUserAgentVersion(): """ get the useragent version returns agentVersion -- user agent version in format Firefox/51.0.1 Waterfox/51.0.1 """ bvers = os.popen(BROWSERPATH + " -v").read() bversList = bvers.split() agentVersion = 'Firefox/' + bversList[2] + ' ' + bversList[1] + '/' + bversList[2] return agentVersion def getwebpage(url): """ Open a webpage url -- the url to the webpage returns page -- the source for the webpage """ user_agent = USERAGENTBASE + getUserAgentVersion() headers = { 'User-Agent' : user_agent } req = urllib2.Request(url, None, headers) response = urllib2.urlopen(req) page = response.read() return page def getBannedAuthors(): """ Convert the banned authors text file into a list returns bannedAuthors -- list of banned author strings """ f = open(BANNED_AUTHORS_FILE, 'r') bannedAuthors = f.read().split('\n') f.close() return bannedAuthors def removeBadAuthors(html_doc): """ Remove posts from google group by authors that are mostly caps or on the Banned List html_doc -- an html document """ bannedAuthors = getBannedAuthors() print bannedAuthors soup = BeautifulSoup(html_doc) #print soup.prettify() post = soup.find("tr") while post is not None: author = post.find("td", "author") aname = author.get_text() if 
author is None: print "Author is None" oldpost = post post = oldpost.find_next_sibling('tr') oldpost.decompose() elif aname in bannedAuthors: print "Author is Banned" oldpost = post post = oldpost.find_next_sibling('tr') oldpost.decompose() else: print author numCaps = 1.0 * sum(1 for c in aname if c.isupper()) ratio = numCaps/(1.0*len(aname)) print ratio oldpost = post post = oldpost.find_next_sibling('tr') if ratio > 0.7: oldpost.decompose() print "BIG" if post is None: print "Post is NONE" f = open(FILENAME, 'w') f.write(soup.prettify().encode('ascii', 'ignore')) f.close() def main(): html_doc = getwebpage(WEBPAGE) removeBadAuthors(html_doc) webbrowser.open(FILENAME) print 'done' if __name__ == "__main__": main() -- https://mail.python.org/mailman/listinfo/python-list