Author: deryck
Date: 2006-06-13 22:10:34 +0000 (Tue, 13 Jun 2006)
New Revision: 1001
WebSVN: http://websvn.samba.org/cgi-bin/viewcvs.cgi?view=rev&root=samba-web&rev=1001

Log:
news.samba.org is on jump now, so we only need to pull
headlines for samba.org and mirrors.

deryck

Modified:
   trunk/scripts/updateNews.py

Changeset:
Modified: trunk/scripts/updateNews.py
===================================================================
--- trunk/scripts/updateNews.py	2006-06-13 18:24:56 UTC (rev 1000)
+++ trunk/scripts/updateNews.py	2006-06-13 22:10:34 UTC (rev 1001)
@@ -1,246 +1,12 @@
 #! /usr/bin/python
-# Copyright (C) 2004 by Deryck Hodge <[EMAIL PROTECTED]>
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
-# USA
+""" Get headlines from news.samba.org for inclusion on samba.org """

-""" updateNews.py -- a script for handling files on news.samba.org."""
+from urllib import urlopen

-import os, time, re
-from stat import ST_MTIME
+html = urlopen('http://news.samba.org/headlines/').read()

-top_dir = '/data/httpd/html/samba/news'  # set to news directory path
-not_news = ['.svn', 'images', 'style', 'calendar', 'index.html',
-            'articles', 'static', 'js']
-
-
-# Get list of news directories.  Then, pair up dir name with dir files.
-os.chdir(top_dir)
-topics = []
-
-for file in os.listdir(os.curdir):
-    if file in not_news: continue
-    if os.path.isdir(file):
-        topics.append(file)
-topics.sort()
-
-topics_files = {}
-for topic in topics:
-    topics_files[topic] = os.listdir(topic)
-
-
-# Write list of topics to 'sections.html'
-sections = open('sections.html', 'w')
-sections.write('<ul>')
-for topic in topics:
-    sections.write('<li><a href="/samba/news/' + topic + '/">' + topic + '/</a></li>')
-sections.write('</ul>')
-sections.close()
-
-
-# Define function for converting date tuple to string
-def date_to_str((year, mn, dy)):
-    mn_name = ('', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December')
-
-    return str(dy) + ' ' + mn_name[mn] + ' ' + str(year)
-
-
-# Loop through each directory, find all stories, and create main index.html
-# Store filename/path info for search engine while we're here
-all_stories = {}
-search_index = {}
-for topic in topics:
-    os.chdir(topic)
-
-    for file in os.listdir(os.curdir):
-        if file in not_news: continue
-        f_lines = open(file, 'r').readlines()
-        story = "".join(f_lines) + '<div class="reference">Link: <a href="/samba/news/' + topic + '/#' + file[:-5] + '">' + topic + '/</a></div>\n\n'
-        f_date = os.stat(file)[ST_MTIME]
-        all_stories[f_date] = story
-
-        # Search engine setup.
-        search_index[file[:-5]] = '/samba/news/' + topic + '/' + file
-
-    os.chdir(top_dir)
-
-post_dates = all_stories.keys()
-post_dates.sort()
-post_dates.reverse()
-
-index = open('index.html', 'w')
-index.write('<!--#include virtual="/samba/news/header.html" -->\n')
-index.write('<title>news.samba.org</title>\n')
-index.write('<!--#include virtual="/samba/news/header2.html" -->\n\n')
-index.close()
-
-# Only list the 40 most recent stories on the main news page
-count = 40
-for date in post_dates:
-
-    if count > 0:
-        news_text = all_stories[date]
-        h2date = date_to_str(time.gmtime(date)[:3])
-        index = open('index.html', 'a')
-        if open('index.html', 'r').read().find('<h2>' + h2date + '</h2>\n\n') >= 0:
-            index.write(news_text)
-        else:
-            index.write('<h2>' + h2date + '</h2>\n\n')
-            index.write(news_text)
-        index.close()
-        count = count - 1
-
-index = open('index.html', 'a')
-index.write('<!--#include virtual="/samba/news/footer.html" -->\n\n')
-index.close()
-
-# Create the search index table for javascript
-os.chdir(top_dir + '/js')
-js = open('searchIndex.js', 'w')
-js.write('/*\n')
-js.write(' This file is auto-generated. Do not edit.\n')
-js.write('*/\n\n')
-js.write('sIndex = {};\n')
-for filename in search_index.keys():
-    js.write('sIndex[\'' + filename + '\'] = \'' + search_index[filename] + '\';\n')
-#js.write('}\n')
-js.close()
-
-os.chdir(top_dir)
-
-# Define function that creates index.html for each directory.
-def archive(dir, files):
-    topic = dir
-    os.chdir(topic)
-    filelist = files
-
-    stories_by_date = {}
-
-    for file in filelist:
-        if file in not_news: continue
-        f_lines = open(file, 'r').readlines()
-        f_date = os.stat(file)[ST_MTIME]
-        stories_by_date[f_date] = f_lines
-
-    index = open('index.html', 'w')
-    index.write('<!--#include virtual="/samba/news/header.html" -->\n')
-    index.write('<title>' + topic + '/' + ' on news.samba.org</title>\n')
-    index.write('<!--#include virtual="/samba/news/header2.html" -->\n\n')
-    index.write('<h1>' + topic + ' archive on news.samba.org</h1>\n\n')
-    index.write('<p>All stories for the ' + topic + ' topic are archived here</p>\n\n')
-    index.close()
-
-    post_dates = stories_by_date.keys()
-    post_dates.sort()
-    post_dates.reverse()
-
-    for date in post_dates:
-        news_text = "".join(stories_by_date[date])
-        h2date = date_to_str(time.gmtime(date)[:3])
-        index = open('index.html', 'a')
-        if open('index.html', 'r').read().find('<h2>' + h2date + '</h2>\n\n') >= 0:
-            index.write(news_text)
-        else:
-            index.write('<h2>' + h2date + '</h2>\n\n')
-            index.write(news_text)
-        index.close()
-
-    index = open('index.html', 'a')
-    index.write('<!--#include virtual="/samba/news/footer.html" -->\n\n')
-    index.close
-
-    os.chdir(top_dir)
-
-
-# Loop through each subdirectory, creating an index.html file.
-for topic in topics_files.keys():
-    archive(topic, topics_files[topic])
-
-
-# Create headlines for samba.org from last ten news items
-all_news = {}
-for file in topics_files.keys():
-    os.chdir(file)
-    for this_file in topics_files[file]:
-        if this_file in not_news:
-            continue
-        else:
-            all_news[os.stat(this_file)[ST_MTIME]] = open(this_file, 'r').readlines()
-    os.chdir(top_dir)
-
-news_dates = all_news.keys()
-news_dates.sort()
-news_dates.reverse()
-
-news_for_headlines = {}
-for date in news_dates:
-    for line in all_news[date]:
-        if line.find('<h3>') > -1 and len(news_for_headlines) < 10:
-            # Search for text between quotes
-            link = re.search('(?<=\")\S+(?=\")', line)
-            # Search for text between > and </a
-            title = re.search('(?<=\"\>).+(?=\<\/a)', line)
-            news_for_headlines[date] = (link.group(0), title.group(0))
-
-headline_dates = news_for_headlines.keys()
-headline_dates.sort()
-headline_dates.reverse()
-
-headlines = open('headlines.html', 'w')
-headlines.write('<ul class="news">\n')
-for date in headline_dates:
-    headlines.write('<li>' + date_to_str(time.gmtime(date)[:3]) + ' <a href="/samba/news/#' + news_for_headlines[date][0] + '">' + news_for_headlines[date][1] + '</a></li>\n')
-headlines.write('</ul>\n')
+headlines = open('/data/httpd/html/samba/news/headlines.html', 'w')
+headlines.write(html)
 headlines.close()
-
-# Create an rss feed
-feed = open('sambanews.xml', 'w')
-feed.write('<?xml version="1.0"?>\n')
-feed.write('<rss version="2.0">\n\n')
-feed.write('<channel>\n\n')
-feed.write('<title>news.samba.org</title>\n')
-feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
-feed.write('<link>http://news.samba.org/</link>\n\n')
-
-count = 10
-for date in post_dates:
-    item_text = all_stories[date]
-
-    if count > 0:
-        title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)
-        text = re.search('<div class=\"article\"\>(\s|.)*?</div>', item_text)
-        link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\"/samba/news/).+(?=\"\>)', item_text)
-        pub_date = time.asctime(time.gmtime(date))
-
-        # Drop end tag unless nested divs were used
-        if text.group()[21:].find('<div') > 0:
-            description = text.group()[21:]
-        else:
-            description = text.group()[21:-6]
-
-        feed.write('<item>\n')
-        feed.write('<title><![CDATA[' + title.group() + ']]></title>\n')
-        feed.write('<description><![CDATA[' + description + ']]></description>\n')
-        feed.write('<link>http://news.samba.org/' + link.group() + '</link>\n')
-        feed.write('<pubDate>' + pub_date + '</pubDate>')
-        feed.write('</item>\n\n')
-        count = count - 1
-
-feed.write('</channel>\n')
-feed.write('</rss>\n')
-feed.close()
-
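Editor's note: the slimmed-down script above overwrites headlines.html with whatever the single urlopen() call returns, so an outage or empty response from news.samba.org would publish an empty headlines file on samba.org. Below is a minimal hardened sketch of the same fetch-and-publish step; it is not part of this commit. The URL and destination path are taken from the committed script, while the empty-response guard and the temp-file-plus-rename step are assumptions layered on top of it.

#! /usr/bin/python
# Hypothetical hardened variant of updateNews.py -- a sketch, not the
# committed script.

import os
from urllib import urlopen

SRC = 'http://news.samba.org/headlines/'
DEST = '/data/httpd/html/samba/news/headlines.html'

try:
    html = urlopen(SRC).read()
except IOError:
    # Network failure: treat it like an empty fetch.
    html = ''

# Only replace the published copy when the fetch returned content, so a
# failed or empty fetch leaves the existing headlines in place.
if html:
    tmp = DEST + '.tmp'
    out = open(tmp, 'w')
    out.write(html)
    out.close()
    os.rename(tmp, DEST)

Since os.rename() within one filesystem is atomic on POSIX, the web server never serves a half-written headlines.html, even if the script is interrupted mid-update.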