The branch, master has been updated via c155466 Remove some unnecessary indirects. via cceb98b Implement diff. via 95015a7 Simplify history walking. via 10c6a03 work on history support using dulwich from c1e3f3d print the return code value as it can help debug
http://gitweb.samba.org/?p=build-farm.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit c1554666b2da05a03aeca52a4c4ab0498a2c9e90 Author: Jelmer Vernooij <jel...@samba.org> Date: Fri Nov 12 10:17:37 2010 +0100 Remove some unncessary indirects. commit cceb98be07ee509d8560dfb4e3a16796514aa584 Author: Jelmer Vernooij <jel...@samba.org> Date: Fri Nov 12 09:56:41 2010 +0100 Implement diff. commit 95015a7c09f04af4b7a717cf8277db960f32b7b8 Author: Jelmer Vernooij <jel...@samba.org> Date: Fri Nov 12 09:45:33 2010 +0100 Simplify history walking. commit 10c6a0392671f0c4022c4b4decc2399c1d4a2e33 Author: Jelmer Vernooij <jel...@samba.org> Date: Fri Nov 12 02:50:29 2010 +0100 work on history support using dulwich ----------------------------------------------------------------------- Summary of changes: buildfarm/__init__.py | 4 + buildfarm/history.py | 143 +++++++++++++++++++++------------------ buildfarm/tests/test_history.py | 32 ++++++++- import-and-analyse.py | 90 +++++-------------------- web/build.py | 113 ++++++++++++++----------------- 5 files changed, 180 insertions(+), 202 deletions(-) Changeset truncated at 500 lines: diff --git a/buildfarm/__init__.py b/buildfarm/__init__.py index 3bbfe4f..d866db4 100644 --- a/buildfarm/__init__.py +++ b/buildfarm/__init__.py @@ -34,6 +34,10 @@ class Tree(object): self.srcdir = srcdir self.scm = scm + def get_branch(self): + from buildfarm.history import GitBranch + return GitBranch(self.repo, self.branch) + def __repr__(self): return "<%s %r>" % (self.__class__.__name__, self.name) diff --git a/buildfarm/history.py b/buildfarm/history.py index 6ca1af8..537ce48 100644 --- a/buildfarm/history.py +++ b/buildfarm/history.py @@ -19,81 +19,92 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+from cStringIO import StringIO -from buildfarm import util +from dulwich.objects import Tree +from dulwich.patch import write_blob_diff +from dulwich.repo import Repo -import commands -import os -BASEDIR = "/home/build/master" -HISTORYDIR = "/home/build/master/cache" -TIMEZONE = "PST" -TIMEOFFSET = 0 -UNPACKED_DIR = "/home/ftp/pub/unpacked" +class Branch(object): -class History(object): + def authors(self): + ret = set() + for rev in self.log(): + ret.add(rev.author) + return ret - def __init__(self, db): - self.db = db + def log(self): + raise NotImplementedError(self.log) - def _log(self, tree): - return util.LoadStructure(os.path.join(HISTORYDIR, "history.%s" % tree)) + def diff(self, revision): + raise NotImplementedError(self.diff) - def diff(self, author, date, tree, revision): - """get recent git entries""" - # validate the tree - t = self.db.trees[tree] - if t.scm == "git": - self._git_diff(t, revision, tree) - else: - raise Exception("Unknown VCS %s" % t.scm) +class Revision(object): + + def __init__(self, revision, date, author, message, modified=[], added=[], removed=[]): + self.revision = revision + self.date = date + self.author = author + self.message = message + self.modified = modified + self.added = added + self.removed = removed - def _git_diff(self, t, revision, tree): - """show recent git entries""" - log = self._log(tree) +class GitBranch(object): - # backwards? why? 
well, usually our users are looking for the newest - # stuff, so it's most likely to be found sooner - for i in range(len(log), 0, -1): - if log[i]["REVISION"] == revision: - entry = log[i] - break + def __init__(self, path, branch="master"): + self.repo = Repo(path) + self.store = self.repo.object_store + self.branch = branch + + def _changes_for(self, commit): + if len(commit.parents) == 0: + parent_tree = Tree().id + else: + parent_tree = self.store[commit.parents[0]].tree + return self.store.tree_changes(parent_tree, commit.tree) + + def _revision_from_commit(self, commit): + added = set() + modified = set() + removed = set() + for ((oldpath, newpath), (oldmode, newmode), (oldsha, newsha)) in self._changes_for(commit): + if oldpath is None: + added.add(newpath) + elif newpath is None: + removed.add(oldpath) + else: + modified.add(newpath) + return Revision(commit.id, commit.commit_time, commit.author, commit.message, modified=modified, removed=removed, added=added) + + def log(self, from_rev=None, exclude_revs=None): + if from_rev is None: + try: + commit = self.repo["refs/heads/%s" % self.branch] + except KeyError: + return + from_rev = commit.id else: - raise Exception("Unable to locate commit information revision[%s]." % revision) - - # get information about the current diff - title = "GIT Diff in %s:%s for revision %s" % ( - tree, t.branch, revision) - - pwd = os.environ["PWD"] - ret = None - try: - os.chdir(os.path.join(UNPACKED_DIR, tree)) - cmd = "git diff %s^ %s ./" % (revision, revision) - ret = (title, entry, tree, [(cmd, commands.getoutput("%s 2> /dev/null" % cmd))]) - - finally: - os.chdir(pwd) - return ret - - def authors(self, tree): - log = self._log(tree) - authors = set() - for entry in log: - authors.add(entry["AUTHOR"]) - return authors - - def history(self, tree, author=None): - """get commit history for the given tree""" - log = self._log(tree) - - # what? backwards? why is that? oh... I know... 
we want the newest first - for i in range(len(log), 0, -1): - entry = log[i] - if (author is None or - (author == "") or - (author == "ALL") or - (author == entry["AUTHOR"])): - yield entry, tree + from_rev = commit.id + done = set() + pending_commits = [from_rev] + while pending_commits != []: + commit_id = pending_commits.pop(0) + commit = self.repo[commit_id] + yield self._revision_from_commit(commit) + done.add(commit.id) + # FIXME: Add sorted by commit_time + for p in commit.parents: + if exclude_revs is not None and p in exclude_revs: + continue + pending_commits.append(p) + + def diff(self, revision): + commit = self.repo[revision] + f = StringIO() + for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in self._changes_for(commit): + write_blob_diff((oldpath, oldmode, self.store[oldsha]), (newpath, newmode, self.store[newsha])) + return (self._revision_from_commit(commit), f.getvalue()) diff --git a/buildfarm/tests/test_history.py b/buildfarm/tests/test_history.py index 7f80259..29d7c1a 100644 --- a/buildfarm/tests/test_history.py +++ b/buildfarm/tests/test_history.py @@ -15,6 +15,34 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-import testtools +from buildfarm.history import GitBranch -from buildfarm import history +from dulwich.repo import Repo + +import tempfile +from testtools import TestCase + + +class GitBranchTests(TestCase): + + def setUp(self): + super(GitBranchTests, self).setUp() + self.repo = Repo.init(tempfile.mkdtemp()) + + def test_log_empty(self): + branch = GitBranch(self.repo.path, "master") + self.assertEquals([], list(branch.log())) + + def test_log_commits(self): + branch = GitBranch(self.repo.path, "master") + self.repo.do_commit("message", committer="Jelmer Vernooij") + log = list(branch.log()) + self.assertEquals(1, len(log)) + self.assertEquals("message", log[0].message) + + def test_empty_diff(self): + branch = GitBranch(self.repo.path, "master") + revid = self.repo.do_commit("message", committer="Jelmer Vernooij") + entry, diff = list(branch.diff(revid)) + self.assertEquals("message", entry.message) + self.assertEquals("", diff) diff --git a/import-and-analyse.py b/import-and-analyse.py index de7f08d..3a77f77 100755 --- a/import-and-analyse.py +++ b/import-and-analyse.py @@ -14,12 +14,9 @@ from buildfarm import ( BuildFarm, hostdb, ) -import commands from email.mime.text import MIMEText import logging import optparse -import os -import re import smtplib parser = optparse.OptionParser("import-and-analyse [options]") @@ -28,71 +25,12 @@ parser.add_option("--verbose", help="Be verbose", action="count") (opts, args) = parser.parse_args() -UNPACKED_DIR = "/home/ftp/pub/unpacked" - # we open readonly here as only apache(www-run) has write access buildfarm = BuildFarm() -db = buildfarm.builds -hostsdb = buildfarm.hostdb smtp = smtplib.SMTP() smtp.connect() -class Log(object): - - def __init__(self): - self.change_log = None - self.committers = set() - self.authors = set() - self.recipients = None - - -def get_log_git(tree, cur, old): - cmd = "cd %s/%s && git log --pretty=full %s..%s ./" % (UNPACKED_DIR, tree, old, cur) - - log = Log() - - log.change_log = 
commands.getoutput(cmd) - #print log.change_log - - # get the list of possible culprits - log2 = log.change_log - - for m in re.findall("[\n]*Author: [^<]*<([^>]+)>\nCommit: [^<]*<([^>]+)>\n(.*)$", log.change_log): - author = m.group(1) - committer = m.group(2) - - # handle cherry-picks from svnmirror repo - author = author.replace("0c0555d6-39d7-0310-84fc-f1cc0bd64818", "samba.org") - - # for now only send reports to samba.org addresses. - if not "@samba.org" in author: - author = None - - if author: - log.authors.add(author) - if committer: - log.committers.add(committer) - - # Add a URL to the diffs for each change - log.change_log = re.sub("([\n]*commit ([0-9a-f]+))", "\\1\nhttp:\/\/build.samba.org\/?function=diff;tree=%s;revision=\\2" % tree, log.change_log) - - all = set() - all.update(log.authors) - all.update(log.committers) - log.recipients = all - return log - - -def get_log(tree, cur, old): - treedir = os.path.join(UNPACKED_DIR, tree) - - if os.path.exists(os.path.join(treedir, ".git")): - return get_log_git(tree, cur, old) - else: - raise Exception("Unknown vcs for %s" % treedir) - - def check_and_send_mails(tree, host, compiler, cur, old): t = buildfarm.trees[tree] @@ -111,13 +49,19 @@ def check_and_send_mails(tree, host, compiler, cur, old): print "the build didn't get worse since %r" % old_status return - log = get_log(tree, cur, old) - if not log: - if opts.dry_run: - print "no log" - return + recipients = set() + change_log = "" - recipients = ",".join(log.recipients.keys()) + for rev in t.get_branch().log(from_rev=cur.rev, exclude_revs=set([old.rev])): + recipients.add(rev.author) + recipients.add(rev.committer) + change_log += """ +revision: %s +author: %s +committer: %s +message: + %s +""" % (rev.revision, rev.author, rev.committer, rev.message) body = """ Broken build for tree %(tree)s on host %(host)s with compiler %(compiler)s @@ -132,13 +76,13 @@ See http://build.samba.org/?function=View+Build;host=%(host)s;tree=%(tree)s;comp The build may 
have been broken by one of the following commits: %(change_log)s - """ % {"tree": tree, "host": host, "compiler": compiler, "change_log": log.change_log, "scm": t.scm, "branch": t.branch, + """ % {"tree": tree, "host": host, "compiler": compiler, "change_log": change_log, "scm": t.scm, "branch": t.branch, "cur_rev": cur_rev, "old_rev": old_rev, "cur_status": cur_status, "old_status": old_status } msg = MIMEText(body) msg["Subject"] = "BUILD of %s:%s BROKEN on %s with %s AT REVISION %s" % (tree, t.branch, host, compiler, cur_rev) msg["From"] = "\"Build Farm\" <bu...@samba.org>" - msg["To"] = recipients + msg["To"] = ",".join(recipients.keys()) smtp.send(msg["From"], [msg["To"]], msg.as_string()) @@ -146,17 +90,17 @@ for build in buildfarm.get_new_builds(): if opts.verbose >= 2: print "Processing %s..." % build - db.upload_build(build) + buildfarm.builds.upload_build(build) (rev, commit_rev, rev_timestamp) = build.revision_details() try: - prev_rev = db.get_previous_revision(build.tree, build.host, build.compiler, rev) + prev_rev = buildfarm.builds.get_previous_revision(build.tree, build.host, build.compiler, rev) except hostdb.NoSuchBuild: # Can't send a nastygram until there are 2 builds.. 
continue else: - prev_build = db.get_build(build.tree, build.host, build.compiler, prev_rev) + prev_build = buildfarm.get_build(build.tree, build.host, build.compiler, prev_rev) check_and_send_mails(build.tree, build.host, build.compiler, build, prev_build) diff --git a/web/build.py b/web/build.py index b28eb85..13700c1 100755 --- a/web/build.py +++ b/web/build.py @@ -32,7 +32,6 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from buildfarm import ( CachingBuildFarm, data, - history, util, ) @@ -48,7 +47,6 @@ basedir = os.path.abspath(os.path.join(webdir, "..")) buildfarm = CachingBuildFarm() db = data.BuildResultStore(basedir) -#history = history.History(db) hostsdb = buildfarm.hostdb compilers = buildfarm.compilers @@ -804,29 +802,20 @@ def diff_pretty(diff): def web_paths(t, paths): """change the given source paths into links""" - ret = "" - - fmt = None - if t.scm == "git": - r = t.repo - s = t.subdir - b = t.branch - fmt = " <a href=\"%s/?p=%s;a=history;f=%s%%s;h=%s;hb=%s\">%%s</a>" % (GITWEB_BASE, r, s, b, b) + ret = "" + for path in paths: + ret += " <a href=\"%s/?p=%s;a=history;f=%s%s;h=%s;hb=%s\">%s</a>" % (GITWEB_BASE, t.repo, t.subdir, path, t.branch, t.branch, path) + return ret else: - return paths - - for m in re.finditer("\s*([^\s]+)", paths): - ret += fmt % (m.group(1), m.group(1)) - - return ret + raise Exception("Unknown scm %s" % t.scm) def history_row_html(myself, entry, tree): """show one row of history table""" - msg = cgi.escape(entry["MESSAGE"]) - t = time.asctime(time.gmtime(entry["DATE"])) - age = util.dhm_time(time()-entry["DATE"]) + msg = cgi.escape(entry.message) + t = time.asctime(time.gmtime(entry.date)) + age = util.dhm_time(time()-entry.date) t = t.replace(" ", " ") @@ -835,11 +824,11 @@ def history_row_html(myself, entry, tree): <div class=\"datetime\"> <span class=\"date\">%s</span><br /> <span class=\"age\">%s ago</span>""" % (t, age) - if entry["REVISION"]: - yield " - <span 
class=\"revision\">%s</span><br/>" % entry["REVISION"] - revision_url = "revision=%s" % entry["REVISION"] + if entry.revision: + yield " - <span class=\"revision\">%s</span><br/>" % entry.revision + revision_url = "revision=%s" % entry.revision else: - revision_url = "author=%s" % entry["AUTHOR"] + revision_url = "author=%s" % entry.author yield """ </div> <div class=\"diff\"> <span class=\"html\"><a href=\"%s?function=diff;tree=%s;date=%s;%s\">show diffs</a></span> @@ -852,9 +841,9 @@ def history_row_html(myself, entry, tree): </div> <div class=\"author\"> <span class=\"label\">Author: </span>%s - </div>""" % (myself, tree, entry["DATE"], revision_url, - myself, tree, entry["DATE"], revision_url, - msg, entry["AUTHOR"]) + </div>""" % (myself, tree, entry.date, revision_url, + myself, tree, entry.date, revision_url, + msg, entry.author) t = db.trees.get(tree) @@ -862,45 +851,44 @@ def history_row_html(myself, entry, tree): yield "</div>" return - if entry["FILES"]: + if entry.modified: yield "<div class=\"files\"><span class=\"label\">Modified: </span>" - yield web_paths(t, entry["FILES"]) + yield web_paths(t, entry.modified) yield "</div>\n" - if entry["ADDED"]: + if entry.added: yield "<div class=\"files\"><span class=\"label\">Added: </span>" - yield web_paths(t, entry["ADDED"]) + yield web_paths(t, entry.added) yield "</div>\n" - if entry["REMOVED"]: + if entry.removed: yield "<div class=\"files\"><span class=\"label\">Removed: </span>" - yield web_paths(t, entry["REMOVED"]) + yield web_paths(t, entry.removed) yield "</div>\n" yield "</div>\n" + def history_row_text(entry, tree): """show one row of history table""" - msg = cgi.escape(entry["MESSAGE"]) - t = time.asctime(time.gmtime(entry["DATE"])) - age = util.dhm_time(time()-entry["DATE"]) - - yield "Author: %s\n" % entry["AUTHOR"] - if entry["REVISION"]: - yield "Revision: %s\n" % entry["REVISION"] - yield "Modified: %s\n" % entry["FILES"] - yield "Added: %s\n" % entry["ADDED"] - yield "Removed: %s\n" % 
entry["REMOVED"] + msg = cgi.escape(entry.message) + t = time.asctime(time.gmtime(entry.date)) -- build.samba.org