On 10/14/2014 02:53 AM, Duncan wrote: > What about homepage? An index for it too?
If we really want to index the homepage, then a more extensible format might be better. For example, each line of the index could be a JSON object like this:

{"description": "sandbox'd LD_PRELOAD hack", "homepage": "http://www.gentoo.org/proj/en/portage/sandbox/", "package_versions": "sys-apps/sandbox-1.6-r2,2.3-r1,2.4,2.5,2.6-r1"}

The attached script generates this JSON lines format.

--
Thanks,
Zac
#!/usr/bin/env python
"""Write a JSON-lines index of package descriptions and homepages for a repo.

Each output line is one JSON object with the keys "description",
"homepage" and "package_versions".  Consecutive versions of the same
category/package that share both description and homepage are merged into
a single line.
"""
import json
import os
import sys
import portage
from portage.versions import _pkg_str

usage = "usage: %s <repo>\n" % os.path.basename(sys.argv[0])


def main(args):
    """Emit the index for the repository named by the single argument.

    args -- command-line arguments without the program name; exactly one
            element (the repository name) is expected.

    Returns 1 after writing a message to stderr when the argument count is
    wrong or the repository is unknown, and os.EX_OK once the index has
    been written to stdout.
    """
    if len(args) != 1:
        sys.stderr.write(usage)
        return 1

    repo_name = args[0]
    repo_info = portage.settings.repositories.prepos.get(repo_name)
    if repo_info is None:
        sys.stderr.write("unknown repo: %s\n" % repo_name)
        return 1

    portdb = portage.db[portage.root]["porttree"].dbapi
    # Presumably limits every lookup below to the requested repository;
    # confirm against the portdbapi documentation.
    portdb.porttrees = [repo_info.location]

    # Encoded bytes are written, so Python 3 needs the underlying binary
    # stream rather than the text wrapper.
    out = sys.stdout.buffer if sys.hexversion >= 0x3000000 else sys.stdout

    def emit_run(description, homepage, pkgs):
        """Write one index line for a run of packages; no-op if empty."""
        if not pkgs:
            return
        # The first entry is written in full; the remaining entries
        # contribute only their version.
        package_versions = ",".join(
            [pkgs[0]] + [pkg.version for pkg in pkgs[1:]])
        record = {
            "package_versions": package_versions,
            "description": description,
            "homepage": homepage,
        }
        line = json.dumps(record, ensure_ascii=False, sort_keys=True)
        out.write(line.encode('utf_8') + b'\n')

    # A run is a maximal sequence of consecutive versions sharing the same
    # (cp, description, homepage) key.
    run_key = (None, None, None)
    run_pkgs = []
    for cp in portdb.cp_all():
        for cpv in portdb.cp_list(cp):
            desc, homepage = portdb.aux_get(
                cpv, ["DESCRIPTION", "HOMEPAGE"])
            key = (cp, desc, homepage)
            if key != run_key:
                emit_run(run_key[1], run_key[2], run_pkgs)
                run_key = key
                run_pkgs = []
            run_pkgs.append(_pkg_str(cpv))

    # Flush whatever run is still pending after the last package.
    emit_run(run_key[1], run_key[2], run_pkgs)
    return os.EX_OK


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))