#!/usr/bin/env python
#
# Copyright (C) 2012-2013 Johannes 'josch' Schauer
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Find Debian build dependencies that Gentoo only needs behind a USE flag.

For every Gentoo package that maps to a Debian source package, the DEPEND
entries that disappear when all USE flags are disabled are collected and
translated to Debian source packages.  For each Debian source package the
binary packages that are built from those sources *and* listed in its
Build-Depends are printed to stdout as ``src:<name> <bin> <bin> ...``.
Progress and diagnostics go to stderr.

Inputs (relative to the current directory): ``./deb2gen_mapping.list``,
``./deb_source_pkgs.list``, ``./portdir`` and ``./portage/bin/portageq``,
plus the Sources file given on the command line.
"""

import sys
import os
from os import path
from collections import Counter, defaultdict
from subprocess import Popen, PIPE
from functools import cmp_to_key
from datetime import datetime

PORTDIR = "./portdir"
PORTAGEQ = "./portage/bin/portageq"
MAPPING_FILE = "./deb2gen_mapping.list"
DEBIAN_NAMES_FILE = "./deb_source_pkgs.list"


# given the starting time, the overall count and the current progress,
# return a string that shows the remaining time in hours/minutes/seconds
def estimate_remaining_time(before, count, i):
    """Return the estimated remaining time as ``HH:MM:SS``.

    before -- datetime at which the work started
    count  -- total number of items
    i      -- number of items finished so far

    Returns "n.a." while no estimate is possible (nothing finished yet or
    less than one whole second elapsed).
    """
    elapsed = datetime.now() - before
    # whole seconds only; the microsecond part is deliberately ignored
    seconds = elapsed.days * 24 * 60 * 60 + elapsed.seconds
    if seconds == 0 or i == 0:
        return "n.a."
    speed = float(i) / seconds
    remaining = (count - i) / speed
    # %d truncates the float components
    return "%02d:%02d:%02d" % (remaining / 3600,
                               (remaining % 3600) / 60,
                               remaining % 60)


# flatten an arbitrarily nested list
def flatten(l):
    """Yield the non-list items of the nested list ``l`` in order.

    Only ``list`` instances are descended into; tuples and other
    containers are yielded as items.  The argument is not modified.
    """
    # explicit stack of iterators: no recursion limit, no copying
    stack = [iter(l)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, list):
                stack.append(iter(item))
                break
            yield item
        else:
            # innermost iterator exhausted
            stack.pop()


def _read_mappings(filename):
    """Read tab separated ``debian<TAB>gentoo`` lines.

    Returns ``(deb2gen, gen2deb)``, two defaultdict(list) mapping a name
    to all names it is paired with.  Blank lines are skipped.
    """
    deb2gen = defaultdict(list)
    gen2deb = defaultdict(list)
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            deb, gen = line.split('\t')
            deb2gen[deb].append(gen)
            gen2deb[gen].append(deb)
    return deb2gen, gen2deb


def _read_debian_names(filename):
    """Return the Debian source package names listed in ``filename``.

    The first four characters of every line are dropped (presumably a
    ``src:`` prefix, matching the output format -- TODO confirm).
    """
    with open(filename) as f:
        return [line[4:].strip() for line in f]


def _find_relevant_packages(portdir, gen2deb, debian_names):
    """Return ``(cat, pkg, ver, rev)`` for the newest ebuild of each package.

    Traverses ``portdir/<category>/<package>/*.ebuild`` and keeps only
    packages that map to at least one name in ``debian_names``.
    """
    from portage.versions import pkgcmp, pkgsplit

    version_key = cmp_to_key(pkgcmp)
    wanted = set(debian_names)
    suffix = ".ebuild"
    pkgnames = []
    for cat in os.listdir(portdir):
        catpath = os.path.join(portdir, cat)
        if not os.path.isdir(catpath):
            continue
        for pkgdir in os.listdir(catpath):
            pkgpath = os.path.join(catpath, pkgdir)
            if not os.path.isdir(pkgpath):
                continue
            # get all .ebuild files
            candidates = [pkgsplit(name[:-len(suffix)])
                          for name in os.listdir(pkgpath)
                          if name.endswith(suffix)]
            # assumes pkgsplit returns None for names it cannot split --
            # TODO confirm against the bundled portage
            candidates = [c for c in candidates if c is not None]
            if not candidates:
                continue
            # only grab newest package
            pkg, ver, rev = sorted(candidates, key=version_key,
                                   reverse=True)[0]
            deb = gen2deb.get("%s/%s" % (cat, pkg), [])
            # assure that the package is relevant
            if not any(d in wanted for d in deb):
                continue
            pkgnames.append((cat, pkg, ver, rev))
            sys.stderr.write("\rGenerating list of packages... %d"
                             % len(pkgnames))
    sys.stderr.write("\rGenerating list of packages... Done.\n")
    return pkgnames


def _use_conditional_deps(pkgname, portdir):
    """Return the cat/pkg names ``pkgname`` depends on only via USE flags.

    Queries ``portageq metadata / ebuild <pkgname> DEPEND`` and returns
    every dependency present with all USE flags enabled but absent with
    none enabled.  Returns an empty list if portageq fails.
    """
    from portage.dep import use_reduce, Atom
    from portage.exception import InvalidAtom

    # the child sees an environment that contains nothing but PORTDIR;
    # universal_newlines makes the output text on Python 3 as well
    proc = Popen([PORTAGEQ, "metadata", "/", "ebuild", pkgname, "DEPEND"],
                 stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir},
                 universal_newlines=True)
    depend, errors = proc.communicate()
    if proc.returncode != 0:
        sys.stderr.write("cannot parse %s. Output: %s\n" % (pkgname, errors))
        return []
    all_use = use_reduce(depend, matchall=True)
    no_use = use_reduce(depend, matchnone=True)
    names = []
    for dep in all_use:
        # filter out all dependencies that are also present when USE flags
        # are disabled
        if dep in no_use:
            continue
        # there are disjunctions but we better collect too many build
        # dependencies than too few
        tokens = flatten(dep) if isinstance(dep, list) else [dep]
        for token in tokens:
            if token == "||":
                continue
            try:
                names.append(Atom(token).cp)
            except InvalidAtom:
                sys.stderr.write("Invalid Atom for %s: %s\n"
                                 % (pkgname, str(token)))
            except TypeError:
                sys.stderr.write("TypeError for %s: %s\n"
                                 % (pkgname, str(token)))
    return names


def main(argv):
    """Run the analysis; ``argv`` is the full argument vector.

    Returns the process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) != 2:
        sys.stderr.write("usage: %s /path/to/Sources.bz\n" % argv[0])
        return 1

    # third-party modules are imported only after the usage check
    import apt_pkg
    pym_path = path.join(path.dirname(path.realpath(__file__)),
                         "portage", "pym")
    sys.path.insert(0, pym_path)

    # read mapping between Gentoo and Debian source packages
    deb2gen, gen2deb = _read_mappings(MAPPING_FILE)
    debian_names = _read_debian_names(DEBIAN_NAMES_FILE)

    # traverse ./portdir to find .ebuild files that interest us
    pkgnames = _find_relevant_packages(PORTDIR, gen2deb, debian_names)

    found = 0
    total = len(pkgnames)
    before = datetime.now()
    debian_deps = dict()
    missing_gen_mappings = list()
    progress = ("\rFinding reduced dependencies: %d/%d, found %d. "
                "Estimated time left: %s h")
    # look for reduced build dependencies of gentoo packages by extracting
    # all dependencies that need a USE flag to be set
    for i, (cat, pkg, ver, rev) in enumerate(pkgnames):
        if rev == "r0":
            pkgname = "%s/%s-%s" % (cat, pkg, ver)
        else:
            pkgname = "%s/%s-%s-%s" % (cat, pkg, ver, rev)
        gentoo_deps = _use_conditional_deps(pkgname, PORTDIR)
        if gentoo_deps:
            ddeps = []
            for gen in gentoo_deps:
                mapped = gen2deb.get(gen)
                if not mapped:
                    missing_gen_mappings.append(gen)
                    continue
                ddeps.extend(mapped)
            debian_deps["%s/%s" % (cat, pkg)] = list(set(ddeps))
            found += 1
        # i+1 packages are finished at this point
        done = i + 1
        sys.stderr.write(progress % (done, total, found,
                                     estimate_remaining_time(before, total,
                                                             done)))
        sys.stderr.flush()
    # does not rely on the loop variable, so an empty package list is fine
    sys.stderr.write((progress + "\n")
                     % (total, total, found,
                        estimate_remaining_time(before, total, total)))

    missing_deb_mappings = list()
    src2bin = dict()
    src_deps = dict()
    # get the actual Debian build dependencies for all interesting packages
    # from a Sources file
    for section in apt_pkg.TagFile(argv[1]):
        name = section['Package']
        try:
            src_deps[name] = [
                dep for (dep, ver, rel)
                in flatten(apt_pkg.parse_src_depends(
                    section.get('Build-Depends', '')))]
        except ValueError:
            # diagnostics must not end up in the result stream on stdout
            sys.stderr.write("cannot parse depends line for %s: %s\n"
                             % (section['Package'],
                                section.get('Build-Depends')))
            src_deps[name] = []
        src2bin[name] = [b.strip() for b in section['Binary'].split(',')]

    all_droppable = list()
    # process all debian source packages and output those build dependencies
    # that build from a source package which is not needed by the
    # corresponding Gentoo package
    for deb in debian_names:
        # find gentoo package name
        gentoo = deb2gen.get(deb)
        if not gentoo:
            missing_deb_mappings.append(deb)
            continue
        # get the droppable dependencies
        deps = []
        for gen in gentoo:
            deps.extend(debian_deps.get(gen, []))
        if not deps:
            continue
        # get the debian build dependencies; a package that is missing
        # from the Sources file simply has none
        build_deps = set(src_deps.get(deb, ()))
        bindeps = list()
        # for each droppable dependency
        for src in deps:
            # convert source package to binary packages
            # only retain those binary packages that are a build dependency
            bindeps.extend(b for b in src2bin.get(src, [])
                           if b in build_deps)
        if bindeps:
            sys.stdout.write("src:%s %s\n" % (deb, " ".join(bindeps)))
            all_droppable.extend(bindeps)

    sys.stderr.write("missing debsrc mapping: %s\nmissing gentoo mapping: %s\n"
                     % (" ".join(sorted(set(missing_deb_mappings))),
                        " ".join(sorted(set(missing_gen_mappings)))))

    # the 20 binary packages that can be dropped most often
    drop_freq = Counter(all_droppable)
    for package, freq in drop_freq.most_common(20):
        sys.stderr.write("%s: %s\n" % (package, freq))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))