restructure and integrate code
This commit is contained in:
parent
27f3811300
commit
a7e139ffb8
7 changed files with 172 additions and 22204 deletions
34
README
34
README
|
@ -8,7 +8,7 @@ $ ./update-portdir.sh
|
|||
|
||||
At this point, you can already run the following script to get the list of
|
||||
Debian source packages that can be mapped to Gentoo packages and also have
|
||||
dependencies that can be dropped in Gentoo:
|
||||
build dependencies that can be dropped in Gentoo:
|
||||
|
||||
$ ./find_reduced.py
|
||||
|
||||
|
@ -17,37 +17,11 @@ It will print:
|
|||
- a list of Debian packages that cannot be mapped to Gentoo packages
|
||||
- a list of Gentoo packages that cannot be mapped to Debian packages
|
||||
|
||||
Above script depends on the following data files to be present:
|
||||
Overview:
|
||||
|
||||
portage - portage sourcecode
|
||||
* retrieved as a git submodule
|
||||
portdir - database of ebuild files
|
||||
* retrieved by ./update-portdir.sh
|
||||
reduced_gen_deps.list - Gentoo packages with reduced dependencies
|
||||
* generated by ./gentoo_dep_list.py
|
||||
gen_source_pkgs.list - list of Gentoo source packages
|
||||
* generated by ./gentoo_pkg_list_raw.sh
|
||||
deb_source_pkgs.list - list of Debian source packages
|
||||
* supplied by yourself
|
||||
deb2gen_mapping.list - mapping of Debian to Gentoo packages
|
||||
* created manually and/or with the help of
|
||||
./find_pkg_matches.py
|
||||
|
||||
deb_source_pkgs.list is filled by the user. All other *.list files can be
|
||||
regenerated by the following scripts:
|
||||
|
||||
$ ./gentoo_dep_list.py
|
||||
|
||||
It writes all source packages with reduced build dependencies plus the build
|
||||
dependencies that can be dropped to reduced_gen_deps.list. Execution takes
|
||||
about 2.5 hours.
|
||||
|
||||
$ ./gentoo_pkg_list_raw.sh
|
||||
|
||||
It writes to gen_source_pkgs.list a list of all available Gentoo packages.
|
||||
|
||||
$ ./find_pkg_matches.py
|
||||
|
||||
Inspects the current content of deb2gen_mapping.list and prints a list of
|
||||
Debian packages that miss a mapping to Gentoo packages as well as a list of
|
||||
suggestions of Gentoo packages that sound similar.
|
||||
deb_source_pkgs.list - list of interesting Debian source packages
|
||||
deb2gen_mapping.list - mapping of Debian source packages to Gentoo packages
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
|
||||
deb2gen = dict()
|
||||
gen2deb = dict()
|
||||
with open("./deb2gen_mapping.list") as f:
|
||||
for line in f:
|
||||
d, g = line.strip().split('\t')
|
||||
deb2gen[d] = g
|
||||
gen2deb[g] = d
|
||||
|
||||
with open("./deb_source_pkgs.list") as f:
|
||||
debian_names = [p[4:].strip() for p in f]
|
||||
|
||||
with open("./gen_source_pkgs.list") as f:
|
||||
gentoo_names = [p.strip() for p in f]
|
||||
|
||||
for p in debian_names:
|
||||
# if mapping exists, continue
|
||||
orig = p
|
||||
if deb2gen.get(p):
|
||||
continue
|
||||
# first try exact match
|
||||
matches = [g for g in gentoo_names if p == g.split('/')[-1]]
|
||||
if matches:
|
||||
print "%s\t%s"%(orig, matches[0])
|
||||
continue
|
||||
# then try substring matches
|
||||
# remove '-perl' suffix
|
||||
if p.endswith("-perl"):
|
||||
p = p[:-5]
|
||||
# remove 'python-' prefix
|
||||
if p.startswith("python-"):
|
||||
p = p[7:]
|
||||
# remove 'lib' prefix
|
||||
if p.startswith("lib"):
|
||||
p = p[3:]
|
||||
# remove dashes, dots and numbers from the end of the package name
|
||||
p = p.rstrip(".-1234567890")
|
||||
matches = [g for g in gentoo_names if p in g.split('/')[-1].lower()]
|
||||
if matches:
|
||||
print "%s\t%s"%(orig, str(matches))
|
||||
continue
|
||||
# we give up and print the debian source package name that was not matched
|
||||
print orig
|
186
find_reduced.py
186
find_reduced.py
|
@ -1,13 +1,48 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
|
||||
if len(sys.argv) != 2:
|
||||
print "usage: %s /path/to/Sources.bz"%sys.argv[0]
|
||||
|
||||
import os
|
||||
from os import path
|
||||
from subprocess import Popen, PIPE
|
||||
from functools import cmp_to_key
|
||||
from datetime import datetime
|
||||
import apt_pkg
|
||||
|
||||
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
|
||||
sys.path.insert(0, pym_path)
|
||||
|
||||
from portage.versions import pkgsplit
|
||||
from portage.versions import pkgcmp, pkgsplit
|
||||
from portage.dep import use_reduce, Atom
|
||||
from portage.exception import InvalidAtom
|
||||
|
||||
# given the starting time, the overall count and the current progress,
|
||||
# return a string that shows the remaining time in hours/minutes/seconds
|
||||
def estimate_remaining_time(before, count, i):
    """Return the estimated time left as an "HH:MM:SS" string.

    before -- datetime at which processing started
    count  -- overall number of items to process
    i      -- number of items processed so far

    Returns "n.a." while no estimate is possible, i.e. when no whole second
    has elapsed yet or nothing has been processed.
    """
    elapsed = datetime.now() - before
    # whole seconds only; the microseconds part is ignored
    elapsed_secs = elapsed.seconds + elapsed.days*24*60*60
    if i == 0 or elapsed_secs == 0:
        return "n.a."
    # items per second so far, extrapolated over the items still to do
    rate = float(i)/elapsed_secs
    left = (count - i)/rate
    parts = (left/3600, (left%3600)/60, left%60)
    return "%02d:%02d:%02d"%parts
|
||||
|
||||
# flatten an arbitrarily nested list
|
||||
def flatten(l):
    """Yield the non-list items of an arbitrarily nested list, depth first.

    l -- a (possibly nested) list; only `list` instances are descended into,
         so tuples and strings are yielded as single items.

    The argument is left untouched: the nesting is walked with a stack of
    iterators rather than by splicing sublists into `l` and popping from its
    front, which emptied the caller's list and cost O(n) per yielded item.
    """
    if not l:
        return
    stack = [iter(l)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, list):
                # descend; the outer iterator resumes once this one is exhausted
                stack.append(iter(item))
                break
            yield item
        else:
            # innermost iterator exhausted, go back up one level
            stack.pop()
|
||||
|
||||
# TODO: what about mapping to multiple packages?
|
||||
deb2gen = dict()
|
||||
gen2deb = dict()
|
||||
with open("./deb2gen_mapping.list") as f:
|
||||
|
@ -19,32 +54,143 @@ with open("./deb2gen_mapping.list") as f:
|
|||
with open("./deb_source_pkgs.list") as f:
|
||||
debian_names = [p[4:].strip() for p in f]
|
||||
|
||||
gentoo_deps = dict()
|
||||
with open("./reduced_gen_deps.list") as f:
|
||||
for line in f:
|
||||
p = line.strip().split()
|
||||
gentoo_deps[pkgsplit(p[0])[0]] = p[1:]
|
||||
portdir = "./portdir"
|
||||
|
||||
missing_deb_mappings = list()
|
||||
pkgnames = list()
|
||||
|
||||
# traverse ./portdir to find .ebuild files that interest us
|
||||
for cat in os.listdir(portdir):
|
||||
catpath = os.path.join(portdir, cat)
|
||||
|
||||
if not os.path.isdir(catpath):
|
||||
continue
|
||||
|
||||
for pkg in os.listdir(catpath):
|
||||
pkgpath = os.path.join(catpath, pkg)
|
||||
|
||||
if not os.path.isdir(pkgpath):
|
||||
continue
|
||||
|
||||
# get all .ebuild files
|
||||
pkgs = [pkgsplit(ver[:-7]) for ver in os.listdir(pkgpath) if ver.endswith(".ebuild")]
|
||||
|
||||
if not pkgs:
|
||||
continue
|
||||
|
||||
# only grab newest package
|
||||
pkg, ver, rev = sorted(pkgs, key=cmp_to_key(pkgcmp), reverse=True)[0]
|
||||
deb = gen2deb.get("%s/%s"%(cat, pkg))
|
||||
|
||||
# assure that the package is relevant
|
||||
if deb not in debian_names:
|
||||
continue
|
||||
|
||||
pkgnames.append((cat, pkg, ver, rev))
|
||||
sys.stderr.write("\rGenerating list of packages... %d"%len(pkgnames))
|
||||
|
||||
sys.stderr.write("\rGenerating list of packages... Done.\n")
|
||||
|
||||
count = 0
|
||||
|
||||
before = datetime.now()
|
||||
|
||||
debian_deps = dict()
|
||||
missing_gen_mappings = list()
|
||||
|
||||
for d in debian_names:
|
||||
g = deb2gen.get(d)
|
||||
if g:
|
||||
deps = gentoo_deps.get(g)
|
||||
if deps:
|
||||
# look for reduced build dependencies
|
||||
for i, (cat, pkg, ver, rev) in enumerate(pkgnames):
|
||||
if rev == "r0":
|
||||
pkgname = "%s/%s-%s"%(cat, pkg, ver)
|
||||
else:
|
||||
pkgname = "%s/%s-%s-%s"%(cat, pkg, ver, rev)
|
||||
p = Popen(["./portage/bin/portageq", "metadata", "/", "ebuild", pkgname, "DEPEND"], stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir})
|
||||
r = p.communicate()
|
||||
if p.returncode != 0:
|
||||
sys.stderr.write("cannot parse %s. Output: %s\n"%(pkgname, r[1]))
|
||||
depend = r[0]
|
||||
all_use = use_reduce(depend, matchall=True)
|
||||
no_use = use_reduce(depend, matchnone=True)
|
||||
l = list()
|
||||
for a in all_use:
|
||||
# filter out all dependencies that are also present without USE flags enabled
|
||||
if a in no_use:
|
||||
continue
|
||||
|
||||
if not isinstance(a, list) :
|
||||
a = [a]
|
||||
else:
|
||||
# there are disjunctions, but it is better to collect too many build dependencies than too few
|
||||
a = flatten(a)
|
||||
|
||||
for p in a:
|
||||
if p == "||":
|
||||
continue
|
||||
|
||||
try:
|
||||
l.append(Atom(p).cp)
|
||||
except InvalidAtom:
|
||||
sys.stderr.write("Invalid Atom for %s: %s\n"%(pkgname, str(p)))
|
||||
except TypeError:
|
||||
sys.stderr.write("TypeError for %s: %s\n"%(pkgname, str(p)))
|
||||
|
||||
if l:
|
||||
ddeps = []
|
||||
for p in deps:
|
||||
for p in l:
|
||||
r = gen2deb.get(p)
|
||||
if r:
|
||||
ddeps.append(r)
|
||||
else:
|
||||
|
||||
if not r:
|
||||
missing_gen_mappings.append(p)
|
||||
ddeps = list(set(ddeps))
|
||||
if ddeps:
|
||||
print d, ddeps
|
||||
else:
|
||||
continue
|
||||
|
||||
ddeps.append(r)
|
||||
debian_deps["%s/%s"%(cat,pkg)] = list(set(ddeps))
|
||||
count +=1
|
||||
|
||||
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
||||
sys.stdout.flush()
|
||||
|
||||
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h\n"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
||||
|
||||
missing_deb_mappings = list()
|
||||
|
||||
src2bin = dict()
|
||||
src_deps = dict()
|
||||
|
||||
for pkg in apt_pkg.TagFile(sys.argv[1]):
|
||||
p = pkg['Package']
|
||||
try:
|
||||
src_deps[p] = [name for (name,ver,rel) in flatten(apt_pkg.parse_src_depends(pkg.get('Build-Depends', '')))]
|
||||
except ValueError:
|
||||
print "cannot parse depends line for %s: %s"%(pkg['Package'], pkg.get('Build-Depends'))
|
||||
src_deps[p] = []
|
||||
src2bin[p] = [b.strip() for b in pkg['Binary'].split(',')]
|
||||
|
||||
# process all debian source packages
|
||||
for d in debian_names:
|
||||
# find gentoo package name
|
||||
g = deb2gen.get(d)
|
||||
if not g:
|
||||
missing_deb_mappings.append(d)
|
||||
continue
|
||||
|
||||
# get the droppable dependencies
|
||||
deps = debian_deps.get(g)
|
||||
|
||||
if not deps:
|
||||
continue
|
||||
|
||||
bindeps = list()
|
||||
# get the debian build dependencies
|
||||
pdeps = src_deps[d]
|
||||
|
||||
# for each droppable dependency
|
||||
for r in deps:
|
||||
# convert source package to binary packages
|
||||
# only retain those binary packages that are a build dependency
|
||||
bindeps.extend([p for p in src2bin[r] if p in pdeps])
|
||||
|
||||
if bindeps:
|
||||
print d, bindeps
|
||||
|
||||
print list(set(missing_deb_mappings))
|
||||
print list(set(missing_gen_mappings))
|
||||
|
|
15840
gen_source_pkgs.list
15840
gen_source_pkgs.list
File diff suppressed because it is too large
Load diff
|
@ -1,96 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import os
|
||||
from os import path
|
||||
from subprocess import Popen, PIPE
|
||||
from functools import cmp_to_key
|
||||
from datetime import datetime
|
||||
|
||||
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
|
||||
sys.path.insert(0, pym_path)
|
||||
|
||||
from portage.versions import pkgcmp, pkgsplit
|
||||
from portage.dep import use_reduce, Atom
|
||||
from portage.exception import InvalidAtom
|
||||
|
||||
def format_timedelta(delta):
    """Format a timedelta as an "HH:MM" string.

    delta -- object with `days` and `seconds` attributes (a timedelta);
             the microseconds part is not taken into account.
    """
    total = delta.seconds + delta.days*60*60*24
    return "%02d:%02d"%(total/3600, (total%3600)/60)
|
||||
|
||||
def estimate_remaining_time(before, count, i):
    """Return the estimated time left as an "HH:MM" string.

    before -- datetime at which processing started
    count  -- overall number of items to process
    i      -- number of items processed so far

    Returns "n.a." while no estimate is possible, i.e. when no whole second
    has elapsed yet or nothing has been processed.
    """
    now = datetime.now()
    delta = now-before
    # delta in seconds
    delta = delta.days*60*60*24+delta.seconds
    # with i == 0 the speed below would be 0.0 and the division by it would
    # raise ZeroDivisionError, so treat "no progress yet" like "no time elapsed"
    if delta == 0 or i == 0:
        return "n.a."
    speed = float(i)/delta
    remaining = (count - i)/speed
    remaining_hours = remaining/3600
    remaining_minutes = (remaining%3600)/60
    return "%02d:%02d"%(remaining_hours, remaining_minutes)
|
||||
|
||||
portdir = "./portdir"
|
||||
|
||||
pkgnames = list()
|
||||
|
||||
for cat in os.listdir(portdir):
|
||||
catpath = os.path.join(portdir, cat)
|
||||
if not os.path.isdir(catpath):
|
||||
continue
|
||||
for pkg in os.listdir(catpath):
|
||||
pkgpath = os.path.join(catpath, pkg)
|
||||
if not os.path.isdir(pkgpath):
|
||||
continue
|
||||
pkgs = [pkgsplit(ver[:-7]) for ver in os.listdir(pkgpath) if ver.endswith(".ebuild")]
|
||||
if len(pkgs) > 0:
|
||||
# only grab newest package
|
||||
pkg, ver, rev = sorted(pkgs, key=cmp_to_key(pkgcmp), reverse=True)[0]
|
||||
if rev == "r0":
|
||||
pkgname = "%s/%s-%s"%(cat, pkg, ver)
|
||||
else:
|
||||
pkgname = "%s/%s-%s-%s"%(cat, pkg, ver, rev)
|
||||
pkgnames.append(pkgname)
|
||||
sys.stderr.write("\rGenerating list of packages... %d"%len(pkgnames))
|
||||
|
||||
sys.stderr.write("\rGenerating list of packages... Done.\n")
|
||||
|
||||
count = 0
|
||||
|
||||
deplist = open("./out", "wb")
|
||||
|
||||
before = datetime.now()
|
||||
|
||||
for i, pkgname in enumerate(pkgnames):
|
||||
p = Popen(["./portage/bin/portageq", "metadata", "/", "ebuild", pkgname, "DEPEND"], stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir})
|
||||
r = p.communicate()
|
||||
if p.returncode != 0:
|
||||
sys.stderr.write("cannot parse %s. Output: %s\n"%(pkgname, r[1]))
|
||||
depend = r[0]
|
||||
all_use = use_reduce(depend, matchall=True)
|
||||
no_use = use_reduce(depend, matchnone=True)
|
||||
# cannot use sets because of possible sublists which are not hashable
|
||||
l = []
|
||||
for a in all_use:
|
||||
# FIXME: do not discard disjunctions
|
||||
if a not in no_use and a != "||" and not isinstance(a, list):
|
||||
try:
|
||||
l.append(Atom(a).cp)
|
||||
except InvalidAtom:
|
||||
sys.stderr.write("Invalid Atom for %s: %s\n"%(pkgname, str(a)))
|
||||
except TypeError:
|
||||
sys.stderr.write("TypeError for %s: %s\n"%(pkgname, str(a)))
|
||||
|
||||
if len(l) > 0:
|
||||
deplist.write("%s %s\n"%(pkgname, " ".join(l)))
|
||||
count +=1
|
||||
|
||||
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
||||
sys.stdout.flush()
|
||||
|
||||
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h\n"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
||||
|
||||
sys.stderr.write("Done. Took %s h\n"%format_timedelta(datetime.now()-before))
|
|
@ -1,5 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
for f in `find "./portdir/" -mindepth 2 -maxdepth 2 -type d`; do
|
||||
echo ${f#./portdir/}
|
||||
done > gen_source_pkgs.list
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue