restructure and integrate code
This commit is contained in:
parent
27f3811300
commit
a7e139ffb8
7 changed files with 172 additions and 22204 deletions
34
README
34
README
|
@ -8,7 +8,7 @@ $ ./update-portdir.sh
|
||||||
|
|
||||||
At this point, you can already run the following script to get the list of
|
At this point, you can already run the following script to get the list of
|
||||||
Debian source packages that can be mapped to Gentoo packages and also have
|
Debian source packages that can be mapped to Gentoo packages and also have
|
||||||
dependencies that can be dropped in Gentoo:
|
build dependencies that can be dropped in Gentoo:
|
||||||
|
|
||||||
$ ./find_reduced.py
|
$ ./find_reduced.py
|
||||||
|
|
||||||
|
@ -17,37 +17,11 @@ It will print:
|
||||||
- a list of Debian packages that cannot be mapped to Gentoo packages
|
- a list of Debian packages that cannot be mapped to Gentoo packages
|
||||||
- a list of Gentoo packages that cannot be mapped to Debian packages
|
- a list of Gentoo packages that cannot be mapped to Debian packages
|
||||||
|
|
||||||
Above script depends on the following data files to be present:
|
Overview:
|
||||||
|
|
||||||
portage - portage sourcecode
|
portage - portage sourcecode
|
||||||
* retrieved as a git submodule
|
* retrieved as a git submodule
|
||||||
portdir - database of ebuild files
|
portdir - database of ebuild files
|
||||||
* retrieved by ./update-portdir.sh
|
* retrieved by ./update-portdir.sh
|
||||||
reduced_gen_deps.list - Gentoo packages with reduced dependencies
|
deb_source_pkgs.list - list of interesting Debian source packages
|
||||||
* generated by ./gentoo_dep_list.py
|
deb2gen_mapping.list - mapping of Debian source packages to Gentoo packages
|
||||||
gen_source_pkgs.list - list of Gentoo source packages
|
|
||||||
* generated by ./gentoo_pkg_list_raw.sh
|
|
||||||
deb_source_pkgs.list - list of Debian source packages
|
|
||||||
* supplied by yourself
|
|
||||||
deb2gen_mapping.list - mapping of Debian to Gentoo packages
|
|
||||||
* created manually and/or with the help of
|
|
||||||
./find_pkg_matches.py
|
|
||||||
|
|
||||||
deb_source_pkgs.list is filled by the user. All other *.list files can be
|
|
||||||
regenerated by the following scripts:
|
|
||||||
|
|
||||||
$ ./gentoo_dep_list.py
|
|
||||||
|
|
||||||
It writes all source packages with reduced build dependencies plus the build
|
|
||||||
dependencies that can be dropped to reduced_gen_deps.list. Execution takes
|
|
||||||
about 2.5 hours.
|
|
||||||
|
|
||||||
$ ./gentoo_pkg_list_raw.sh
|
|
||||||
|
|
||||||
It writes to gen_source_pkgs.list a list of all available Gentoo packages.
|
|
||||||
|
|
||||||
$ ./find_pkg_matches.py
|
|
||||||
|
|
||||||
Inspects the current content of deb2gen_mapping.list and prints a list of
|
|
||||||
Debian packages that miss a mapping to Gentoo packages as well as a list of
|
|
||||||
suggestions of Gentoo packages that sound similar.
|
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
deb2gen = dict()
|
|
||||||
gen2deb = dict()
|
|
||||||
with open("./deb2gen_mapping.list") as f:
|
|
||||||
for line in f:
|
|
||||||
d, g = line.strip().split('\t')
|
|
||||||
deb2gen[d] = g
|
|
||||||
gen2deb[g] = d
|
|
||||||
|
|
||||||
with open("./deb_source_pkgs.list") as f:
|
|
||||||
debian_names = [p[4:].strip() for p in f]
|
|
||||||
|
|
||||||
with open("./gen_source_pkgs.list") as f:
|
|
||||||
gentoo_names = [p.strip() for p in f]
|
|
||||||
|
|
||||||
for p in debian_names:
|
|
||||||
# if mapping exists, continue
|
|
||||||
orig = p
|
|
||||||
if deb2gen.get(p):
|
|
||||||
continue
|
|
||||||
# first try exact match
|
|
||||||
matches = [g for g in gentoo_names if p == g.split('/')[-1]]
|
|
||||||
if matches:
|
|
||||||
print "%s\t%s"%(orig, matches[0])
|
|
||||||
continue
|
|
||||||
# then try substring matches
|
|
||||||
# remove '-perl' suffix
|
|
||||||
if p.endswith("-perl"):
|
|
||||||
p = p[:-5]
|
|
||||||
# remove 'python-' prefix
|
|
||||||
if p.startswith("python-"):
|
|
||||||
p = p[7:]
|
|
||||||
# remove 'lib' prefix
|
|
||||||
if p.startswith("lib"):
|
|
||||||
p = p[3:]
|
|
||||||
# remove dashes, dots and numbers from the end of the package name
|
|
||||||
p = p.rstrip(".-1234567890")
|
|
||||||
matches = [g for g in gentoo_names if p in g.split('/')[-1].lower()]
|
|
||||||
if matches:
|
|
||||||
print "%s\t%s"%(orig, str(matches))
|
|
||||||
continue
|
|
||||||
# we give up and print the debian source package name that was not matched
|
|
||||||
print orig
|
|
186
find_reduced.py
186
find_reduced.py
|
@ -1,13 +1,48 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
if len(sys.argv) != 2:
|
||||||
|
print "usage: %s /path/to/Sources.bz"%sys.argv[0]
|
||||||
|
|
||||||
|
import os
|
||||||
from os import path
|
from os import path
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
|
from functools import cmp_to_key
|
||||||
|
from datetime import datetime
|
||||||
|
import apt_pkg
|
||||||
|
|
||||||
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
|
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
|
||||||
sys.path.insert(0, pym_path)
|
sys.path.insert(0, pym_path)
|
||||||
|
|
||||||
from portage.versions import pkgsplit
|
from portage.versions import pkgcmp, pkgsplit
|
||||||
|
from portage.dep import use_reduce, Atom
|
||||||
|
from portage.exception import InvalidAtom
|
||||||
|
|
||||||
|
# given the starting time, the overall count and the current progress,
|
||||||
|
# return a string that shows the remaining time in hours/minutes/seconds
|
||||||
|
def estimate_remaining_time(before, count, i):
|
||||||
|
now = datetime.now()
|
||||||
|
delta = now-before
|
||||||
|
# delta in seconds
|
||||||
|
delta = delta.days*60*60*24+delta.seconds
|
||||||
|
if delta == 0 or i == 0:
|
||||||
|
return "n.a."
|
||||||
|
speed = float(i)/delta
|
||||||
|
remaining = (count - i)/speed
|
||||||
|
remaining_hours = remaining/3600
|
||||||
|
remaining_minutes = (remaining%3600)/60
|
||||||
|
remaining_seconds = remaining%60
|
||||||
|
return "%02d:%02d:%02d"%(remaining_hours, remaining_minutes, remaining_seconds)
|
||||||
|
|
||||||
|
# flatten an arbitrarily nested list
|
||||||
|
def flatten(l):
|
||||||
|
while l:
|
||||||
|
while l and isinstance(l[0], list):
|
||||||
|
l[0:1] = l[0]
|
||||||
|
if l: yield l.pop(0)
|
||||||
|
|
||||||
|
# TODO: what about mapping to multiple packages?
|
||||||
deb2gen = dict()
|
deb2gen = dict()
|
||||||
gen2deb = dict()
|
gen2deb = dict()
|
||||||
with open("./deb2gen_mapping.list") as f:
|
with open("./deb2gen_mapping.list") as f:
|
||||||
|
@ -19,32 +54,143 @@ with open("./deb2gen_mapping.list") as f:
|
||||||
with open("./deb_source_pkgs.list") as f:
|
with open("./deb_source_pkgs.list") as f:
|
||||||
debian_names = [p[4:].strip() for p in f]
|
debian_names = [p[4:].strip() for p in f]
|
||||||
|
|
||||||
gentoo_deps = dict()
|
portdir = "./portdir"
|
||||||
with open("./reduced_gen_deps.list") as f:
|
|
||||||
for line in f:
|
|
||||||
p = line.strip().split()
|
|
||||||
gentoo_deps[pkgsplit(p[0])[0]] = p[1:]
|
|
||||||
|
|
||||||
missing_deb_mappings = list()
|
pkgnames = list()
|
||||||
|
|
||||||
|
# traverse ./portdir to find .ebuild files that interest us
|
||||||
|
for cat in os.listdir(portdir):
|
||||||
|
catpath = os.path.join(portdir, cat)
|
||||||
|
|
||||||
|
if not os.path.isdir(catpath):
|
||||||
|
continue
|
||||||
|
|
||||||
|
for pkg in os.listdir(catpath):
|
||||||
|
pkgpath = os.path.join(catpath, pkg)
|
||||||
|
|
||||||
|
if not os.path.isdir(pkgpath):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# get all .ebuild files
|
||||||
|
pkgs = [pkgsplit(ver[:-7]) for ver in os.listdir(pkgpath) if ver.endswith(".ebuild")]
|
||||||
|
|
||||||
|
if not pkgs:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# only grab newest package
|
||||||
|
pkg, ver, rev = sorted(pkgs, key=cmp_to_key(pkgcmp), reverse=True)[0]
|
||||||
|
deb = gen2deb.get("%s/%s"%(cat, pkg))
|
||||||
|
|
||||||
|
# ensure that the package is relevant
|
||||||
|
if deb not in debian_names:
|
||||||
|
continue
|
||||||
|
|
||||||
|
pkgnames.append((cat, pkg, ver, rev))
|
||||||
|
sys.stderr.write("\rGenerating list of packages... %d"%len(pkgnames))
|
||||||
|
|
||||||
|
sys.stderr.write("\rGenerating list of packages... Done.\n")
|
||||||
|
|
||||||
|
count = 0
|
||||||
|
|
||||||
|
before = datetime.now()
|
||||||
|
|
||||||
|
debian_deps = dict()
|
||||||
missing_gen_mappings = list()
|
missing_gen_mappings = list()
|
||||||
|
|
||||||
for d in debian_names:
|
# look for reduced build dependencies
|
||||||
g = deb2gen.get(d)
|
for i, (cat, pkg, ver, rev) in enumerate(pkgnames):
|
||||||
if g:
|
if rev == "r0":
|
||||||
deps = gentoo_deps.get(g)
|
pkgname = "%s/%s-%s"%(cat, pkg, ver)
|
||||||
if deps:
|
else:
|
||||||
|
pkgname = "%s/%s-%s-%s"%(cat, pkg, ver, rev)
|
||||||
|
p = Popen(["./portage/bin/portageq", "metadata", "/", "ebuild", pkgname, "DEPEND"], stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir})
|
||||||
|
r = p.communicate()
|
||||||
|
if p.returncode != 0:
|
||||||
|
sys.stderr.write("cannot parse %s. Output: %s\n"%(pkgname, r[1]))
|
||||||
|
depend = r[0]
|
||||||
|
all_use = use_reduce(depend, matchall=True)
|
||||||
|
no_use = use_reduce(depend, matchnone=True)
|
||||||
|
l = list()
|
||||||
|
for a in all_use:
|
||||||
|
# filter out all dependencies that are also present without USE flags enabled
|
||||||
|
if a in no_use:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not isinstance(a, list) :
|
||||||
|
a = [a]
|
||||||
|
else:
|
||||||
|
# there are disjunctions, but it is better to collect too many build dependencies than too few
|
||||||
|
a = flatten(a)
|
||||||
|
|
||||||
|
for p in a:
|
||||||
|
if p == "||":
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
l.append(Atom(p).cp)
|
||||||
|
except InvalidAtom:
|
||||||
|
sys.stderr.write("Invalid Atom for %s: %s\n"%(pkgname, str(p)))
|
||||||
|
except TypeError:
|
||||||
|
sys.stderr.write("TypeError for %s: %s\n"%(pkgname, str(p)))
|
||||||
|
|
||||||
|
if l:
|
||||||
ddeps = []
|
ddeps = []
|
||||||
for p in deps:
|
for p in l:
|
||||||
r = gen2deb.get(p)
|
r = gen2deb.get(p)
|
||||||
if r:
|
|
||||||
ddeps.append(r)
|
if not r:
|
||||||
else:
|
|
||||||
missing_gen_mappings.append(p)
|
missing_gen_mappings.append(p)
|
||||||
ddeps = list(set(ddeps))
|
continue
|
||||||
if ddeps:
|
|
||||||
print d, ddeps
|
ddeps.append(r)
|
||||||
else:
|
debian_deps["%s/%s"%(cat,pkg)] = list(set(ddeps))
|
||||||
|
count +=1
|
||||||
|
|
||||||
|
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h\n"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
||||||
|
|
||||||
|
missing_deb_mappings = list()
|
||||||
|
|
||||||
|
src2bin = dict()
|
||||||
|
src_deps = dict()
|
||||||
|
|
||||||
|
for pkg in apt_pkg.TagFile(sys.argv[1]):
|
||||||
|
p = pkg['Package']
|
||||||
|
try:
|
||||||
|
src_deps[p] = [name for (name,ver,rel) in flatten(apt_pkg.parse_src_depends(pkg.get('Build-Depends', '')))]
|
||||||
|
except ValueError:
|
||||||
|
print "cannot parse depends line for %s: %s"%(pkg['Package'], pkg.get('Build-Depends'))
|
||||||
|
src_deps[p] = []
|
||||||
|
src2bin[p] = [b.strip() for b in pkg['Binary'].split(',')]
|
||||||
|
|
||||||
|
# process all debian source packages
|
||||||
|
for d in debian_names:
|
||||||
|
# find gentoo package name
|
||||||
|
g = deb2gen.get(d)
|
||||||
|
if not g:
|
||||||
missing_deb_mappings.append(d)
|
missing_deb_mappings.append(d)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# get the droppable dependencies
|
||||||
|
deps = debian_deps.get(g)
|
||||||
|
|
||||||
|
if not deps:
|
||||||
|
continue
|
||||||
|
|
||||||
|
bindeps = list()
|
||||||
|
# get the debian build dependencies
|
||||||
|
pdeps = src_deps[d]
|
||||||
|
|
||||||
|
# for each droppable dependency
|
||||||
|
for r in deps:
|
||||||
|
# convert source package to binary packages
|
||||||
|
# only retain those binary packages that are a build dependency
|
||||||
|
bindeps.extend([p for p in src2bin[r] if p in pdeps])
|
||||||
|
|
||||||
|
if bindeps:
|
||||||
|
print d, bindeps
|
||||||
|
|
||||||
print list(set(missing_deb_mappings))
|
print list(set(missing_deb_mappings))
|
||||||
print list(set(missing_gen_mappings))
|
print list(set(missing_gen_mappings))
|
||||||
|
|
15840
gen_source_pkgs.list
15840
gen_source_pkgs.list
File diff suppressed because it is too large
Load diff
|
@ -1,96 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
from os import path
|
|
||||||
from subprocess import Popen, PIPE
|
|
||||||
from functools import cmp_to_key
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
pym_path = path.join(path.dirname(path.realpath(__file__)), "portage", "pym")
|
|
||||||
sys.path.insert(0, pym_path)
|
|
||||||
|
|
||||||
from portage.versions import pkgcmp, pkgsplit
|
|
||||||
from portage.dep import use_reduce, Atom
|
|
||||||
from portage.exception import InvalidAtom
|
|
||||||
|
|
||||||
def format_timedelta(delta):
|
|
||||||
secs = delta.days*60*60*24+delta.seconds
|
|
||||||
hours = secs/3600
|
|
||||||
minutes = (secs%3600)/60
|
|
||||||
return "%02d:%02d"%(hours, minutes)
|
|
||||||
|
|
||||||
def estimate_remaining_time(before, count, i):
|
|
||||||
now = datetime.now()
|
|
||||||
delta = now-before
|
|
||||||
# delta in seconds
|
|
||||||
delta = delta.days*60*60*24+delta.seconds
|
|
||||||
if delta == 0:
|
|
||||||
return "n.a."
|
|
||||||
speed = float(i)/delta
|
|
||||||
remaining = (count - i)/speed
|
|
||||||
remaining_hours = remaining/3600
|
|
||||||
remaining_minutes = (remaining%3600)/60
|
|
||||||
return "%02d:%02d"%(remaining_hours, remaining_minutes)
|
|
||||||
|
|
||||||
portdir = "./portdir"
|
|
||||||
|
|
||||||
pkgnames = list()
|
|
||||||
|
|
||||||
for cat in os.listdir(portdir):
|
|
||||||
catpath = os.path.join(portdir, cat)
|
|
||||||
if not os.path.isdir(catpath):
|
|
||||||
continue
|
|
||||||
for pkg in os.listdir(catpath):
|
|
||||||
pkgpath = os.path.join(catpath, pkg)
|
|
||||||
if not os.path.isdir(pkgpath):
|
|
||||||
continue
|
|
||||||
pkgs = [pkgsplit(ver[:-7]) for ver in os.listdir(pkgpath) if ver.endswith(".ebuild")]
|
|
||||||
if len(pkgs) > 0:
|
|
||||||
# only grab newest package
|
|
||||||
pkg, ver, rev = sorted(pkgs, key=cmp_to_key(pkgcmp), reverse=True)[0]
|
|
||||||
if rev == "r0":
|
|
||||||
pkgname = "%s/%s-%s"%(cat, pkg, ver)
|
|
||||||
else:
|
|
||||||
pkgname = "%s/%s-%s-%s"%(cat, pkg, ver, rev)
|
|
||||||
pkgnames.append(pkgname)
|
|
||||||
sys.stderr.write("\rGenerating list of packages... %d"%len(pkgnames))
|
|
||||||
|
|
||||||
sys.stderr.write("\rGenerating list of packages... Done.\n")
|
|
||||||
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
deplist = open("./out", "wb")
|
|
||||||
|
|
||||||
before = datetime.now()
|
|
||||||
|
|
||||||
for i, pkgname in enumerate(pkgnames):
|
|
||||||
p = Popen(["./portage/bin/portageq", "metadata", "/", "ebuild", pkgname, "DEPEND"], stderr=PIPE, stdout=PIPE, env={"PORTDIR": portdir})
|
|
||||||
r = p.communicate()
|
|
||||||
if p.returncode != 0:
|
|
||||||
sys.stderr.write("cannot parse %s. Output: %s\n"%(pkgname, r[1]))
|
|
||||||
depend = r[0]
|
|
||||||
all_use = use_reduce(depend, matchall=True)
|
|
||||||
no_use = use_reduce(depend, matchnone=True)
|
|
||||||
# cannot use sets because of possible sublists which are not hashable
|
|
||||||
l = []
|
|
||||||
for a in all_use:
|
|
||||||
# FIXME: do not discard disjunctions
|
|
||||||
if a not in no_use and a != "||" and not isinstance(a, list):
|
|
||||||
try:
|
|
||||||
l.append(Atom(a).cp)
|
|
||||||
except InvalidAtom:
|
|
||||||
sys.stderr.write("Invalid Atom for %s: %s\n"%(pkgname, str(a)))
|
|
||||||
except TypeError:
|
|
||||||
sys.stderr.write("TypeError for %s: %s\n"%(pkgname, str(a)))
|
|
||||||
|
|
||||||
if len(l) > 0:
|
|
||||||
deplist.write("%s %s\n"%(pkgname, " ".join(l)))
|
|
||||||
count +=1
|
|
||||||
|
|
||||||
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
|
||||||
sys.stdout.flush()
|
|
||||||
|
|
||||||
sys.stderr.write("\rFinding reduced dependencies: %d/%d, found %d. Estimated time left: %s h\n"%(i, len(pkgnames), count, estimate_remaining_time(before, len(pkgnames), i)))
|
|
||||||
|
|
||||||
sys.stderr.write("Done. Took %s h\n"%format_timedelta(datetime.now()-before))
|
|
|
@ -1,5 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
for f in `find "./portdir/" -mindepth 2 -maxdepth 2 -type d`; do
|
|
||||||
echo ${f#./portdir/}
|
|
||||||
done > gen_source_pkgs.list
|
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue