Public
Snippet $1 authored by josch

package graph for finding reverse closures

packagegraph.ml
(**************************************************************************)
(*                                                                        *)
(*  Copyright (C) 2012-2015 Johannes 'josch' Schauer <j.schauer@email.de> *)
(*  Copyright (C) 2012-2015 Pietro Abate <pietro.abate@pps.jussieu.fr>    *)
(*                                                                        *)
(*  This library is free software: you can redistribute it and/or modify  *)
(*  it under the terms of the GNU Lesser General Public License as        *)
(*  published by the Free Software Foundation, either version 3 of the    *)
(*  License, or (at your option) any later version.  A special linking    *)
(*  exception to the GNU Lesser General Public License applies to this    *)
(*  library, see the COPYING file for more information.                   *)
(**************************************************************************)

module PackageGraphCondensed = struct
  module IntSet = BootstrapCommon.IntSet
  module PkgV = struct
    type t =
      | Pkg of int
      | SCC of IntSet.t
    let compare x y = match (x,y) with
      | Pkg p1, Pkg p2 -> Pervasives.compare p1 p2
      | SCC s1, SCC s2 -> IntSet.compare s1 s2
      | Pkg _, SCC _ -> -1
      | SCC _, Pkg _ -> 1
    let hash = function
      (* since package ids are unique in the graph, the minimum set element unique
       * identifies sets *)
      | Pkg p -> p
      | SCC s -> IntSet.min_elt s
    let equal x y = (compare x y) = 0
  end

  module G = Graph.Imperative.Digraph.ConcreteBidirectional(PkgV)

  let dependency_graph ?(reverse=false) ?(global_constraints=true) universe =
    let gr = G.create () in
    let add_edge v1 v2 = if reverse then
        G.add_edge gr v2 v1
      else
        G.add_edge gr v1 v2
    in
    let essential = if global_constraints then begin
      let essential = Cudf.get_packages
          ~filter:(fun pkg ->
              BootstrapCommon.debtype_of_cudfpkg pkg = `BinPkg
              && BootstrapCommon.debessential_of_cudfpkg pkg)
          universe
      in
      List.map (fun pkg -> PkgV.Pkg (CudfAdd.vartoint universe pkg)) essential
    end else [] in
    Cudf.iter_packages (fun pkg ->
        let pkgv = PkgV.Pkg (CudfAdd.vartoint universe pkg) in
        G.add_vertex gr pkgv;
        List.iter (fun vpkgs ->
            (* this is a reverse dependency graph so if A depends on B then an
             * edge is added from B to A *)
            List.iter
              (fun p -> add_edge pkgv (PkgV.Pkg p))
              (List.flatten (List.map (CudfAdd.resolve_vpkg_int universe) vpkgs))
          ) pkg.Cudf.depends;
        (* add edges from all essential packages (if any) to all vertices *)
        List.iter (add_edge pkgv) essential
      ) universe;
    gr

  let condense g =
    let module C = Graph.Components.Make(G) in
    (* calculate SCCs and map each found SCC to either its original vertex in
     * case of a degenerate component of size one or to a vertex containing the
     * set of packages in the SCC *)
    let sccs = Array.map (function
        | [] -> fatal "scc cannot be empty"
        | [v] -> v
        | scc ->
          let pset = List.fold_right (function
              | PkgV.SCC _ -> fatal "cannot condense graph with SSC vertices"
              | PkgV.Pkg p -> IntSet.add p) scc IntSet.empty
          in PkgV.SCC pset)
        (C.scc_array g)
    in
    (* associate with each SCC a unique ID and create a mapping from all
     * vertices in the original graph to its SCC ID *)
    let mapping = Hashtbl.create (G.nb_vertex g) in
    Array.iteri (fun i v ->
        match v with
        | PkgV.Pkg _ -> Hashtbl.add mapping v i
        | PkgV.SCC s -> IntSet.iter (fun p -> Hashtbl.add mapping (PkgV.Pkg p) i) s
      ) sccs;
    (* go through the edges in the original graph, find out the SCC IDs that
     * their vertices belong to and if they are different, add that edge to the
     * new graph connecting the SCCs *)
    let cg = G.create () in
    G.iter_edges (fun v1 v2 ->
      let scc1 = Hashtbl.find mapping v1 in
      let scc2 = Hashtbl.find mapping v2 in
      if scc1 <> scc2 then
        G.add_edge cg sccs.(scc1) sccs.(scc2)
    ) g;
    cg

  let get_dependency_closures universe g =
    let module Dfs = Graph.Traverse.Dfs(G) in
    if Dfs.has_cycle g then fatal "cannot get dependency closures of a cyclic graph";
    (* traverse the vertices in the graph in post-order depth first search and
     * for each visited vertex V add to V the union of:
     *       1. all packages in V (one package in Pkg vertices and multiple in
     *          SCC vertices)
     *       2. all packages associated with the successors of V
     *)
    let intclosureht = Hashtbl.create (G.nb_vertex g) in
    (* a package graph of Debian Sid 2014 has 2500 SCCs *)
    let sccht = Hashtbl.create 2500 in
    let intsettovarset s = IntSet.fold
        (fun p -> CudfAdd.Cudf_set.add (CudfAdd.inttovar universe p))
        s CudfAdd.Cudf_set.empty
    in
    Dfs.postfix (fun v ->
        let succ_set = G.fold_succ (function
            | PkgV.Pkg pp -> CudfAdd.Cudf_set.union (Hashtbl.find intclosureht pp)
            | PkgV.SCC ps -> CudfAdd.Cudf_set.union (Hashtbl.find sccht ps)
          ) g v CudfAdd.Cudf_set.empty
        in
        match v with
        | PkgV.Pkg p ->
          if Hashtbl.mem intclosureht p then fatal "package already in hashtbl";
          let closure = CudfAdd.Cudf_set.add (CudfAdd.inttovar universe p) succ_set in
          Hashtbl.add intclosureht p closure
        | PkgV.SCC s ->
          if Hashtbl.mem sccht s then fatal "package already in hashtbl";
          let closure = CudfAdd.Cudf_set.union (intsettovarset s) succ_set in
          Hashtbl.add sccht s closure
      ) g;
    let closureht = Hashtbl.create (G.nb_vertex g) in
    (* expand SCCs into individual packages *)
    Hashtbl.iter
      (fun scc s -> IntSet.iter
          (fun p -> Hashtbl.add closureht (CudfAdd.inttovar universe p) s) scc)
      sccht;
    (* convert integer based hashtable into var based one *)
    Hashtbl.iter
      (fun k v -> Hashtbl.add closureht (CudfAdd.inttovar universe k) v)
      intclosureht;
    closureht
end

let get_closure_method_1 ?(global_constraints=true) univ bl sl =
  let module Set = CudfAdd.Cudf_set in
  let affectedht = Hashtbl.create (List.length bl) in
  let pool = Depsolver_int.init_pool_univ ~global_constraints univ in
  let globalid = Cudf.universe_size univ in
  let add_closure_to_ht pkg =
    let id = CudfAdd.vartoint univ pkg in
    let dc = Depsolver_int.dependency_closure_cache pool [id; globalid] in
    (* filter out globalid and convert to var *)
    let dc = List.fold_left (fun acc pid ->
        if pid = globalid then acc
        else Set.add (CudfAdd.inttovar univ pid) acc
      ) Set.empty dc
    in
    Hashtbl.add affectedht pkg dc
  in
  List.iter (add_closure_to_ht) (bl@sl);
  affectedht

let get_closure_method_2 ?(global_constraints=true) univ =
  let g = PackageGraphCondensed.dependency_graph ~reverse:false ~global_constraints univ in
  let module Dfs = Graph.Traverse.Dfs(PackageGraphCondensed.G) in
  (* if graph has cycle, condense strongly connected components into single vertices *)
  let g = if Dfs.has_cycle g then PackageGraphCondensed.condense g else g in
  let ht = PackageGraphCondensed.get_dependency_closures univ g in
  ht