mmdebstrap/tarfilter

#!/usr/bin/env python3
#
# This script is in the public domain
#
# Author: Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
#
# This script accepts a tarball on standard input and filters it according to
# the same rules used by dpkg --path-exclude and --path-include, using command
# line options of the same name. The result is then printed on standard output.
#
# A tool like this should be written in C but libarchive has issues:
# https://github.com/libarchive/libarchive/issues/587
# https://github.com/libarchive/libarchive/pull/1288/ (needs 3.4.1)
# Should these issues get fixed, then a good template is tarfilter.c in the
# examples directory of libarchive.
#
# We are not using Perl either, because Archive::Tar slurps the whole tarball
# into memory.
#
# We could also use Go but meh...
# https://stackoverflow.com/a/59542307/784669

import tarfile
import sys
import argparse
import fnmatch
import re


class _GlobFilter:
    """A shell glob that is compiled once but still remembers its own text.

    ``match`` is the bound ``match`` method of the regular expression that
    fnmatch builds for the glob, so matching costs the same as calling it on
    the compiled regex directly.  ``pattern`` is the glob exactly as it was
    given on the command line (the equivalent of dpkg's ``f->pattern``).
    """

    __slots__ = ("pattern", "match")

    def __init__(self, glob):
        self.pattern = glob
        # Translate and compile only once: fnmatch.fnmatch() would redo this
        # for every archive member once its internal cache overflows.
        self.match = re.compile(fnmatch.translate(glob)).match


class PathFilterAction(argparse.Action):
    """Collect --path-exclude and --path-include patterns in command line order.

    Every use of either option appends a ``(dest, matcher)`` tuple to the
    ``pathfilter`` list on the namespace, where ``dest`` is ``path_exclude``
    or ``path_include``.  The attribute only exists if at least one of the
    options was given.

    ``matcher.match(path)`` behaves like ``match`` of the compiled glob.
    ``matcher.pattern`` is the original shell pattern and not the translated
    regular expression: path_filter_should_skip() in main() cuts that string
    at the first wildcard to find the literal directory prefix of an include
    pattern.  fnmatch.translate() wraps its result (for example in
    ``(?s:...)``), so doing this on the regex text never yields a usable
    prefix and excluded directories were never re-included.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        items = getattr(namespace, "pathfilter", [])
        items.append((self.dest, _GlobFilter(values)))
        setattr(namespace, "pathfilter", items)


class PaxFilterAction(argparse.Action):
    """Collect --pax-exclude and --pax-include patterns in command line order.

    Every use of either option appends a ``(dest, compiled regex)`` tuple to
    the ``paxfilter`` list on the namespace.  The list is created on first
    use, so the attribute is absent when neither option was given.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # translate the shell glob once instead of once per pax header
        compiled = re.compile(fnmatch.translate(values))
        try:
            collected = namespace.paxfilter
        except AttributeError:
            collected = []
            namespace.paxfilter = collected
        collected.append((self.dest, compiled))


class TransformAction(argparse.Action):
    """Parse a sed-style replace expression given to --transform/--xform.

    The expression has the form ``s<d>regex<d>replacement<d>`` where ``<d>``
    is an arbitrary delimiter character, optionally followed by the single
    flag ``i`` for case-insensitive matching.  A delimiter inside the regex
    or the replacement has to be escaped with a backslash.  The replacement
    may be empty.

    Every use of the option appends a ``(compiled regex, replacement)`` tuple
    to the ``trans`` list on the namespace.  The attribute only exists if the
    option was given at least once.

    Raises ValueError if the expression is malformed.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        items = getattr(namespace, "trans", [])
        # This function mimics what src/transform.c from tar does
        if not values.startswith("s"):
            raise ValueError("regex must start with an 's'")
        if len(values) <= 4:
            # minimum regex: s/x//
            raise ValueError("invalid regex (too short)")
        d = values[1]
        if values.startswith(f"s{d}{d}"):
            raise ValueError("empty regex")
        values = values.removeprefix(f"s{d}")
        flags = 0
        if values.endswith(f"{d}i"):
            # trailing flags
            flags = re.IGNORECASE
            values = values.removesuffix(f"{d}i")
        # The delimiter ends up inside two character classes below.  It is
        # chosen by the user, so it has to be escaped or a delimiter like "]"
        # or "\" changes the meaning of the tokenizer regex (or makes it
        # invalid).
        delim = re.escape(d)
        # This regex only finds non-empty tokens.
        # Finding empty tokens would require a variable length look-behind
        # or \K in order to find escaped delimiters which is not supported by
        # the python re module.
        tokens = re.findall(rf"(?:\\[\\{delim}]|[^{delim}])+", values)
        if not tokens:
            raise ValueError("invalid regex: not enough terms")
        if len(tokens) > 2:
            raise ValueError("invalid regex: too many terms: %s" % tokens)
        # a missing second token means that the replacement is empty
        repl = tokens[1] if len(tokens) == 2 else ""
        items.append((re.compile(tokens[0], flags), repl))
        setattr(namespace, "trans", items)


def main():
    """Filter the tarball on standard input and write it to standard output.

    The command line is parsed with argparse.  Every member of the input
    archive then goes through the following steps, in this order:

    1. members rejected by --path-exclude/--path-include are dropped
    2. --strip-components removes leading path components and drops members
       that do not have more components than that
    3. pax headers rejected by --pax-exclude/--pax-include are removed
    4. --idshift is added to uid and gid; the process exits with status 1 if
       either would become negative
    5. every --transform expression is applied to the member name

    The output is always written as an uncompressed pax format archive.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""\
Filters a tarball on standard input by the same rules as the dpkg --path-exclude
and --path-include options and writes resulting tarball to standard output. See
dpkg(1) for information on how these two options work in detail. To reuse the
exact same semantics as used by dpkg, paths must be given as /path and not as
./path even though they might be stored as such in the tarball.

Secondly, filter out unwanted pax extended headers. This is useful in cases
where a tool only accepts certain xattr prefixes. For example tar2sqfs only
supports SCHILY.xattr.user.*, SCHILY.xattr.trusted.* and
SCHILY.xattr.security.* but not SCHILY.xattr.system.posix_acl_default.*.

Both types of options use Unix shell-style wildcards:

       * matches everything
       ? matches any single character
   [seq] matches any character in seq
  [!seq] matches any character not in seq

Thirdly, transform the path of tar members using a sed expression just as with
GNU tar --transform.

Fourthly, strip leading directory components off of tar members. Just as with
GNU tar --strip-components, tar members that have less or equal components in
their path are not passed through.

Lastly, shift user id and group id of each entry by the value given by the
--idshift argument. The resulting uid or gid must not be negative.
""",
    )
    parser.add_argument(
        "--path-exclude",
        metavar="pattern",
        action=PathFilterAction,
        help="Exclude path matching the given shell pattern. "
        "This option can be specified multiple times.",
    )
    parser.add_argument(
        "--path-include",
        metavar="pattern",
        action=PathFilterAction,
        help="Re-include a pattern after a previous exclusion. "
        "This option can be specified multiple times.",
    )
    parser.add_argument(
        "--pax-exclude",
        metavar="pattern",
        action=PaxFilterAction,
        help="Exclude pax header matching the given globbing pattern. "
        "This option can be specified multiple times.",
    )
    parser.add_argument(
        "--pax-include",
        metavar="pattern",
        action=PaxFilterAction,
        help="Re-include a pax header after a previous exclusion. "
        "This option can be specified multiple times.",
    )
    parser.add_argument(
        "--transform",
        "--xform",
        metavar="EXPRESSION",
        action=TransformAction,
        help="Use sed replace EXPRESSION to transform file names. "
        "This option can be specified multiple times.",
    )
    parser.add_argument(
        "--strip-components",
        metavar="NUMBER",
        type=int,
        help="Strip NUMBER leading components from file names",
    )
    parser.add_argument(
        "--idshift",
        metavar="NUM",
        type=int,
        help="Integer value by which to shift the uid and gid of each entry",
    )
    args = parser.parse_args()
    # The custom actions only create pathfilter/paxfilter/trans on the
    # namespace when their option was used, hence the hasattr() checks here
    # and below.
    #
    # NOTE(review): argparse always sets strip_components (to None if the
    # option is absent), so the third condition is never true and this raw
    # copy never runs. It is left unchanged on purpose: enabling it would
    # change the output (the input would no longer be decompressed and
    # rewritten as pax) and it would have to take --transform and --idshift
    # into account as well.
    if (
        not hasattr(args, "pathfilter")
        and not hasattr(args, "paxfilter")
        and not hasattr(args, "strip_components")
    ):
        from shutil import copyfileobj

        copyfileobj(sys.stdin.buffer, sys.stdout.buffer)
        # sys.exit() instead of exit(): the latter is only injected by the
        # site module and is missing with "python -S" or in frozen builds
        sys.exit()

    # same logic as in dpkg/src/filters.c/filter_should_skip()
    # group 1 is everything before the first wildcard character
    prefix_prog = re.compile(r"^([^*?[\\]*).*")

    def path_filter_should_skip(member):
        """Return True if the path filters say that member must be dropped."""
        if not hasattr(args, "pathfilter"):
            return False
        # names are stored as ./path but the patterns are given as /path
        path = member.name[1:]
        skip = False
        # all patterns are tried in command line order, the last match wins
        for t, r in args.pathfilter:
            if r.match(path) is not None:
                skip = t != "path_include"
        # An excluded directory or symlink is kept anyway if its path starts
        # with the wildcard-free prefix of any include pattern.
        if skip and (member.isdir() or member.issym()):
            for t, r in args.pathfilter:
                if t != "path_include":
                    continue
                prefix = prefix_prog.sub(r"\1", r.pattern)
                prefix = prefix.rstrip("/")
                if path.startswith(prefix):
                    return False
        return skip

    def pax_filter_should_skip(header):
        """Return True if the pax header with the given name must be removed."""
        if not hasattr(args, "paxfilter"):
            return False
        skip = False
        # all patterns are tried in command line order, the last match wins
        for t, r in args.paxfilter:
            if r.match(header) is None:
                continue
            skip = t != "pax_include"
        return skip

    # starting with Python 3.8, the default format became PAX_FORMAT, so this
    # is only for compatibility with older versions of Python 3
    with tarfile.open(fileobj=sys.stdin.buffer, mode="r|*") as in_tar, tarfile.open(
        fileobj=sys.stdout.buffer, mode="w|", format=tarfile.PAX_FORMAT
    ) as out_tar:
        for member in in_tar:
            if path_filter_should_skip(member):
                continue
            if args.strip_components:
                comps = member.name.split("/")
                # just as with GNU tar, archive members with less or equal
                # number of components are not passed through at all
                if len(comps) <= args.strip_components:
                    continue
                member.name = "/".join(comps[args.strip_components :])
            member.pax_headers = {
                k: v
                for k, v in member.pax_headers.items()
                if not pax_filter_should_skip(k)
            }
            if args.idshift:
                # only a negative shift can push an id below zero
                if args.idshift < 0 and -args.idshift > member.uid:
                    print("uid cannot be negative", file=sys.stderr)
                    sys.exit(1)
                if args.idshift < 0 and -args.idshift > member.gid:
                    print("gid cannot be negative", file=sys.stderr)
                    sys.exit(1)
                member.uid += args.idshift
                member.gid += args.idshift
            if hasattr(args, "trans"):
                # expressions are applied one after another in command line
                # order, each one to the result of the previous one
                for r, s in args.trans:
                    member.name = r.sub(s, member.name)
            if member.isfile():
                # only regular files have their content copied
                with in_tar.extractfile(member) as file:
                    out_tar.addfile(member, file)
            else:
                out_tar.addfile(member)

# only run the filter when executed as a script and not when imported
if __name__ == "__main__":
    main()
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`#!/usr/bin/env python3`
			`#`
			`# This script is in the public domain`
			`#`
add my name to several scripts 2021-09-16 14:24:16 +00:00			`# Author: Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>`
			`#`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`# This script accepts a tarball on standard input and filters it according to`
			`# the same rules used by dpkg --path-exclude and --path-include, using command`
			`# line options of the same name. The result is then printed on standard output.`
			`#`
			`# A tool like this should be written in C but libarchive has issues:`
			`# https://github.com/libarchive/libarchive/issues/587`
			`# https://github.com/libarchive/libarchive/pull/1288/ (needs 3.4.1)`
			`# Should these issues get fixed, then a good template is tarfilter.c in the`
			`# examples directory of libarchive.`
			`#`
			`# We are not using Perl either, because Archive::Tar slurps the whole tarball`
			`# into memory.`
			`#`
			`# We could also use Go but meh...`
			`# https://stackoverflow.com/a/59542307/784669`

			`import tarfile`
			`import sys`
			`import argparse`
Optimize mmtarfilter to handle many path exclusions mmtarfilter uses fnmatch to handle path exclusions and inclusions. Python's fnmatch handles shell patterns by translating them to regular expressions, with a 256-entry LRU cache. With more than 256 path exclusions or inclusions, this LRU cache no longer works, and every invocation of fnmatch on every file in every package will re-translate and re-compile a regular expression, resulting in much worse performance. Translate all the shell patterns to regular expressions once. For an mmdebstrap invocation with around 500 path filters, this speeds up mmdebstrap by more than a minute. 2020-12-31 20:49:16 +00:00			`import fnmatch`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`import re`


tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`class PathFilterAction(argparse.Action):`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`def __call__(self, parser, namespace, values, option_string=None):`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`items = getattr(namespace, "pathfilter", [])`
Optimize mmtarfilter to handle many path exclusions mmtarfilter uses fnmatch to handle path exclusions and inclusions. Python's fnmatch handles shell patterns by translating them to regular expressions, with a 256-entry LRU cache. With more than 256 path exclusions or inclusions, this LRU cache no longer works, and every invocation of fnmatch on every file in every package will re-translate and re-compile a regular expression, resulting in much worse performance. Translate all the shell patterns to regular expressions once. For an mmdebstrap invocation with around 500 path filters, this speeds up mmdebstrap by more than a minute. 2020-12-31 20:49:16 +00:00			`regex = re.compile(fnmatch.translate(values))`
			`items.append((self.dest, regex))`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`setattr(namespace, "pathfilter", items)`


			`class PaxFilterAction(argparse.Action):`
			`def __call__(self, parser, namespace, values, option_string=None):`
			`items = getattr(namespace, "paxfilter", [])`
			`regex = re.compile(fnmatch.translate(values))`
			`items.append((self.dest, regex))`
			`setattr(namespace, "paxfilter", items)`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00

tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`class TransformAction(argparse.Action):`
			`def __call__(self, parser, namespace, values, option_string=None):`
			`items = getattr(namespace, "trans", [])`
			`# This function mimics what src/transform.c from tar does`
			`if not values.startswith("s"):`
			`raise ValueError("regex must start with an 's'")`
			`if len(values) <= 4:`
			`# minimum regex: s/x//`
			`raise ValueError("invalid regex (too short)")`
			`d = values[1]`
			`if values.startswith(f"s{d}{d}"):`
			`raise ValueError("empty regex")`
			`values = values.removeprefix(f"s{d}")`
			`flags = 0`
			`if values.endswith(f"{d}i"):`
			`# trailing flags`
			`flags = re.IGNORECASE`
			`values = values.removesuffix(f"{d}i")`
			`# This regex only finds non-empty tokens.`
			`# Finding empty tokens would require a variable length look-behind`
			`# or \K in order to find escaped delimiters which is not supported by`
			`# the python re module.`
			`tokens = re.findall(rf"(?:\\[\\{d}]\|[^{d}])+", values)`
			`match len(tokens):`
			`case 0:`
			`raise ValueError("invalid regex: not enough terms")`
			`case 1:`
			`repl = ""`
			`case 2:`
			`repl = tokens[1]`
			`case _:`
			`raise ValueError("invalid regex: too many terms: %s" % tokens)`
			`items.append((re.compile(tokens[0], flags), repl))`
			`setattr(namespace, "trans", items)`


no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`def main():`
tarfilter/taridshift: use argparse 2020-11-26 23:32:14 +00:00			`parser = argparse.ArgumentParser(`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`formatter_class=argparse.RawDescriptionHelpFormatter,`
tarfilter/taridshift: use argparse 2020-11-26 23:32:14 +00:00			`description="""\`
			`Filters a tarball on standard input by the same rules as the dpkg --path-exclude`
			`and --path-include options and writes resulting tarball to standard output. See`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`dpkg(1) for information on how these two options work in detail. To reuse the`
			`exact same semantics as used by dpkg, paths must be given as /path and not as`
			`./path even though they might be stored as such in the tarball.`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`Secondly, filter out unwanted pax extended headers. This is useful in cases`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`where a tool only accepts certain xattr prefixes. For example tar2sqfs only`
			`supports SCHILY.xattr.user., SCHILY.xattr.trusted. and`
			`SCHILY.xattr.security.* but not SCHILY.xattr.system.posix_acl_default.*.`

			`Both types of options use Unix shell-style wildcards:`

			`* matches everything`
			`? matches any single character`
			`[seq] matches any character in seq`
			`[!seq] matches any character not in seq`
tarfilter: add --strip-components option 2022-02-11 21:56:38 +00:00
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`Thirdly, transform the path of tar members using a sed expression just as with`
			`GNU tar --transform.`

			`Fourthly, strip leading directory components off of tar members. Just as with`
tarfilter: add --strip-components option 2022-02-11 21:56:38 +00:00			`GNU tar --strip-components, tar members that have less or equal components in`
			`their path are not passed through.`
tarfilter --idshift now provides taridshift 2022-08-31 03:35:40 +00:00
			`Lastly, shift user id and group id of each entry by the value given by the`
			`--idshift argument. The resulting uid or gid must not be negative.`
			`""",`
tarfilter/taridshift: use argparse 2020-11-26 23:32:14 +00:00			`)`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`parser.add_argument(`
			`"--path-exclude",`
			`metavar="pattern",`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`action=PathFilterAction,`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`help="Exclude path matching the given shell pattern. "`
			`"This option can be specified multiple times.",`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`)`
			`parser.add_argument(`
			`"--path-include",`
			`metavar="pattern",`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`action=PathFilterAction,`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`help="Re-include a pattern after a previous exclusion. "`
			`"This option can be specified multiple times.",`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`)`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`parser.add_argument(`
			`"--pax-exclude",`
			`metavar="pattern",`
			`action=PaxFilterAction,`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`help="Exclude pax header matching the given globbing pattern. "`
			`"This option can be specified multiple times.",`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`)`
			`parser.add_argument(`
			`"--pax-include",`
			`metavar="pattern",`
			`action=PaxFilterAction,`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`help="Re-include a pax header after a previous exclusion. "`
			`"This option can be specified multiple times.",`
			`)`
			`parser.add_argument(`
			`"--transform",`
			`"--xform",`
			`metavar="EXPRESSION",`
			`action=TransformAction,`
			`help="Use sed replace EXPRESSION to transform file names. "`
			`"This option can be specified multiple times.",`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`)`
tarfilter: add --strip-components option 2022-02-11 21:56:38 +00:00			`parser.add_argument(`
			`"--strip-components",`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`metavar="NUMBER",`
tarfilter: add --strip-components option 2022-02-11 21:56:38 +00:00			`type=int,`
			`help="Strip NUMBER leading components from file names",`
			`)`
tarfilter --idshift now provides taridshift 2022-08-31 03:35:40 +00:00			`parser.add_argument(`
			`"--idshift",`
			`metavar="NUM",`
			`type=int,`
			`help="Integer value by which to shift the uid and gid of each entry",`
			`)`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`args = parser.parse_args()`
tarfilter: add --strip-components option 2022-02-11 21:56:38 +00:00			`if (`
			`not hasattr(args, "pathfilter")`
			`and not hasattr(args, "paxfilter")`
			`and not hasattr(args, "strip_components")`
			`):`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`from shutil import copyfileobj`

			`copyfileobj(sys.stdin.buffer, sys.stdout.buffer)`
			`exit()`

			`# same logic as in dpkg/src/filters.c/filter_should_skip()`
tarfilter: Compile prefix pattern only once According to Debian bug #978742, mmtarfilter has a slow performance with many path exclusions. The execution can be speed up if the regular expression is only compiled once instead of every time in the hot loop. Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com> 2021-02-18 20:14:11 +00:00			`prefix_prog = re.compile(r"^([^?[\\]).*")`
tarfilter: fixup last commit by formatting with black 2021-02-22 12:45:55 +00:00
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`def path_filter_should_skip(member):`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`skip = False`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`if not hasattr(args, "pathfilter"):`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`return False`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`for (t, r) in args.pathfilter:`
Optimize mmtarfilter to handle many path exclusions mmtarfilter uses fnmatch to handle path exclusions and inclusions. Python's fnmatch handles shell patterns by translating them to regular expressions, with a 256-entry LRU cache. With more than 256 path exclusions or inclusions, this LRU cache no longer works, and every invocation of fnmatch on every file in every package will re-translate and re-compile a regular expression, resulting in much worse performance. Translate all the shell patterns to regular expressions once. For an mmdebstrap invocation with around 500 path filters, this speeds up mmdebstrap by more than a minute. 2020-12-31 20:49:16 +00:00			`if r.match(member.name[1:]) is not None:`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`if t == "path_include":`
			`skip = False`
			`else:`
			`skip = True`
			`if skip and (member.isdir() or member.issym()):`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`for (t, r) in args.pathfilter:`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`if t != "path_include":`
			`continue`
tarfilter: Compile prefix pattern only once According to Debian bug #978742, mmtarfilter has a slow performance with many path exclusions. The execution can be speed up if the regular expression is only compiled once instead of every time in the hot loop. Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com> 2021-02-18 20:14:11 +00:00			`prefix = prefix_prog.sub(r"\1", r.pattern)`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`prefix = prefix.rstrip("/")`
			`if member.name[1:].startswith(prefix):`
			`return False`
			`return skip`

tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`def pax_filter_should_skip(header):`
			`if not hasattr(args, "paxfilter"):`
			`return False`
			`skip = False`
			`for (t, r) in args.paxfilter:`
			`if r.match(header) is None:`
			`continue`
			`if t == "pax_include":`
			`skip = False`
			`else:`
			`skip = True`
			`return skip`

no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`# starting with Python 3.8, the default format became PAX_FORMAT, so this`
			`# is only for compatibility with older versions of Python 3`
			`with tarfile.open(fileobj=sys.stdin.buffer, mode="r\|*") as in_tar, tarfile.open(`
			`fileobj=sys.stdout.buffer, mode="w\|", format=tarfile.PAX_FORMAT`
			`) as out_tar:`
			`for member in in_tar:`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`if path_filter_should_skip(member):`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`continue`
tarfilter: add --strip-components option 2022-02-11 21:56:38 +00:00			`if args.strip_components:`
			`comps = member.name.split("/")`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`# just as with GNU tar, archive members with less or equal`
			`# number of components are not passed through at all`
tarfilter: add --strip-components option 2022-02-11 21:56:38 +00:00			`if len(comps) <= args.strip_components:`
			`continue`
			`member.name = "/".join(comps[args.strip_components :])`
tarfilter: add --pax-exclude and --pax-include to strip extended attributes because tar2sqfs only supports user., trusted. and security.* 2021-05-07 07:39:40 +00:00			`member.pax_headers = {`
			`k: v`
			`for k, v in member.pax_headers.items()`
			`if not pax_filter_should_skip(k)`
			`}`
tarfilter --idshift now provides taridshift 2022-08-31 03:35:40 +00:00			`if args.idshift:`
			`if args.idshift < 0 and -args.idshift > member.uid:`
			`print("uid cannot be negative", file=sys.stderr)`
			`exit(1)`
			`if args.idshift < 0 and -args.idshift > member.gid:`
			`print("gid cannot be negative", file=sys.stderr)`
			`exit(1)`
			`member.uid += args.idshift`
			`member.gid += args.idshift`
tarfilter: add --transform option 2022-08-31 03:52:28 +00:00			`if hasattr(args, "trans"):`
			`for r, s in args.trans:`
			`member.name = r.sub(s, member.name)`
no longer needs to install twice when --depkgopt=path-exclude is given by filtering the tarball with new tarfilter utility 2020-08-25 11:02:33 +00:00			`if member.isfile():`
			`with in_tar.extractfile(member) as file:`
			`out_tar.addfile(member, file)`
			`else:`
			`out_tar.addfile(member)`


			`if __name__ == "__main__":`
			`main()`