mmdebstrap/caching_proxy.py
Johannes Schauer Marin Rodrigues 5885291213
Use an caching apt proxy instead of copying /var/cache/apt/archives/*.deb
- only download Release files once and not by apt as well as with curl
   and thus avoid a mirror push happening between both downloads
 - no heuristic needed to place the file in their correct mirror
   location
 - no manual checksum checking
 - only throttle download speed when actually downloading and not when
   retrieving files from the cache
 - no translation of filenames between how the epoch colon is stored in
   files in /var/cache/apt/archives versus how it is stored in files on
   the mirrors
 - no special handling of stable update and security mirrors
 - implemented in Python instead of shell and thus an order of magnitude
   faster
2023-03-03 08:34:39 +01:00

108 lines
3.7 KiB
Python
Executable file

#!/usr/bin/env python3
import sys
import os
import time
import http.client
import http.server
from io import StringIO
import pathlib
import urllib.parse
oldcachedir = None
newcachedir = None
readonly = False
class ProxyRequestHandler(http.server.BaseHTTPRequestHandler):
def do_GET(self):
assert int(self.headers.get("Content-Length", 0)) == 0
assert self.headers["Host"]
pathprefix = "http://" + self.headers["Host"] + "/"
assert self.path.startswith(pathprefix)
sanitizedpath = urllib.parse.unquote(self.path.removeprefix(pathprefix))
oldpath = oldcachedir / sanitizedpath
newpath = newcachedir / sanitizedpath
if not readonly:
newpath.parent.mkdir(parents=True, exist_ok=True)
# just send back to client
if newpath.exists():
print(f"proxy cached: {self.path}", file=sys.stderr)
self.wfile.write(b"HTTP/1.1 200 OK\r\n")
self.send_header("Content-Length", newpath.stat().st_size)
self.end_headers()
with newpath.open(mode="rb") as new:
while True:
buf = new.read(64 * 1024) # same as shutil uses
if not buf:
break
self.wfile.write(buf)
self.wfile.flush()
return
if readonly:
newpath = pathlib.Path("/dev/null")
# copy from oldpath to newpath and send back to client
if oldpath.exists():
print(f"proxy cached: {self.path}", file=sys.stderr)
self.wfile.write(b"HTTP/1.1 200 OK\r\n")
self.send_header("Content-Length", oldpath.stat().st_size)
self.end_headers()
with oldpath.open(mode="rb") as old, newpath.open(mode="wb") as new:
while True:
buf = old.read(64 * 1024) # same as shutil uses
if not buf:
break
self.wfile.write(buf)
new.write(buf)
self.wfile.flush()
return
# download fresh copy
try:
print(f"\rproxy download: {self.path}", file=sys.stderr)
conn = http.client.HTTPConnection(self.headers["Host"], timeout=5)
conn.request("GET", self.path, None, dict(self.headers))
res = conn.getresponse()
assert (res.status, res.reason) == (200, "OK"), (res.status, res.reason)
self.wfile.write(b"HTTP/1.1 200 OK\r\n")
for k, v in res.getheaders():
# do not allow a persistent connection
if k == "connection":
continue
self.send_header(k, v)
self.end_headers()
with newpath.open(mode="wb") as f:
while True:
buf = res.read(64 * 1024) # same as shutil uses
if not buf:
break
self.wfile.write(buf)
f.write(buf)
time.sleep(64 / 1024) # 1024 kB/s
self.wfile.flush()
except Exception as e:
self.send_error(502)
def main():
global oldcachedir, newcachedir, readonly
if sys.argv[1] == "--readonly":
readonly = True
oldcachedir = pathlib.Path(sys.argv[2])
newcachedir = pathlib.Path(sys.argv[3])
else:
oldcachedir = pathlib.Path(sys.argv[1])
newcachedir = pathlib.Path(sys.argv[2])
print(f"starting caching proxy for {newcachedir}", file=sys.stderr)
httpd = http.server.ThreadingHTTPServer(
server_address=("", 8080), RequestHandlerClass=ProxyRequestHandler
)
httpd.serve_forever()
if __name__ == "__main__":
main()