initial commit
This commit is contained in:
commit
25c4835b87
3 changed files with 70 additions and 0 deletions
11
README
Normal file
11
README
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
When using `wget -rk` to locally mirror a website, wget fails to do some
|
||||||
|
things which these two scripts fix:
|
||||||
|
|
||||||
|
- If a URL contains a GET query string like
|
||||||
|
http://mysite.com/foo?bar=baz&blub=bla then a file with name
|
||||||
|
`foo?bar=baz&blub=bla` gets saved. The links from other documents to this
|
||||||
|
file do not get urlencoded. This is fixed by `urlencode.py`.
|
||||||
|
- If the copy is put online, then the web server will most likely determine the
|
||||||
|
content type of the static content by the filename extension. But files that
|
||||||
|
are saved under a name like `foo?bar=baz&blub=bla` do not have a recognized
|
||||||
|
extension. This is fixed by `filenameext.sh`.
|
36
filenameext.sh
Executable file
36
filenameext.sh
Executable file
|
@ -0,0 +1,36 @@
|
||||||
|
#!/bin/sh
#
# filenameext.sh - append a filename extension matching the detected MIME
# type to every regular file in the current directory, then update links to
# the renamed file inside the files matching *php*.
#
# Usage: run without arguments from inside the directory to process.
# Requires: file(1), sed(1), perl(1).

# Print the filename extension (without the leading dot) for MIME type $1.
# Prints nothing when the type is not in the table, so callers can tell
# "unknown type" apart from a real extension.
mime_to_ext() {
  case "$1" in
    application/gzip) echo "gz" ;;
    application/pdf) echo "pdf" ;;
    application/x-bzip2) echo "bz2" ;;
    application/x-debian-package) echo "deb" ;;
    application/x-dosexec) echo "exe" ;;
    application/x-rar) echo "rar" ;;
    application/zip) echo "zip" ;;
    audio/mpeg) echo "mp3" ;;
    audio/x-wav) echo "wav" ;;
    # NOTE(review): the bare type "binary" is mapped to wav exactly as in
    # the original table - confirm this is intended.
    binary) echo "wav" ;;
    image/gif) echo "gif" ;;
    image/jpeg) echo "jpg" ;;
    image/png) echo "png" ;;
    image/x-ms-bmp) echo "bmp" ;;
    text/html) echo "html" ;;
    text/plain) echo "txt" ;;
    text/x-tex) echo "tex" ;;
    video/mpeg) echo "mpg" ;;
    video/x-msvideo) echo "avi" ;;
  esac
}

# Rename every regular file in the current directory whose name lacks the
# extension of its MIME type, and rewrite references to it in *php* files.
# Uses the global variables f, mime, ext and encoded (POSIX sh has no
# function-local variables).
fix_extensions() {
  for f in *; do
    # an unmatched glob stays literal; do not act on a non-existent "*"
    [ -e "$f" ] || continue
    printf '%s\n' "$f"
    # skip directories
    [ -d "$f" ] && continue

    # -b drops the "name:" prefix so names containing spaces parse correctly
    mime=$(file -b --mime-type -- "$f") || continue
    ext=$(mime_to_ext "$mime")
    # unknown type: leave the file alone instead of reusing the extension
    # computed for a previous file
    [ -n "$ext" ] || continue

    # do not handle this file if the extension already matches
    case "$f" in
      *."$ext") continue ;;
    esac

    mv -- "$f" "$f.$ext" || continue

    # links refer to the file with ? and & percent-encoded
    encoded=$(printf '%s\n' "$f" | sed 's/?/%3F/g; s/&/%26/g')

    # nothing to rewrite when no file matches *php*
    set -- *php*
    [ -e "$1" ] || continue

    # pass the names through the environment so characters that are special
    # to perl ($, @, /) in a filename cannot alter the substitution
    OLD_LINK="$encoded" NEW_LINK="$encoded.$ext" \
      perl -pi -e 's/\Q$ENV{OLD_LINK}\E([#"])/$ENV{NEW_LINK}$1/g' -- "$@"
  done
}

fix_extensions
|
23
urlencode.py
Executable file
23
urlencode.py
Executable file
|
@ -0,0 +1,23 @@
|
||||||
|
#!/usr/bin/env python3
"""Percent-encode `?` and `&` in href/src links that point at .php URLs.

Usage: urlencode.py FILE...

Each FILE is rewritten in place. Files that cannot be decoded as text are
skipped.
"""

import re
import sys

# Attribute values (double-quoted) that contain ".php" somewhere in the URL.
# Kept as two separate passes, href first and then src, like the original.
_LINK_PATTERNS = (
    re.compile(r'href="[^"]+\.php[^"]*"'),
    re.compile(r'src="[^"]+\.php[^"]*"'),
)


def _encode_match(match):
    """Return the matched attribute with `?` and `&` percent-encoded."""
    s = match.group(0)
    s = s.replace("?", "%3F")
    # An HTML-escaped ampersand stands for a single "&" in the URL; encode it
    # as one character instead of producing "%26amp;".
    s = s.replace("&amp;", "%26")
    s = s.replace("&", "%26")
    return s


def encode_links(data):
    """Return `data` with every matching href/src attribute encoded."""
    for pattern in _LINK_PATTERNS:
        data = pattern.sub(_encode_match, data)
    return data


def main(filenames):
    """Rewrite each file in `filenames` in place; skip undecodable files."""
    for filename in filenames:
        try:
            # newline="" keeps the file's own line endings untouched
            with open(filename, newline="") as f:
                data = f.read()
        except UnicodeDecodeError:
            continue

        encoded = encode_links(data)
        # leave files without matching links untouched
        if encoded != data:
            with open(filename, "w", newline="") as f:
                f.write(encoded)


if __name__ == "__main__":
    main(sys.argv[1:])
|
Loading…
Reference in a new issue