initial commit
This commit is contained in:
commit
25c4835b87
3 changed files with 70 additions and 0 deletions
11
README
Normal file
11
README
Normal file
|
@ -0,0 +1,11 @@
|
|||
When using `wget -rk` to locally mirror a website, wget fails to do some
|
||||
things which these two scripts fix:
|
||||
|
||||
- if a url contains a GET query string like
|
||||
http://mysite.com/foo?bar=baz&blub=bla then a file with name
|
||||
`foo?bar=baz&blub=bla` gets saved. The links from other documents to this
|
||||
file do not get urlencoded. This is fixed by `urlencode.py`.
|
||||
- if the copy is put online then the webserver will most likely determine the
|
||||
content type of the static content by the filename extension. But files that
|
||||
are saved under a name like `foo?bar=baz&blub=bla` do not have a recognized
|
||||
extension. This is fixed by `filenameext.sh`.
|
36
filenameext.sh
Executable file
36
filenameext.sh
Executable file
|
@ -0,0 +1,36 @@
|
|||
#!/bin/sh
#
# filenameext.sh - give every regular file in the current directory a filename
# extension chosen from its detected MIME type, then update the links to the
# renamed files inside the *php* documents of the mirror.
#
# Usage: run without arguments from inside the mirrored directory.
#
# Files saved by `wget -rk` carry names such as `foo?bar=baz&blub=bla`; these
# contain shell glob characters, so every filename expansion below is quoted.

for f in *; do
	# an unmatched glob leaves the literal pattern behind; nothing to do then
	[ -e "$f" ] || continue
	printf '%s\n' "$f"
	# skip directories
	[ -d "$f" ] && continue
	# Reset for every file so that an unrecognised MIME type cannot reuse the
	# extension selected for the previous file.
	ext=
	# -b suppresses the "name:" prefix, so blanks or colons in the filename
	# cannot shift the field that holds the MIME type.
	case $(file -b --mime-type -- "$f") in
		application/gzip) ext="gz" ;;
		application/pdf) ext="pdf" ;;
		application/x-bzip2) ext="bz2" ;;
		application/x-debian-package) ext="deb" ;;
		application/x-dosexec) ext="exe" ;;
		application/x-rar) ext="rar" ;;
		application/zip) ext="zip" ;;
		audio/mpeg) ext="mp3" ;;
		audio/x-wav) ext="wav" ;;
		# NOTE(review): "binary" is mapped to wav as in the original script;
		# presumably a workaround for misdetected audio - confirm.
		binary) ext="wav" ;;
		image/gif) ext="gif" ;;
		image/jpeg) ext="jpg" ;;
		image/png) ext="png" ;;
		image/x-ms-bmp) ext="bmp" ;;
		text/html) ext="html" ;;
		text/plain) ext="txt" ;;
		text/x-tex) ext="tex" ;;
		video/mpeg) ext="mpg" ;;
		video/x-msvideo) ext="avi" ;;
	esac
	# unknown MIME type: leave the file untouched
	[ -n "$ext" ] || continue
	# do not handle this file if the extension already matches; the dot is
	# part of the pattern so that `index.php?fmt=html` is still renamed
	case $f in
		*."$ext") continue ;;
	esac
	# do not rewrite any links when the rename itself failed
	mv -- "$f" "$f.$ext" || continue
	# now replace & and ? in f, giving the percent-encoded form used in links
	enc=$(printf '%s\n' "$f" | sed 's/?/%3F/g; s/&/%26/g')
	# The name is handed to perl through the environment instead of being
	# pasted into the program text, so `/`, `$` or `@` in a filename cannot
	# break or alter the substitution. Only references that are terminated
	# by `#` or `"` are rewritten.
	OLD_NAME=$enc NEW_EXT=$ext perl -pi -e \
		's/\Q$ENV{OLD_NAME}\E([#"])/$ENV{OLD_NAME}.$ENV{NEW_EXT}$1/g' *php*
done
|
23
urlencode.py
Executable file
23
urlencode.py
Executable file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python3
"""Percent-encode `?` and `&` inside links that point at saved .php pages.

Usage: urlencode.py FILE...

Each FILE is read as text, every `href="..."` and `src="..."` attribute whose
value contains `.php` has its `?` replaced by `%3F` and its `&` by `%26`, and
the file is written back in place. Files that cannot be decoded as text are
skipped.
"""

import re
import sys

# Attribute patterns are applied one after the other, href first, then src.
LINK_PATTERNS = (
    re.compile(r'href="[^"]+\.php[^"]*"'),
    re.compile(r'src="[^"]+\.php[^"]*"'),
)


def urlencode_links(data):
    """Return `data` with `?` and `&` percent-encoded inside .php links.

    Only text matched by one of LINK_PATTERNS is modified; everything else is
    returned unchanged.
    """

    def encode(match):
        # The whole attribute (name, quotes and value) is matched; neither the
        # attribute name nor the quotes contain `?` or `&`, so replacing on
        # the full match only affects the value.
        return match.group(0).replace("?", "%3F").replace("&", "%26")

    for pattern in LINK_PATTERNS:
        data = pattern.sub(encode, data)
    return data


def main(argv=None):
    """Rewrite every file named in `argv` (default: sys.argv[1:]) in place."""
    filenames = sys.argv[1:] if argv is None else argv
    for filename in filenames:
        try:
            # newline="" keeps the original line endings (e.g. CRLF) intact
            # instead of normalising them on the round trip.
            with open(filename, newline="") as f:
                original = f.read()
        except UnicodeDecodeError:
            # not a text file in the current encoding; leave it alone
            continue

        updated = urlencode_links(original)
        if updated == original:
            # nothing to encode; avoid rewriting the file needlessly
            continue

        with open(filename, "w", newline="") as f:
            f.write(updated)


if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue