#!/usr/bin/env python
"""Remove duplicate files below the current directory.

Every regular file found by walking the tree is hashed with MD5.  The first
file seen with a given digest is kept; every later file with the same digest
is deleted.  Files are considered identical when their MD5 digests match; no
additional byte-for-byte comparison is made.

When run as a script, the mapping of digest -> kept path is printed at the
end.
"""
from hashlib import md5
import os

# Read size used while hashing, so large files are never loaded whole.
_CHUNK_SIZE = 64 * 1024


def _file_digest(path):
    """Return the hexadecimal MD5 digest of the bytes stored at *path*."""
    digest = md5()
    # Binary mode: hash the exact bytes on disk.  Text mode would apply
    # newline translation on some platforms and yields ``str`` on Python 3,
    # which ``md5`` rejects.
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def remove_duplicates(top='.'):
    """Delete files under *top* whose content duplicates an earlier file.

    The tree is traversed with ``os.walk``; the first path encountered for
    each digest is kept and any later path with the same digest is removed
    with ``os.remove``.

    Args:
        top: Directory to scan recursively.  Defaults to the current
            working directory.

    Returns:
        A dict mapping each MD5 hex digest to the path of the file that was
        kept for that digest.

    Raises:
        IOError / OSError: If a file cannot be read or removed.
    """
    seen = {}
    for root, _dirs, files in os.walk(top):
        for name in files:
            path = os.path.join(root, name)
            # A symlink hashes the same as its target.  Treating the pair as
            # duplicates could delete the real file and leave a dangling
            # link, and a dangling link cannot be opened at all, so links
            # are left untouched.
            if os.path.islink(path):
                continue
            digest = _file_digest(path)
            if digest in seen:
                os.remove(path)
            else:
                seen[digest] = path
    return seen


if __name__ == '__main__':
    filedict = remove_duplicates('.')
    # Single-argument call form prints identically on Python 2 and 3.
    print(filedict)