#! /usr/bin/env python
# -*- mode: python; make-backup-files: nil -*-

# Changelog:
# Version 0.1: initial release
# Version 0.2: changed mmap call to use access= parameter
#              got rid of tab indents

"""Mirror a local directory tree to an FTP server.

The state believed to be on the server is kept in a local "sync" file
(MD5 checksum per file plus the list of directories).  Each run computes
the checksums of the local tree, compares them with the sync file, and
issues only the FTP commands (mkdir, rename, upload, delete, rmdir)
needed to make the server match.

Usage: script archive_dir server server_dir [sync_dir]
"""

import os
from stat import S_ISDIR, S_ISREG, ST_MODE, ST_SIZE
from mmap import mmap, ACCESS_READ
from netrc import netrc
from ftplib import FTP, error_perm

try:
    # hashlib supersedes the md5 module, which no longer exists in Python 3.
    from hashlib import md5 as _md5_new
except ImportError:
    import md5
    _md5_new = md5.new


def log(s):
    """Print the progress message `s` on standard output."""
    print(s)


def sort(l):
    """Return a new list holding the items of `l` in ascending order."""
    nl = list(l)
    nl.sort()
    return nl


def reversesort(l):
    """Return a new list holding the items of `l` in descending order."""
    nl = list(l)
    nl.sort()
    nl.reverse()
    return nl


def archive_md5s(root_directory):
    """Checksum every regular file below `root_directory`.

    Returns a pair `(checksums, dirstructure)`:

    * `checksums` maps each file's path, relative to `root_directory`,
      to the hex MD5 digest of its contents;
    * `dirstructure` maps each visited directory's relative path to the
      string 'dir' (the root itself is the empty string).

    Sub-directories whose relative path starts with '.' or ends with
    '.unused' are not descended into.
    """

    def rootopen(pathname, mode):
        """os.open() a path given relative to the root directory."""
        return os.open(os.path.join(root_directory, pathname), mode)

    def rootstat(pathname):
        """os.stat() a path given relative to the root directory."""
        return os.stat(os.path.join(root_directory, pathname))

    def rootlistdir(directory):
        """os.listdir() a path given relative to the root directory."""
        return os.listdir(os.path.join(root_directory, directory))

    def file_md5(pathname, size):
        """Return the hex MD5 digest of the `size`-byte file `pathname`."""
        digest = _md5_new()
        if size == 0:
            # An empty file cannot be memory-mapped; its digest is simply
            # the digest of no data.
            return digest.hexdigest()
        fd = rootopen(pathname, os.O_RDONLY)
        try:
            m = mmap(fd, size, access=ACCESS_READ)
            try:
                digest.update(m)
            finally:
                m.close()
            return digest.hexdigest()
        finally:
            os.close(fd)

    def sub_archive_md5s(reldir):
        """Recursively collect checksums and directories below `reldir`."""
        pathnames = [os.path.join(reldir, filename)
                     for filename in rootlistdir(reldir)]
        stats = [(pathname, rootstat(pathname)) for pathname in pathnames]
        subdirs = [s for s in stats if S_ISDIR(s[1][ST_MODE])]
        files = [s for s in stats if S_ISREG(s[1][ST_MODE])]
        checksums = {}
        dirstructure = {reldir: 'dir'}
        for pathname, stat in files:
            checksums[pathname] = file_md5(pathname, stat[ST_SIZE])
        for subdir, stat in subdirs:
            # NOTE(review): `subdir` is the path relative to the root, so
            # the '.' test only matches hidden directories at the top
            # level -- confirm whether nested ones should be skipped too.
            if subdir[:1] == '.' or subdir[-7:] == '.unused':
                continue
            c, d = sub_archive_md5s(subdir)
            checksums.update(c)
            dirstructure.update(d)
        return checksums, dirstructure

    return sub_archive_md5s('')


def write_checksums(filename, checksums, dirstructure):
    """Write `checksums` and `dirstructure` to the text file `filename`.

    Directories come first, one "dir <path>" line each in sorted order,
    followed by one "<md5> <path>" line per file, sorted by path.
    """
    f = open(filename, "w")
    try:
        for a in sort(dirstructure.keys()):
            f.write("dir %s\n" % a)
        for a, b in sort(checksums.items()):
            f.write("%s %s\n" % (b, a))
    finally:
        f.close()


def read_checksums(filename):
    """Read a file written by write_checksums().

    Returns the pair `(checksums, dirstructure)` in the same shape as
    archive_md5s().  Blank lines are ignored; a non-blank line without a
    space separator raises ValueError.
    """
    checksums = {}
    dirstructure = {}
    f = open(filename, "r")
    try:
        for line in f:
            if line[-1:] == '\n':
                line = line[:-1]
            if not line:
                # Tolerate blank lines (e.g. a trailing newline added by
                # an editor) instead of failing on the unpack below.
                continue
            c, p = line.split(' ', 1)
            if c == 'dir':
                dirstructure[p] = "dir"
            else:
                checksums[p] = c
        return checksums, dirstructure
    finally:
        f.close()


def login(server, server_dir):
    """Connect to `server` and change into `server_dir`.

    Credentials are looked up in the user's netrc file.  Returns the
    connected ftplib.FTP object.
    """
    n = netrc()
    auth = n.authenticators(server)
    if auth is None:
        # NOTE(review): with an empty user name the FTP constructor
        # connects but does not log in -- confirm this is intended.
        user, account, password = '', '', ''
    else:
        user, account, password = auth
    ftp = FTP(server, user, password, account)
    ftp.cwd(server_dir)
    return ftp


def sync(archive_dir, server, server_dir, sync_dir=''):
    """Make `server_dir` on `server` mirror the local `archive_dir`.

    The last known server state is read from the file
    "sync,<server>,<server_dir>" in `sync_dir`, which must already exist.
    After the run (even a failed one) that file is rewritten with the
    state reached so far, and the local checksums are written to
    "chek,<server>,<server_dir>".  Nothing is written, and no connection
    is made, when the local tree already matches the sync file.
    """
    sync_file = os.path.join(sync_dir, "sync,%s,%s" % (server, server_dir))
    check_file = os.path.join(sync_dir, "chek,%s,%s" % (server, server_dir))

    log("Reading sync file '%s'" % sync_file)
    ftp_checksums, ftp_dirstructure = read_checksums(sync_file)

    log("Determining archive md5s")
    arc_checksums, arc_dirstructure = archive_md5s(archive_dir)

    if (arc_checksums == ftp_checksums
            and arc_dirstructure == ftp_dirstructure):
        log("No changes to be made")
        return

    log("Logging into server '%s'" % server)
    ftp = login(server, server_dir)
    try:
        log("Making new directories")
        # Sorted order creates parents before their children.
        for arcdir in sort(arc_dirstructure.keys()):
            if arcdir not in ftp_dirstructure and arcdir:
                log(" %s" % arcdir)
                ftp.mkd(arcdir)
                ftp_dirstructure[arcdir] = "dir"

        log("Renaming moved files")
        # Iterate over a snapshot: the dict is modified inside the loop.
        for ftpfile, ftpmd5 in list(ftp_checksums.items()):
            if ftp_checksums.get(ftpfile) != ftpmd5:
                # An earlier rename in this loop overwrote this remote
                # path, so the snapshot entry no longer describes it.
                continue
            if (ftpfile in arc_checksums
                    and arc_checksums[ftpfile] == ftpmd5):
                continue
            # Look for a local path with the same content that the server
            # does not yet hold; the remote file can be moved there
            # instead of being uploaded again.
            for arcfile, arcmd5 in arc_checksums.items():
                if (ftpmd5 == arcmd5
                        and (arcfile not in ftp_checksums
                             or ftp_checksums[arcfile] != arcmd5)):
                    break
            else:
                continue
            log(" %s -> %s" % (ftpfile, arcfile))
            ftp.rename(ftpfile, arcfile)
            del ftp_checksums[ftpfile]
            ftp_checksums[arcfile] = ftpmd5

        log("Uploading new and changed files")
        for arcfile, arcmd5 in arc_checksums.items():
            if (arcfile not in ftp_checksums
                    or arcmd5 != ftp_checksums[arcfile]):
                log(" %s" % arcfile)
                # Binary mode: storbinary must send the bytes unchanged.
                f = open(os.path.join(archive_dir, arcfile), "rb")
                try:
                    ftp.storbinary("STOR %s" % arcfile, f)
                finally:
                    f.close()
                ftp_checksums[arcfile] = arcmd5

        log("Removing deleted files")
        for ftpfile in list(ftp_checksums.keys()):
            if ftpfile not in arc_checksums:
                log(" %s" % ftpfile)
                ftp.delete(ftpfile)
                del ftp_checksums[ftpfile]

        log("Removing deleted directories")
        # Reverse-sorted order removes children before their parents.
        for ftpdir in reversesort(ftp_dirstructure.keys()):
            if ftpdir not in arc_dirstructure:
                log(" %s" % ftpdir)
                ftp.rmd(ftpdir)
                del ftp_dirstructure[ftpdir]
    finally:
        # Record whatever state was reached, so an interrupted run can be
        # resumed; always attempt to log off, even if writing fails.
        try:
            log("Writing sync file")
            write_checksums(sync_file, ftp_checksums, ftp_dirstructure)
            log("Writing archive check file")
            write_checksums(check_file, arc_checksums, arc_dirstructure)
        finally:
            log("Logging off")
            ftp.quit()


if __name__ == '__main__':
    import sys
    args = sys.argv[1:]
    if len(args) not in (3, 4):
        sys.stderr.write(
            "usage: %s archive_dir server server_dir [sync_dir]\n"
            % sys.argv[0])
        sys.exit(2)
    sync(*args)