#! /usr/bin/python
#
#  The replicator <../http-replicator> proxy server is especially useful for maintaining a debian <http://www.debian.org> package cache.
#  Such caches tend to grow huge if nothing ever gets thrown away.
#  This script is meant to be run as a (weekly?) cron task to delete the packages that are unlikely to be needed anymore.
#  A list of deleted packages is mailed by cron to the system administrator.
#  If the script is run by hand, this list goes to stdout.
#  In that case the user must be the super user; otherwise the script runs in 'dummy mode', where nothing gets deleted.

import functools
import optparse
import os
import subprocess

#  MAIN
#
#  Function [os.walk] is used to search the replicator cache for debian packages.
#  For each directory a dictionary [debs] is created which contains the present debian packages and their versions.
#  If a package's version list contains more than [KEEP] elements, the command [dpkg --compare-versions] is used to find the oldest packages to remove.
#  If [KEEP] is not given or is smaller than 1, the script exits without doing anything.
#  The variable [deleted] accumulates the size in kilobytes of all deleted packages, to summarize the newly reclaimed free space.

def main():
	"""Prune old debian packages from a http-replicator cache directory.

	Command line options:
	  -d/--dir DIR    cache directory to scan (default: current directory)
	  -k/--keep KEEP  number of versions of each package to keep

	Returns None without doing anything when KEEP is smaller than 1 (the
	default is 0).  Exits through parser.error when DIR is not a directory.
	Raises OSError when the dpkg executable cannot be started.

	Packages are only removed when running as the super user; otherwise the
	script is in 'dummy mode' and only reports what would be deleted.  The
	reported sizes are computed in both modes.
	"""

	parser = optparse.OptionParser()
	parser.add_option('-d', '--dir', type='string', default=os.path.curdir, help='cache in DIR instead of current directory')
	parser.add_option('-k', '--keep', type='int', default=0, help='number of packages to KEEP')
	options, args = parser.parse_args() # parse command line

	if options.keep < 1:
		return # without a positive KEEP the cache is never touched
	if not os.path.isdir(options.dir):
		parser.error('invalid directory %r' % options.dir)

	dummy = os.getuid() != 0 # only the super user really deletes

	# print('') rather than a bare print so the blank line is emitted by both python 2 and 3
	print('cleaning http-replicator cache...')
	print('')
	deleted = 0 # kilobytes reclaimed so far
	for root, dirs, files in os.walk(options.dir): # scan replicator cache
		debs = {} # package name -> versions present in this directory

		for filename in files:
			stem, extension = os.path.splitext(filename)
			if extension != '.deb' or '_' not in stem:
				continue # not a debian package, or no version part to split off
			program, version = stem.split('_', 1) # split package name from version
			debs.setdefault(program, []).append(version)

		for program, versions in sorted(debs.items()): # sorted for a reproducible report
			if len(versions) <= options.keep:
				continue # nothing superfluous for this package
			versions.sort(key=functools.cmp_to_key(_newest_first)) # newest version first
			kept = versions[:options.keep]
			dropped = versions[options.keep:]
			size = 0
			for version in dropped:
				path = os.path.join(root, '%s_%s.deb' % (program, version))
				size += os.path.getsize(path) / 1024.
				if not dummy:
					os.remove(path) # delete old package
			print('%6i %s kept %s deleted %s' % (size, program, ','.join(kept), ','.join(dropped)))
			deleted += size # add filesize
	print('')
	print('%6i kB deleted' % deleted)

def _dpkg_newer(x, y):
	"""Return True when [dpkg --compare-versions x gt y] exits with status 0.

	The versions are passed as separate arguments without a shell, so
	characters in a cached file name are never interpreted as shell syntax.
	Any non-zero exit status counts as 'not newer'.
	"""
	return subprocess.call(['dpkg', '--compare-versions', x, 'gt', y]) == 0

def _newest_first(x, y):
	"""Comparison function that orders version strings from newest to oldest."""
	if _dpkg_newer(x, y):
		return -1
	return 1

# Run the cleanup only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
	main()
