#!/usr/bin/python
import sys,mailbox

def print_top_N(key, senders, limiter):
    """Print the busiest senders for one reporting period.

    Args:
        key: Label for the period (a year, a "month-year" string, ...);
            rendered with ``%s``.
        senders: Mapping of sender address -> message count.
        limiter: Maximum number of senders to list.  It is used as a slice
            bound, so ``None`` lists every sender.

    Writes to stdout a header line ``<key> <total> messages``, then one
    ``<percent>% <sender>`` line per listed sender (highest count first),
    followed by a blank line.  Percentages are relative to the total over
    *all* senders, not just the listed ones.
    """
    ranked = sorted(senders.items(), key=lambda item: item[1],
                    reverse=True)[:limiter]
    total = sum(senders.values())

    def share(count):
        # A non-empty mapping whose counts add up to zero would otherwise
        # raise ZeroDivisionError; report a 0% share instead.
        if not total:
            return 0.0
        return 100 * (count / float(total))

    listing = '\n'.join("%02.2f%% %s" % (share(count), sender)
                        for sender, count in ranked)
    # A single parenthesised argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call.
    print("%s %i messages\n%s\n" % (key, total, listing))

def convert(orig_from, new_from, years_dict):
    """Fold the message counts of one address into another, in place.

    Args:
        orig_from: Address whose counts are to be moved.
        new_from: Address that receives those counts.
        years_dict: Nested mapping ``{year: {month: {sender: count}}}``;
            every month that contains ``orig_from`` is modified.

    Returns:
        None.  ``years_dict`` is mutated.
    """
    if orig_from == new_from:
        # Merging an address into itself must be a no-op; popping and then
        # adding back onto the same key would double its count.
        return
    for months in years_dict.values():
        for senders in months.values():
            if orig_from in senders:
                moved = senders.pop(orig_from)
                senders[new_from] = senders.get(new_from, 0) + moved

def collapse_mail(mbox, into=None, min_year=2000, max_year=2007):
    """Count messages per sender, grouped by year and month.

    Args:
        mbox: Iterable of message objects providing ``getdate(name)`` and
            ``getaddr(name)`` (the rfc822-style interface of the messages
            produced by ``mailbox.UnixMailbox``).
        into: Optional existing result mapping to accumulate into; a new
            dict is created when omitted.
        min_year: Earliest year (inclusive) to count.
        max_year: Latest year (inclusive) to count.

    Returns:
        The mapping ``{year: {month: {sender: count}}}`` (``into`` itself
        when it was supplied).  Sender addresses are lower-cased.

    Messages that cannot be attributed -- no parseable Date header or no
    From address -- are skipped rather than aborting the whole run.
    """
    counts = {} if into is None else into
    for msg in mbox:
        date = msg.getdate('Date')
        if date is None:
            # Missing or unparseable Date header: no period to file it under.
            continue
        year, month = date[0], date[1]
        if year < min_year or year > max_year:
            continue
        address = msg.getaddr('from')[1]
        if address is None:
            # No From header: nobody to credit the message to.
            continue
        sender = address.lower()
        per_month = counts.setdefault(year, {}).setdefault(month, {})
        per_month[sender] = per_month.get(sender, 0) + 1
    return counts

def dump_years(data, limiter=20):
    """Print a top-senders report for each year, in ascending year order.

    Args:
        data: Nested mapping ``{year: {month: {sender: count}}}``.
        limiter: Maximum number of senders listed per year.
    """
    for year in sorted(data):
        # Sum every month's per-sender counts into one tally for the year.
        yearly = {}
        for month_counts in data[year].values():
            for sender, count in month_counts.items():
                yearly[sender] = yearly.get(sender, 0) + count
        print_top_N(year, yearly, limiter)

def dump_months(data, limiter=20):
    """Print a top-senders report for each month, in chronological order.

    Args:
        data: Nested mapping ``{year: {month: {sender: count}}}``.
        limiter: Maximum number of senders listed per month.
    """
    for year in sorted(data):
        by_month = data[year]
        for month in sorted(by_month):
            # Periods are labelled "<month>-<year>".
            label = '%i-%i' % (month, year)
            print_top_N(label, by_month[month], limiter)

# (address to fold away, address that receives its counts), applied in order.
_ADDRESS_ALIASES = (
    ('ferringb@gentoo.org', 'ferringb@gmail.com'),
    ('ciaranm@gentoo.org', 'ciaranm@ciaranm.org'),
    ('ciaran.mccreesh@blueyonder.co.uk', 'ciaranm@ciaranm.org'),
    ('warnera6@egr.msu.edu', 'antarus@gentoo.org'),
    ('stuart@gentoo.org', 'stuart.herbert@gmail.com'),
    ('agaffney@technaut.darktalker.net', 'agaffney@gentoo.org'),
    ('agaffney@skylineaero.com', 'agaffney@gentoo.org'),
    ('foser@foser.dyn.warande.net', 'foser@gentoo.org'),
    ('drobbins.daniel@gmail.com', 'drobbins@gentoo.org'),
    ('jasonbstubbs@mailandnews.com', 'jstubbs@gentoo.org'),
    ('robbat2@orbis-terrarum.net', 'robbat2@gentoo.org'),
    ('daniel.armyr@home.se', 'danarmak@gentoo.org'),
    ('dams@idm.fr', 'dams@gentoo.org'),
    ('jmaynard@gentoo.org', 'jmaynard@conmicro.cx'),
    ('elfyn.mcbratney@emcb.co.uk', 'beu@gentoo.org'),
    ('dirtyepic.sk@gmail.com', 'dirtyepic@gentoo.org'),
    ('flameeyes@users.berlios.de', 'flameeyes@gentoo.org'),
    ('fmccor@inforead.com', 'fmccor@gentoo.org'),
    ('genone@genone.de', 'genone@gentoo.org'),
    ('luke-jr@utopios.org', 'luke-jr@gentoo.org'),
)


def do_conversions(data):
    """Merge known alternate addresses into one address per person.

    Args:
        data: Nested mapping ``{year: {month: {sender: count}}}``; it is
            modified in place through ``convert``.
    """
    for old_address, merged_address in _ADDRESS_ALIASES:
        convert(old_address, merged_address, data)

if __name__ == '__main__':
    # Usage: <script> MBOX_FILE -- prints the per-year top-senders report.
    if len(sys.argv) != 2:
        print("need filename to parse")
        sys.exit(1)
    # collapse_mail() iterates over the whole mailbox before returning, so
    # the file can be closed as soon as the tallies have been built.
    with open(sys.argv[1]) as mbox_file:
        data = collapse_mail(mailbox.UnixMailbox(mbox_file))
    do_conversions(data)
    dump_years(data)
    sys.exit(0)
