#!/usr/bin/env python
import getopt, sys, os, time
from datetime import datetime, timedelta
from boto.services.service import Service

# Help text printed by usage(); describes the command-line options that
# main() parses.
Usage = """
    get_results.py  [-q queuename] [-m mimetype_file] [-n] path

        queuename - The name of the SQS queue containing status messages.
                    This would be the queuename passed with the -o arg
                    to the start_service.py command
        mimetype_file - A file containing additional mimetypes to be
                        loaded before processing the results.  The file
                        should consist of lines of text where each line
                        represents a new mimetype and file extension
                        separated by whitespace, e.g.:
                        
                        video/x-flv    flv

        path - The location on your local file system where results
               will be stored.
        if -n is specified, the result files will not be retrieved
        from S3, otherwise the result files will be downloaded to
        the specified path.
    """

class ResultProcessor:
    """Read result messages from a service queue, log them and report timings.

    Each message returned by the wrapped Service is appended to a CSV log
    in the results directory and folded into running statistics
    (min / max / total processing time and the overall time window).
    """

    # Format of the 'Service-Read' / 'Service-Write' timestamps in a message.
    TimeFormat = '%a, %d %b %Y %H:%M:%S %Z'
    # Name of the CSV log created inside the results directory.
    LogFileName = 'log.csv'

    def __init__(self, queue_name, mimetype_files=None):
        """Create the underlying Service and reset all statistics.

        queue_name -- passed to Service as output_queue_name.
        mimetype_files -- passed through to Service unchanged (may be None).
        """
        self.queue_name = queue_name
        self.service = Service(output_queue_name=queue_name,
                               read_userdata=False,
                               mimetype_files=mimetype_files,
                               preserve_file_name=True)
        self.log_fp = None
        self.num_files = 0
        self.total_time = 0
        # Start min/max and earliest/latest at the opposite extremes so the
        # first message always replaces them.
        self.min_time = timedelta.max
        self.max_time = timedelta.min
        self.earliest_time = datetime.max
        self.latest_time = datetime.min

    def _parse_time(self, text):
        """Convert a TimeFormat timestamp string into a naive datetime."""
        return datetime(*time.strptime(text, self.TimeFormat)[0:6])

    def _whole_seconds(self, delta):
        """Return the full length of a timedelta in seconds.

        timedelta.seconds alone ignores the days component, so intervals
        of a day or more (and negative intervals) would be misreported.
        """
        return delta.days * 86400 + delta.seconds

    def calculate_stats(self, msg):
        """Fold one message's read/write timestamps into the statistics.

        msg -- mapping with 'Service-Read' and 'Service-Write' entries
               formatted according to TimeFormat.
        """
        start_time = self._parse_time(msg['Service-Read'])
        end_time = self._parse_time(msg['Service-Write'])
        elapsed_time = end_time - start_time
        if elapsed_time > self.max_time:
            self.max_time = elapsed_time
        if elapsed_time < self.min_time:
            self.min_time = elapsed_time
        self.total_time += self._whole_seconds(elapsed_time)
        if start_time < self.earliest_time:
            self.earliest_time = start_time
        if end_time > self.latest_time:
            self.latest_time = end_time

    def log_message(self, msg, path):
        """Append one message to the CSV log, creating the log on first use.

        The header row is written from the sorted keys of the first
        message logged.  Values containing a comma are wrapped in double
        quotes.

        msg -- mapping of string keys to string values.
        path -- directory in which the log file is created.
        """
        # sorted() works whether keys() returns a list or a view.
        keys = sorted(msg.keys())
        if not self.log_fp:
            self.log_fp = open(os.path.join(path, self.LogFileName), 'w')
            self.log_fp.write(','.join(keys) + '\n')
        values = []
        for key in keys:
            value = msg[key]
            # 'in' also catches a comma in the very first position, which
            # a "find(...) > 0" test would miss.
            if ',' in value:
                value = '"%s"' % value
            values.append(value)
        self.log_fp.write(','.join(values) + '\n')

    def get_results(self, path, get_file=True):
        """Retrieve every available result, log it and print a summary.

        Results are fetched from the Service until it returns a false
        value.  The log file is closed even if retrieval fails part way.

        path -- directory for the log (and downloaded files); created,
                including missing parent directories, if absent.
        get_file -- passed through to Service.get_result.
        """
        total_files = 0
        if not os.path.isdir(path):
            os.makedirs(path)
        try:
            m = self.service.get_result(path, get_file=get_file)
            while m:
                total_files += 1
                self.log_message(m, path)
                self.calculate_stats(m)
                m = self.service.get_result(path, get_file=get_file)
        finally:
            if self.log_fp:
                self.log_fp.close()
        self.num_files = total_files
        print('%d results successfully retrieved.' % total_files)
        if total_files > 0:
            self.avg_time = float(self.total_time)/total_files
            print('Minimum Processing Time: %d'
                  % self._whole_seconds(self.min_time))
            print('Maximum Processing Time: %d'
                  % self._whole_seconds(self.max_time))
            print('Average Processing Time: %f' % self.avg_time)
            self.elapsed_time = self.latest_time-self.earliest_time
            elapsed_seconds = self._whole_seconds(self.elapsed_time)
            print('Elapsed Time: %d' % elapsed_seconds)
            if elapsed_seconds > 0:
                tput = 1.0 / ((elapsed_seconds/60.0) / total_files)
                print('Throughput: %f transactions / minute' % tput)
            else:
                # Everything finished within the same second; a rate
                # cannot be computed without dividing by zero.
                print('Throughput: n/a (elapsed time is zero)')
        
def usage():
    """Print the command-line help text stored in the module-level Usage."""
    print(Usage)
  
def main():
    """Parse the command line and retrieve results into the given path.

    Options:
        -h, --help            print the usage text and exit
        -m, --mimetypes FILE  extra mimetype definitions handed to the
                              ResultProcessor
        -n, --no-retrieve     do not download result files
                              (--no_retrieve is accepted as well)
        -q, --queue NAME      name of the queue holding status messages

    Exits with status 2 on an option-parsing error.  Prints the usage
    text and exits when no path argument is supplied.  Returns 1 after
    the results have been processed.
    """
    # Imported here because the module's top-level imports do not
    # include mimetypes, which add_type() below needs.
    import mimetypes

    try:
        # Long options that take a value need a trailing '=' so getopt
        # consumes their argument.  Both spellings of the no-retrieve
        # flag are registered for backward compatibility.
        opts, args = getopt.getopt(sys.argv[1:], 'hm:nq:',
                                   ['help', 'mimetypes=',
                                    'no-retrieve', 'no_retrieve',
                                    'queue='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    queue_name = None
    mimetype_file = None
    get_file = True
    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
            sys.exit()
        if o in ('-m', '--mimetypes'):
            mimetype_file = [a]
        if o in ('-n', '--no-retrieve', '--no_retrieve'):
            get_file = False
        if o in ('-q', '--queue'):
            queue_name = a
    if not args:
        usage()
        sys.exit()
    path = args[0]
    # mimetypes doesn't know about flv files, let's clue it in
    mimetypes.add_type('video/x-flv', '.flv')
    s = ResultProcessor(queue_name, mimetype_file)
    s.get_results(path, get_file)
    return 1

# Run the command-line entry point only when this file is executed as a
# script.  The value returned by main() is discarded here, so it does not
# become the process exit status.
if __name__ == "__main__":
    main()


# syntax highlighted by Code2HTML, v. 0.9.1