#!/usr/bin/env python
# Copyright (c) 2007-2014 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/AUTHOR)
# GPLv2 / GPLv3
from __future__ import print_function
from builtins import range
from builtins import object
import datetime
import getopt
import glob
import os
import pickle
import platform
import re
import shutil
import subprocess
import sys
import time
import zlib

# Refuse to run on interpreters older than 2.6. Passing a string to
# sys.exit() prints it on stderr and exits with status 1.
if sys.version_info < (2, 6):
    sys.exit("Python 2.6 or higher is required for gitstats")

# True when running under Python 3; used to decide whether subprocess
# output has to be decoded from bytes.
PY3 = sys.version_info > (3,)

from multiprocessing import Pool

# Export LC_ALL=C to this process and every command it spawns
# (presumably so that git/gnuplot output is not localized -- TODO confirm).
os.environ['LC_ALL'] = 'C'

# Preamble shared by the generated gnuplot scripts.
GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
ON_LINUX = platform.system() == 'Linux'
WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

# Timing bookkeeping: seconds spent in external commands / internally, and
# the wall-clock moment the module was loaded.
exectime_internal = 0.0
exectime_external = 0.0
time_start = time.time()

# gnuplot is looked up on the PATH unless the environment variable
# "GNUPLOT" names another executable.
gnuplot_cmd = os.environ.get('GNUPLOT', 'gnuplot')

# Run-time configuration with its default values.
conf = {
    'max_domains': 10,
    # file extensions longer than this are counted as "no extension"
    'max_ext_length': 10,
    # stylesheet copied next to the generated report
    'style': 'gitstats.css',
    'max_authors': 20,
    'authors_top': 5,
    # commit range that is analysed: commit_begin..commit_end
    'commit_begin': '',
    'commit_end': 'HEAD',
    # when true, line statistics follow first parents only
    'linear_linestats': 1,
    # overrides the project name derived from the directory name
    'project_name': '',
    # size of the multiprocessing worker pools
    'processes': 8,
    # when non-empty, passed to git as --since
    'start_date': ''
}

def getpipeoutput(cmds, quiet = False):
    """Run shell commands chained as a pipeline and return the final output.

    cmds  -- non-empty list of shell command strings; the stdout of each
             command is connected to the stdin of the next one.
    quiet -- when true, nothing is printed about the executed commands.

    Returns the stdout of the last command as text with trailing newlines
    stripped. The elapsed wall-clock time is added to the module-level
    ``exectime_external`` counter.
    """
    global exectime_external
    start = time.time()
    if not quiet and ON_LINUX and os.isatty(1):
        print('>> ' + ' | '.join(cmds), end=' ')
        sys.stdout.flush()
    last = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
    processes = [last]
    for cmd in cmds[1:]:
        upstream = last
        last = subprocess.Popen(cmd, stdin = upstream.stdout, stdout = subprocess.PIPE, shell = True)
        # The child owns its own copy of the pipe now. Closing ours lets the
        # upstream process receive SIGPIPE if the downstream one exits early.
        upstream.stdout.close()
        processes.append(last)
    raw = last.communicate()[0]
    if PY3:
        # Undecodable bytes (e.g. author names in a legacy encoding) are
        # replaced instead of aborting the whole run.
        output = raw.decode('utf-8', 'replace')
    else:
        output = raw
    for proc in processes:
        proc.wait()
    end = time.time()
    if not quiet:
        if ON_LINUX and os.isatty(1):
            print('\r', end=' ')
        print('[%.5f] >> %s' % (end - start, ' | '.join(cmds)))
    exectime_external += (end - start)
    return output.rstrip('\n')

def getlogrange(defaultrange = 'HEAD', end_only = True):
    """Return the revision arguments for git log-style commands.

    The commit range comes from getcommitrange(); when conf['start_date']
    is set, a --since option is put in front of the (quoted) range.
    """
    commit_range = getcommitrange(defaultrange, end_only)
    since = conf['start_date']
    if len(since) == 0:
        return commit_range
    return '--since="%s" "%s"' % (since, commit_range)

def getcommitrange(defaultrange = 'HEAD', end_only = False):
    """Return the configured commit range as a git revision expression.

    Yields conf['commit_end'] alone when end_only is set or no begin commit
    is configured, "<begin>..<end>" otherwise, and defaultrange when no end
    commit is configured at all.
    """
    end = conf['commit_end']
    if len(end) == 0:
        return defaultrange
    if end_only:
        return end
    begin = conf['commit_begin']
    if len(begin) == 0:
        return end
    return '%s..%s' % (begin, end)

def getkeyssortedbyvalues(dict):
    """Return the keys of the mapping ordered by ascending value.

    Keys with equal values are ordered by the keys themselves.
    """
    pairs = [(value, key) for key, value in dict.items()]
    pairs.sort()
    return [key for (_, key) in pairs]

# Example: d['author'] = { 'commits': 512 } -> getkeyssortedbyvaluekey(d, 'commits')
def getkeyssortedbyvaluekey(d, key):
    """Return the keys of d ordered by ascending d[k][key].

    Keys whose records compare equal on that field are ordered by the keys
    themselves.
    """
    decorated = [(d[name][key], name) for name in d]
    decorated.sort()
    return [name for (_, name) in decorated]

def getstatsummarycounts(line):
    """Extract [files, insertions, deletions] from a git --shortstat line.

    git leaves out the insertion and/or deletion part when it is zero; the
    missing slots are filled with the integer 0. Numbers found on the line
    are returned as strings, so callers are expected to convert with int().
    A line that matches none of the known shapes is returned as found (the
    list then does not have three elements).
    """
    numbers = re.findall(r'\d+', line)
    if len(numbers) == 1:
        # neither insertions nor deletions: may probably only happen for "0 files changed"
        numbers.extend([0, 0])
    elif len(numbers) == 2 and '(+)' in line:
        numbers.append(0)     # only insertions were printed on line
    elif len(numbers) == 2 and '(-)' in line:
        numbers.insert(1, 0)  # only deletions were printed on line
    return numbers

# Version identifier; the value 0 makes getversion() look it up through git.
VERSION = "ad6df85"
def getversion():
    """Return the gitstats version string.

    When VERSION is 0 it is resolved once with "git rev-parse --short" run
    against the directory containing this file, and stored in VERSION.
    """
    global VERSION
    if VERSION != 0:
        return VERSION
    gitstats_repo = os.path.dirname(os.path.abspath(__file__))
    rev = getcommitrange('HEAD').split('\n')[0]
    cmd = "git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" % (gitstats_repo, gitstats_repo, rev)
    VERSION = getpipeoutput([cmd])
    return VERSION

def getgitversion():
    """Return the first line printed by "git --version"."""
    report = getpipeoutput(['git --version'])
    return report.split('\n')[0]

def getgnuplotversion():
    """Return the first line printed by "<gnuplot_cmd> --version"."""
    report = getpipeoutput(['%s --version' % gnuplot_cmd])
    return report.split('\n')[0]

def getnumoffilesfromrev(time_rev):
    """
    Count the entries that "git ls-tree -r --name-only" lists for a revision.

    time_rev is a (timestamp, rev) pair; the result is the tuple
    (int(timestamp), rev, number_of_entries).
    """
    stamp_text, rev = time_rev
    stamp = int(stamp_text)
    listing = getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l'])
    count = int(listing.split('\n')[0])
    return (stamp, rev, count)

def getnumoflinesinblob(ext_blob):
    """
    Count the lines of a blob with "git cat-file blob <id> | wc -l".

    ext_blob is an (extension, blob_id) pair; the result is the tuple
    (extension, blob_id, line_count).
    """
    ext, blob_id = ext_blob
    wc_output = getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l'])
    linecount = int(wc_output.split()[0])
    return (ext, blob_id, linecount)

class DataCollector(object):
    """Manages data collection from a revision control repository.

    This base class owns the statistics containers and the on-disk cache
    helpers. Its query methods return placeholder values; subclasses are
    expected to fill the containers in collect() and override the queries.
    """
    def __init__(self):
        self.stamp_created = time.time()
        self.cache = {}
        self.total_authors = 0
        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits
        self.activity_by_month_of_year = {} # month [1-12] -> commits
        self.activity_by_hour_of_week = {} # weekday -> hour -> commits
        self.activity_by_hour_of_day_busiest = 0
        self.activity_by_hour_of_week_busiest = 0
        self.activity_by_year_week = {} # yy_wNN -> commits
        self.activity_by_year_week_peak = 0

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}

        self.total_commits = 0
        self.total_files = 0
        self.authors_by_commits = 0

        # domains
        self.domains = {} # domain -> commits

        # author of the month
        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits
        self.lines_added_by_month = {} # month -> lines added
        self.lines_added_by_year = {} # year -> lines added
        self.lines_removed_by_month = {} # month -> lines removed
        self.lines_removed_by_year = {} # year -> lines removed
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0
        self.last_active_day = None
        self.active_days = set()

        # lines
        self.total_lines = 0
        self.total_lines_added = 0
        self.total_lines_removed = 0

        # size
        self.total_size = 0

        # timezone
        self.commits_by_timezone = {} # timezone -> commits

        # tags
        self.tags = {}

        self.files_by_stamp = {} # stamp -> files

        # extensions
        self.extensions = {} # extension -> files, lines

        # line statistics
        self.changes_by_date = {} # stamp -> { files, ins, del }

    ##
    # This should be the main function to extract data from the repository.
    def collect(self, dir):
        """Remember the repository directory and derive the project name.

        The name is conf['project_name'] when set, otherwise the base name
        of the absolute path of dir.
        """
        self.dir = dir
        if len(conf['project_name']) == 0:
            self.projectname = os.path.basename(os.path.abspath(dir))
        else:
            self.projectname = conf['project_name']

    ##
    # Load cacheable data
    def loadCache(self, cachefile):
        """Replace self.cache with the contents of cachefile, if it exists.

        The file is expected to hold a zlib-compressed pickle; a plain
        (uncompressed) pickle written by older versions is accepted too.
        Errors raised while unpickling propagate to the caller.
        """
        if not os.path.exists(cachefile):
            return
        print('Loading cache...')
        with open(cachefile, 'rb') as f:
            raw = f.read()
        # NOTE(security): pickle executes arbitrary code while loading, so the
        # cache file must only ever come from a trusted location.
        try:
            self.cache = pickle.loads(zlib.decompress(raw))
        except zlib.error:
            # temporary hack to upgrade non-compressed caches
            self.cache = pickle.loads(raw)

    ##
    # Produce any additional statistics from the extracted data.
    def refine(self):
        pass

    ##
    # : get a dictionary of author
    def getAuthorInfo(self, author):
        return None

    def getActivityByDayOfWeek(self):
        return {}

    def getActivityByHourOfDay(self):
        return {}

    # : get a dictionary of domains
    def getDomainInfo(self, domain):
        return None

    ##
    # Get a list of authors
    def getAuthors(self):
        return []

    def getFirstCommitDate(self):
        return datetime.datetime.now()

    def getLastCommitDate(self):
        return datetime.datetime.now()

    def getStampCreated(self):
        """Return the time.time() value recorded when this object was created."""
        return self.stamp_created

    def getTags(self):
        return []

    def getTotalAuthors(self):
        return -1

    def getTotalCommits(self):
        return -1

    def getTotalFiles(self):
        return -1

    def getTotalLOC(self):
        return -1

    ##
    # Save cacheable data
    def saveCache(self, cachefile):
        """Write self.cache to cachefile as a zlib-compressed pickle.

        The data is written to "<cachefile>.tmp" first and then moved over
        the previous cache file.
        """
        print('Saving cache...')
        tmp_path = cachefile + '.tmp'
        # Serialize before touching the disk so that a pickling failure does
        # not leave a stray temporary file behind.
        data = zlib.compress(pickle.dumps(self.cache))
        with open(tmp_path, 'wb') as f:
            f.write(data)
        # os.rename() refuses to overwrite an existing target on some
        # platforms, hence the explicit removal of the old cache.
        try:
            os.remove(cachefile)
        except OSError:
            pass
        os.rename(tmp_path, cachefile)

class GitDataCollector(DataCollector):
    """Collects repository statistics by running git through getpipeoutput()."""

    def collect(self, dir):
        """Gather all statistics for the repository.

        dir is only handed to DataCollector.collect() (project name); the git
        commands themselves are started without an explicit working
        directory, i.e. they run in the current directory of the process.
        """
        DataCollector.collect(self, dir)

        self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
        #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        self._collect_tags()
        self._collect_revision_stats()
        self._collect_files_by_stamp()
        self._collect_extensions()
        self._collect_line_stats()
        self._collect_author_line_stats()

    @staticmethod
    def _map_in_pool(func, items):
        """Apply func to every item in a worker pool and return the results as a list."""
        if not items:
            # nothing to do: do not spawn worker processes for an empty job
            return []
        pool = Pool(processes=conf['processes'])
        try:
            return pool.map(func, items)
        finally:
            # release the workers even when one of the jobs raised
            pool.terminate()
            pool.join()

    def _collect_tags(self):
        """Fill self.tags: tag -> {stamp, hash, date, commits, authors}."""
        lines = getpipeoutput(['git show-ref --tags']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue
            (commit_hash, tag) = line.split(' ')

            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % commit_hash])
            if len(output) > 0:
                try:
                    stamp = int(output.split(' ')[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = { 'stamp': stamp, 'hash' : commit_hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }

        # Walk the tags from oldest to newest (by date, then name) so that
        # each tag only counts the commits that are not reachable from the
        # previously processed tag.
        tags_oldest_first = sorted(self.tags, key=lambda name: (self.tags[name]['date'], name))
        prev = None
        for tag in tags_oldest_first:
            cmd = 'git shortlog -s "%s"' % tag
            if prev is not None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            if len(output) == 0:
                continue
            prev = tag
            for line in output.split('\n'):
                # the line starts with padding, so parts[0] is an empty string
                parts = re.split(r'\s+', line, maxsplit=2)
                commits = int(parts[1])
                author = parts[2]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

    def _collect_revision_stats(self):
        """Collect per-commit activity, author, domain and timezone statistics."""
        # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
        lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
        for line in lines:
            if len(line) == 0:
                # an empty revision range yields a single empty string
                continue
            parts = line.split(' ', 4)
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            timezone = parts[3]
            author, mail = parts[4].split('<', 1)
            author = author.rstrip()
            mail = mail.rstrip('>')
            domain = '?'
            if mail.find('@') != -1:
                domain = mail.rsplit('@', 1)[1]
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp (may be in any order because of cherry-picking and patches)
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp

            # activity
            # hour
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

            # day of week
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # domain stats
            domain_info = self.domains.setdefault(domain, {})
            domain_info['commits'] = domain_info.get('commits', 0) + 1

            # hour of week
            hours_of_day = self.activity_by_hour_of_week.setdefault(day, {})
            hours_of_day[hour] = hours_of_day.get(hour, 0) + 1
            # most active hour?
            if hours_of_day[hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = hours_of_day[hour]

            # month of year
            month = date.month
            self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

            # yearly/weekly activity
            yyw = date.strftime('%Y-%W')
            self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
            if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
                self.activity_by_year_week_peak = self.activity_by_year_week[yyw]

            # author stats
            # commits, note again that commits may be in any date order because of cherry-picking and patches
            author_info = self.authors.setdefault(author, {})
            if 'last_commit_stamp' not in author_info or stamp > author_info['last_commit_stamp']:
                author_info['last_commit_stamp'] = stamp
            if 'first_commit_stamp' not in author_info or stamp < author_info['first_commit_stamp']:
                author_info['first_commit_stamp'] = stamp

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            month_authors = self.author_of_month.setdefault(yymm, {})
            month_authors[author] = month_authors.get(author, 0) + 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            year_authors = self.author_of_year.setdefault(yy, {})
            year_authors[author] = year_authors.get(author, 0) + 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

            # authors: active days
            yymmdd = date.strftime('%Y-%m-%d')
            if 'last_active_day' not in author_info:
                author_info['last_active_day'] = yymmdd
                author_info['active_days'] = set([yymmdd])
            elif yymmdd != author_info['last_active_day']:
                author_info['last_active_day'] = yymmdd
                author_info['active_days'].add(yymmdd)

            # project: active days
            if yymmdd != self.last_active_day:
                self.last_active_day = yymmdd
                self.active_days.add(yymmdd)

            # timezone
            self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

    def _collect_files_by_stamp(self):
        """Record the number of files in the tree of every commit; count the commits."""
        # outputs "<stamp> <tree>" for each revision
        revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
        stamp_counts = [] # (stamp, number of files), cached revisions first
        revs_to_read = []
        # Look the tree up in the cache first; only unknown trees are read
        # from the repository.
        cached_counts = self.cache.get('files_in_tree', {})
        for revline in revlines:
            if len(revline) == 0:
                # an empty revision range yields a single empty string
                continue
            stamp, rev = revline.split(' ')
            if rev in cached_counts:
                stamp_counts.append((int(stamp), int(cached_counts[rev])))
            else:
                revs_to_read.append((stamp, rev))

        # Read the remaining revisions from the repository, update the cache
        # and append them to the general list
        for (stamp, rev, count) in self._map_in_pool(getnumoffilesfromrev, revs_to_read):
            self.cache.setdefault('files_in_tree', {})[rev] = count
            stamp_counts.append((int(stamp), count))

        self.total_commits += len(stamp_counts)
        for (stamp, count) in stamp_counts:
            self.files_by_stamp[stamp] = count

    def _collect_extensions(self):
        """Collect total size, file count and per-extension file/line counts."""
        # extensions and size of files
        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
        blobs_to_read = []
        cached_linecounts = self.cache.get('lines_in_blob', {})
        for line in lines:
            if len(line) == 0:
                continue
            parts = re.split(r'\s+', line, maxsplit=4)
            if parts[0] == '160000' and parts[3] == '-':
                # skip submodules
                continue
            blob_id = parts[2]
            size = int(parts[3])
            fullpath = parts[4]

            self.total_size += size
            self.total_files += 1

            filename = fullpath.split('/')[-1] # strip directories
            if filename.find('.') == -1 or filename.rfind('.') == 0:
                ext = ''
            else:
                ext = filename[(filename.rfind('.') + 1):]
            if len(ext) > conf['max_ext_length']:
                ext = ''
            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}
            self.extensions[ext]['files'] += 1
            # take the line count from the cache when known, otherwise queue
            # the blob for reading
            if blob_id in cached_linecounts:
                self.extensions[ext]['lines'] += cached_linecounts[blob_id]
            else:
                blobs_to_read.append((ext, blob_id))

        # Get the line count of the blobs that were not found in the cache,
        # update the cache and add the counts to the extension totals
        for (ext, blob_id, linecount) in self._map_in_pool(getnumoflinesinblob, blobs_to_read):
            self.cache.setdefault('lines_in_blob', {})[blob_id] = linecount
            self.extensions[ext]['lines'] += linecount

    def _collect_line_stats(self):
        """Collect line changes per commit stamp, month and year, plus the totals."""
        # line statistics
        # outputs:
        #  N files changed, N insertions (+), N deletions(-)
        # <stamp> <author>
        self.changes_by_date = {} # stamp -> { files, ins, del }
        # computation of lines of code by date is better done
        # on a linear history.
        extra = ''
        if conf['linear_linestats']:
            extra = '--first-parent -m'
        lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
        # The output is processed oldest first; after reversing, the summary
        # line of a commit comes before its "<stamp> <author>" line.
        lines.reverse()
        files = 0
        inserted = 0
        deleted = 0
        total_lines = 0
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) is None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        stamp = int(line[:pos])
                        self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

                        date = datetime.datetime.fromtimestamp(stamp)
                        yymm = date.strftime('%Y-%m')
                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

                        yy = date.year
                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print('Warning: unexpected line "%s"' % line)
                else:
                    print('Warning: unexpected line "%s"' % line)
            else:
                numbers = getstatsummarycounts(line)

                if len(numbers) == 3:
                    (files, inserted, deleted) = [int(el) for el in numbers]
                    total_lines += inserted
                    total_lines -= deleted
                    self.total_lines_added += inserted
                    self.total_lines_removed += deleted

                else:
                    print('Warning: failed to handle line "%s"' % line)
                    (files, inserted, deleted) = (0, 0, 0)
        self.total_lines += total_lines

    def _collect_author_line_stats(self):
        """Collect cumulative commits and added/removed lines per author over time."""
        # Per-author statistics

        # defined for stamp, author only if author commited at this timestamp.
        self.changes_by_date_by_author = {} # stamp -> author -> lines_added

        # Similar to _collect_line_stats, but never use --first-parent
        # (we need to walk through every commit to know who
        # committed what, not just through mainline)
        lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
        lines.reverse()
        inserted = 0
        deleted = 0
        stamp = 0
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if re.search('files? changed', line) is None:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        oldstamp = stamp
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        if oldstamp > stamp:
                            # clock skew, keep old timestamp to avoid having ugly graph
                            stamp = oldstamp
                        if author not in self.authors:
                            self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
                        author_info = self.authors[author]
                        author_info['commits'] = author_info.get('commits', 0) + 1
                        author_info['lines_added'] = author_info.get('lines_added', 0) + inserted
                        author_info['lines_removed'] = author_info.get('lines_removed', 0) + deleted
                        by_author = self.changes_by_date_by_author.setdefault(stamp, {})
                        snapshot = by_author.setdefault(author, {})
                        snapshot['lines_added'] = author_info['lines_added']
                        snapshot['commits'] = author_info['commits']
                        inserted, deleted = 0, 0
                    except ValueError:
                        print('Warning: unexpected line "%s"' % line)
                else:
                    print('Warning: unexpected line "%s"' % line)
            else:
                numbers = getstatsummarycounts(line)

                if len(numbers) == 3:
                    (_, inserted, deleted) = [int(el) for el in numbers]
                else:
                    print('Warning: failed to handle line "%s"' % line)
                    (inserted, deleted) = (0, 0)

    def refine(self):
        """Derive per-author ranking, commit share and first/last commit dates."""
        # authors
        # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
        self.authors_by_commits.reverse() # most first
        for i, name in enumerate(self.authors_by_commits):
            self.authors[name]['place_by_commits'] = i + 1

        for name in list(self.authors.keys()):
            a = self.authors[name]
            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            delta = date_last - date_first
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = delta
            if 'lines_added' not in a: a['lines_added'] = 0
            if 'lines_removed' not in a: a['lines_removed'] = 0

    def getActiveDays(self):
        """Return the set of 'YYYY-MM-DD' days that have at least one commit."""
        return self.active_days

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        return self.authors[author]

    def getAuthors(self, limit = None):
        """Return author names ordered by commit count, most commits first.

        At most limit names are returned; None means all of them.
        """
        res = getkeyssortedbyvaluekey(self.authors, 'commits')
        res.reverse()
        return res[:limit]

    def getCommitDeltaDays(self):
        """Return the number of days spanned by the first and last commit, inclusive."""
        return (self.last_commit_stamp // 86400 - self.first_commit_stamp // 86400) + 1

    def getDomainInfo(self, domain):
        return self.domains[domain]

    def getDomains(self):
        return list(self.domains.keys())

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the tag names reported by "git show-ref --tags".

        Only the third '/'-separated field of each ref is kept. A repository
        without tags yields an empty list.
        """
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
        return [tag for tag in lines.split('\n') if len(tag) > 0]

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def getTotalSize(self):
        return self.total_size

    def revToDate(self, rev):
        """Return the author date of rev formatted as 'YYYY-MM-DD' (local time)."""
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')

class ReportCreator(object):
    """Creates the actual report based on given data."""
    def __init__(self):
        """Nothing to set up; the inputs are supplied through create()."""

    def create(self, data, path):
        """Remember the data source and the output path on the instance."""
        self.data, self.path = data, path

def html_linkify(text):
    """Turn a heading text into an anchor name: lower case, spaces become underscores."""
    lowered = text.lower()
    return '_'.join(lowered.split(' '))

def html_header(level, text):
    """Return an <hN> heading whose id and self-link use html_linkify(text)."""
    anchor = html_linkify(text)
    template = '\n<h%d id="%s"><a href="#%s">%s</a></h%d>\n\n'
    return template % (level, anchor, anchor, text, level)

class HTMLReportCreator(ReportCreator):
    def create(self, data, path):
        """Write the complete HTML report for *data* into directory *path*.

        Copies the static assets, writes the index, activity, authors,
        files, lines and tags pages together with the ``.dat`` inputs for
        gnuplot, and finally renders the graphs via createGraphs().

        data -- collector object providing the statistics
        path -- existing output directory
        """
        ReportCreator.create(self, data, path)
        self.title = data.projectname

        self._copy_static_files(path)
        self._write_index_page(data, path)
        self._write_activity_page(data, path)
        self._write_authors_page(data, path)
        self._write_files_page(data, path)
        self._write_lines_page(data, path)
        self._write_tags_page(data, path)

        self.createGraphs(path)

    @staticmethod
    def _esc(text):
        """Return *text* escaped for inclusion in HTML markup.

        Author, domain, tag, extension and project names come from the
        repository and may contain ``<``, ``>``, ``&`` or quotes.
        """
        try:
            from html import escape
        except ImportError: # Python 2 without an html module
            from cgi import escape
        return escape(text, True)

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 for a zero denominator."""
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    def _copy_static_files(self, path):
        """Copy the stylesheet, sort script and arrow icons into *path*."""
        # Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
        binarypath = os.path.dirname(os.path.abspath(__file__))
        secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
        basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
        for name in (conf['style'], 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
            for base in basedirs:
                src = base + '/' + name
                if os.path.exists(src):
                    shutil.copyfile(src, path + '/' + name)
                    break
            else:
                print('Warning: "%s" not found, so not copied (searched: %s)' % (name, basedirs))

    def _write_index_page(self, data, path):
        """Write index.html, the general project summary."""
        stamp_format = '%Y-%m-%d %H:%M:%S'
        project = self._esc(data.projectname)
        total_commits = data.getTotalCommits()
        active_days = len(data.getActiveDays())
        delta_days = data.getCommitDeltaDays()

        with open(path + "/index.html", 'wt') as f:
            self.printHeader(f)

            f.write('<h1>GitStats - %s</h1>' % project)

            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Project name</dt><dd>%s</dd>' % project)
            f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(stamp_format), time.time() - data.getStampCreated()))
            f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
            # _ratio() keeps a history without commits from raising
            # ZeroDivisionError; the averages are then reported as 0.
            f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (delta_days, active_days, self._ratio(100.0 * active_days, delta_days)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
            f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (total_commits, self._ratio(total_commits, active_days), self._ratio(total_commits, delta_days)))
            f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (data.getTotalAuthors(), self._ratio(total_commits, data.getTotalAuthors())))
            f.write('</dl>')

            f.write('</body>\n</html>')

    def _write_activity_page(self, data, path):
        """Write activity.html and the .dat files behind its graphs."""
        totalcommits = data.getTotalCommits()

        with open(path + '/activity.html', 'wt') as f:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            # Weekly activity
            WEEKS = 32
            f.write(html_header(2, 'Weekly activity'))
            f.write('<p>Last %d weeks</p>' % WEEKS)

            # generate weeks to show (previous N weeks from now), oldest first
            deltaweek = datetime.timedelta(7)
            weeks = []
            stampcur = datetime.datetime.now()
            for i in range(0, WEEKS):
                weeks.insert(0, stampcur.strftime('%Y-%W'))
                stampcur -= deltaweek

            # top row: commits & bar
            f.write('<table class="noborders"><tr>')
            for week in weeks:
                commits = 0
                percentage = 0
                if week in data.activity_by_year_week:
                    commits = data.activity_by_year_week[week]
                    percentage = float(commits) / data.activity_by_year_week_peak
                # bars are at most 200px high and never thinner than 1px
                height = max(1, int(200 * percentage))
                f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))

            # bottom row: countdown label, WEEKS for the oldest column down to 1
            f.write('</tr><tr>')
            for i in range(0, WEEKS):
                f.write('<td>%s</td>' % (WEEKS - i))
            f.write('</tr></table>')

            # Hour of Day
            f.write(html_header(2, 'Hour of Day'))
            hour_of_day = data.getActivityByHourOfDay()
            f.write('<table><tr><th>Hour</th>')
            for i in range(0, 24):
                f.write('<th>%d</th>' % i)
            f.write('</tr>\n<tr><th>Commits</th>')
            for i in range(0, 24):
                if i in hour_of_day:
                    # shade from rgb(127,0,0) up to rgb(255,0,0) for the busiest hour
                    r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                    f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
                else:
                    f.write('<td>0</td>')
            f.write('</tr>\n<tr><th>%</th>')
            for i in range(0, 24):
                if i in hour_of_day:
                    r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                    f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
                else:
                    f.write('<td>0.00</td>')
            f.write('</tr></table>')
            f.write('<img src="hour_of_day.png" alt="Hour of Day">')
            # The plot expects 1-based hours (xrange 0.5..24.5). The file is
            # written once here; an earlier 0-based copy was always
            # overwritten and has been dropped.
            with open(path + '/hour_of_day.dat', 'wt') as fg:
                for i in range(0, 24):
                    if i in hour_of_day:
                        fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
                    else:
                        fg.write('%d 0\n' % (i + 1))

            # Day of Week
            f.write(html_header(2, 'Day of Week'))
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            with open(path + '/day_of_week.dat', 'wt') as fp:
                for d in range(0, 7):
                    commits = 0
                    if d in day_of_week:
                        commits = day_of_week[d]
                    fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
                    f.write('<tr>')
                    f.write('<th>%s</th>' % (WEEKDAYS[d]))
                    if d in day_of_week:
                        f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
                    else:
                        f.write('<td>0</td>')
                    f.write('</tr>')
            f.write('</table></div>')
            f.write('<img src="day_of_week.png" alt="Day of Week">')

            # Hour of Week
            f.write(html_header(2, 'Hour of Week'))
            f.write('<table>')

            f.write('<tr><th>Weekday</th>')
            for hour in range(0, 24):
                f.write('<th>%d</th>' % (hour))
            f.write('</tr>')

            for weekday in range(0, 7):
                f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
                for hour in range(0, 24):
                    try:
                        commits = data.activity_by_hour_of_week[weekday][hour]
                    except KeyError:
                        commits = 0
                    if commits != 0:
                        f.write('<td')
                        r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
                        f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
                        f.write('>%d</td>' % commits)
                    else:
                        f.write('<td></td>')
                f.write('</tr>')

            f.write('</table>')

            # Month of Year
            f.write(html_header(2, 'Month of Year'))
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
            with open(path + '/month_of_year.dat', 'wt') as fp:
                for mm in range(1, 13):
                    commits = 0
                    if mm in data.activity_by_month_of_year:
                        commits = data.activity_by_month_of_year[mm]
                    f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, self._ratio(100.0 * commits, totalcommits)))
                    fp.write('%d %d\n' % (mm, commits))
            f.write('</table></div>')
            f.write('<img src="month_of_year.png" alt="Month of Year">')

            # Commits by year/month
            f.write(html_header(2, 'Commits by year/month'))
            f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
            for yymm in reversed(sorted(data.commits_by_month.keys())):
                f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
            f.write('</table></div>')
            f.write('<img src="commits_by_year_month.png" alt="Commits by year/month">')
            with open(path + '/commits_by_year_month.dat', 'wt') as fg:
                for yymm in sorted(data.commits_by_month.keys()):
                    fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))

            # Commits by year
            f.write(html_header(2, 'Commits by Year'))
            f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
            for yy in reversed(sorted(data.commits_by_year.keys())):
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, data.commits_by_year.get(yy,0), (100.0 * data.commits_by_year.get(yy,0)) / totalcommits, data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
            f.write('</table></div>')
            f.write('<img src="commits_by_year.png" alt="Commits by Year">')
            with open(path + '/commits_by_year.dat', 'wt') as fg:
                for yy in sorted(data.commits_by_year.keys()):
                    fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))

            # Commits by timezone
            f.write(html_header(2, 'Commits by Timezone'))
            f.write('<table><tr>')
            f.write('<th>Timezone</th><th>Commits</th>')
            f.write('</tr>')
            # max() of an empty sequence raises ValueError, so only compute it
            # when there is at least one timezone to show.
            max_commits_on_tz = 0
            if data.commits_by_timezone:
                max_commits_on_tz = max(data.commits_by_timezone.values())
            for tz in sorted(data.commits_by_timezone.keys(), key = int):
                commits = data.commits_by_timezone[tz]
                r = 127 + int((float(commits) / max_commits_on_tz) * 128)
                f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (tz, r, commits))
            f.write('</table>')

            f.write('</body></html>')

    def _write_authors_page(self, data, path):
        """Write authors.html plus the per-author and domain .dat files.

        Also sets ``self.authors_to_plot``, which createGraphs() reads.
        """
        esc = self._esc
        totalcommits = data.getTotalCommits()

        with open(path + '/authors.html', 'wt') as f:
            self.printHeader(f)

            f.write('<h1>Authors</h1>')
            self.printNav(f)

            # Authors :: List of authors
            f.write(html_header(2, 'List of Authors'))

            f.write('<table class="authors sortable" id="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
            for author in data.getAuthors(conf['max_authors']):
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (esc(author), info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
            f.write('</table>')

            allauthors = data.getAuthors()
            too_many_authors = len(allauthors) > conf['max_authors']
            if too_many_authors:
                rest = allauthors[conf['max_authors']:]
                f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(esc(name) for name in rest))

            f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
            f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author">')
            if too_many_authors:
                f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

            f.write(html_header(2, 'Commits per Author'))
            f.write('<img src="commits_by_author.png" alt="Commits per Author">')
            if too_many_authors:
                f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

            # changes_by_date_by_author[stamp][author] is defined only at
            # points where the author commits. These two dicts carry each
            # author's latest values forward so that every row of the .dat
            # files has a value for every plotted author.
            lines_by_authors = {}
            commits_by_authors = {}

            # Don't rely on getAuthors to give the same order each time.
            # Be robust and keep the list in a variable.
            self.authors_to_plot = data.getAuthors(conf['max_authors'])
            for author in self.authors_to_plot:
                lines_by_authors[author] = 0
                commits_by_authors[author] = 0

            with open(path + '/lines_of_code_by_author.dat', 'wt') as fgl:
                with open(path + '/commits_by_author.dat', 'wt') as fgc:
                    for stamp in sorted(data.changes_by_date_by_author.keys()):
                        changes = data.changes_by_date_by_author[stamp]
                        fgl.write('%d' % stamp)
                        fgc.write('%d' % stamp)
                        for author in self.authors_to_plot:
                            if author in changes:
                                lines_by_authors[author] = changes[author]['lines_added']
                                commits_by_authors[author] = changes[author]['commits']
                            fgl.write(' %d' % lines_by_authors[author])
                            fgc.write(' %d' % commits_by_authors[author])
                        fgl.write('\n')
                        fgc.write('\n')

            # Authors :: Author of Month
            f.write(html_header(2, 'Author of Month'))
            f.write('<table class="sortable" id="aom">')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
            for yymm in reversed(sorted(data.author_of_month.keys())):
                authordict = data.author_of_month[yymm]
                authors = getkeyssortedbyvalues(authordict)
                authors.reverse()
                commits = authordict[authors[0]]
                runners_up = ', '.join(esc(name) for name in authors[1:conf['authors_top']+1])
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, esc(authors[0]), commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], runners_up, len(authors)))

            f.write('</table>')

            # Authors :: Author of Year
            f.write(html_header(2, 'Author of Year'))
            f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
            for yy in reversed(sorted(data.author_of_year.keys())):
                authordict = data.author_of_year[yy]
                authors = getkeyssortedbyvalues(authordict)
                authors.reverse()
                commits = authordict[authors[0]]
                runners_up = ', '.join(esc(name) for name in authors[1:conf['authors_top']+1])
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, esc(authors[0]), commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], runners_up, len(authors)))
            f.write('</table>')

            # Domains
            f.write(html_header(2, 'Commits by Domains'))
            domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
            domains_by_commits.reverse() # most first
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
            with open(path + '/domains.dat', 'wt') as fp:
                n = 0
                for domain in domains_by_commits:
                    if n == conf['max_domains']:
                        break
                    n += 1
                    info = data.getDomainInfo(domain)
                    # the .dat file feeds gnuplot, so the domain stays unescaped there
                    fp.write('%s %d %d\n' % (domain, n , info['commits']))
                    f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (esc(domain), info['commits'], (100.0 * info['commits'] / totalcommits)))
            f.write('</table></div>')
            f.write('<img src="domains.png" alt="Commits by Domains">')

            f.write('</body></html>')

    def _write_files_page(self, data, path):
        """Write files.html and files_by_date.dat."""
        with open(path + '/files.html', 'wt') as f:
            self.printHeader(f)
            f.write('<h1>Files</h1>')
            self.printNav(f)

            f.write('<dl>\n')
            f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
            f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
            try:
                f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
            except ZeroDivisionError:
                # no files: the average is simply left out
                pass
            f.write('</dl>\n')

            # Files :: File count by date
            f.write(html_header(2, 'File count by date'))

            # use set to get rid of duplicate/unnecessary entries
            files_by_date = set()
            for stamp in data.files_by_stamp.keys():
                files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))

            with open(path + '/files_by_date.dat', 'wt') as fg:
                for line in sorted(files_by_date):
                    fg.write('%s\n' % line)

            f.write('<img src="files_by_date.png" alt="Files by Date">')

            # Files :: Extensions
            f.write(html_header(2, 'Extensions'))
            f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
            for ext in sorted(data.extensions.keys()):
                files = data.extensions[ext]['files']
                lines = data.extensions[ext]['lines']
                try:
                    loc_percentage = (100.0 * lines) / data.getTotalLOC()
                except ZeroDivisionError:
                    loc_percentage = 0
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (self._esc(ext), files, (100.0 * files) / data.getTotalFiles(), lines, loc_percentage, lines // files))
            f.write('</table>')

            f.write('</body></html>')

    def _write_lines_page(self, data, path):
        """Write lines.html and lines_of_code.dat."""
        with open(path + '/lines.html', 'wt') as f:
            self.printHeader(f)
            f.write('<h1>Lines</h1>')
            self.printNav(f)

            f.write('<dl>\n')
            f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
            f.write('</dl>\n')

            f.write(html_header(2, 'Lines of Code'))
            f.write('<img src="lines_of_code.png" alt="Lines of Code">')

            with open(path + '/lines_of_code.dat', 'wt') as fg:
                for stamp in sorted(data.changes_by_date.keys()):
                    fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))

            f.write('</body></html>')

    def _write_tags_page(self, data, path):
        """Write tags.html, listing tags from newest to oldest."""
        esc = self._esc
        with open(path + '/tags.html', 'wt') as f:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
            if len(data.tags) > 0:
                f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
            f.write('</dl>')

            f.write('<table class="tags">')
            f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
            # sort the tags by date desc (ties broken by name, also descending)
            dated = sorted([(info['date'], name) for name, info in data.tags.items()])
            tags_sorted_by_date_desc = [name for _, name in reversed(dated)]
            for tag in tags_sorted_by_date_desc:
                authorinfo = []
                self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
                for name in reversed(self.authors_by_commits):
                    authorinfo.append('%s (%d)' % (esc(name), data.tags[tag]['authors'][name]))
                f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (esc(tag), data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
            f.write('</table>')

            f.write('</body></html>')

    def createGraphs(self, path):
        print('Generating graphs...')

        # hour of day
        f = open(path + '/hour_of_day.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set yrange [0:]
set xtics 4
set grid y
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # day of week
        f = open(path + '/day_of_week.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
""")
        f.close()

        # Domains
        f = open(path + '/domains.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'domains.png'
unset key
unset xtics
set yrange [0:]
set grid y
set ylabel "Commits"
plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
""")
        f.close()

        # Month of Year
        f = open(path + '/month_of_year.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'month_of_year.png'
unset key
set xrange [0.5:12.5]
set yrange [0:]
set xtics 1
set grid y
set ylabel "Commits"
plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # commits_by_year_month
        f = open(path + '/commits_by_year_month.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year_month.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics rotate
set bmargin 5
set grid y
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # commits_by_year
        f = open(path + '/commits_by_year.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year.png'
unset key
set yrange [0:]
set xtics 1 rotate
set grid y
set ylabel "Commits"
set yrange [0:]
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # Files by date
        f = open(path + '/files_by_date.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'files_by_date.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%Y-%m-%d"
set format x "%Y-%m-%d"
set grid y
set ylabel "Files"
set xtics rotate
set ytics autofreq
set bmargin 6
plot 'files_by_date.dat' using 1:2 w steps
""")
        f.close()

        # Lines of Code
        f = open(path + '/lines_of_code.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'lines_of_code.png'
unset key
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot 'lines_of_code.dat' using 1:2 w lines
""")
        f.close()

        # Lines of Code Added per author
        f = open(path + '/lines_of_code_by_author.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'lines_of_code_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            i = i + 1
            author = a.replace("\"", "\\\"").replace("`", "")
            plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
        f.write(", ".join(plots))
        f.write('\n')

        f.close()

        # Commits per author
        f = open(path + '/commits_by_author.plot', 'wt')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'commits_by_author.png'
set key left top
set yrange [0:]
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Commits"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            i = i + 1
            author = a.replace("\"", "\\\"").replace("`", "")
            plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, author))
        f.write(", ".join(plots))
        f.write('\n')

        f.close()

        os.chdir(path)
        files = glob.glob(path + '/*.plot')
        for f in files:
            out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
            if len(out) > 0:
                print(out)

    def printHeader(self, f, title = ''):
        f.write(
"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>GitStats - %s</title>
    <link rel="stylesheet" href="%s" type="text/css">
    <meta name="generator" content="GitStats %s">
    <script type="text/javascript" src="sortable.js"></script>
</head>
<body>
""" % (self.title, conf['style'], getversion()))

    def printNav(self, f):
        """Write the navigation bar linking the six report pages to *f*."""
        pages = (
            ('index.html', 'General'),
            ('activity.html', 'Activity'),
            ('authors.html', 'Authors'),
            ('files.html', 'Files'),
            ('lines.html', 'Lines'),
            ('tags.html', 'Tags'),
        )
        items = ''.join(['<li><a href="%s">%s</a></li>\n' % page for page in pages])
        # Emitted with a single write, list items one per line.
        f.write('\n<div class="nav">\n<ul>\n' + items + '</ul>\n</div>\n')

def usage():
    """Print the command-line synopsis followed by the current ``conf`` dict."""
    help_text = """
Usage: gitstats [options] <gitpath..> <outputpath>

Options:
-c key=value     Override configuration value

Default config values:
%s

Please see the manual page for more details.
""" % conf
    print(help_text)


class GitStats(object):
    """Command-line driver: parse options, collect data, write the report."""

    def run(self, args_orig):
        """Run gitstats with the argument list *args_orig* (program name excluded).

        Expected form: ``[options] <gitpath..> <outputpath>``.

        Options:
          -c key=value  override an entry of ``conf``; values for keys whose
                        current value is an int are converted with int()
          -h, --help    print usage and exit

        Exits with status 1 on an unknown option, when the output path
        cannot be used, or when gnuplot is not found.

        Raises:
          ValueError -- ``-c`` argument without ``=`` or a non-numeric value
                        for an integer key
          KeyError   -- ``-c`` names a key that is not in ``conf``
        """
        try:
            optlist, args = getopt.getopt(args_orig, 'hc:', ["help"])
        except getopt.GetoptError as err:
            # Report bad options like other fatal errors instead of a traceback.
            print('gitstats: %s' % err, file=sys.stderr)
            usage()
            sys.exit(1)

        for o,v in optlist:
            if o == '-c':
                if '=' not in v:
                    raise ValueError('-c expects key=value, got "%s"' % v)
                key, value = v.split('=', 1)
                if key not in conf:
                    raise KeyError('no such key "%s" in config' % key)
                if isinstance(conf[key], int):
                    conf[key] = int(value)
                else:
                    conf[key] = value
            elif o in ('-h', '--help'):
                usage()
                sys.exit()

        if len(args) < 2:
            usage()
            sys.exit(0)

        outputpath = os.path.abspath(args[-1])
        rundir = os.getcwd()

        try:
            os.makedirs(outputpath)
        except OSError:
            # Typically "already exists"; the isdir check below decides
            # whether the path is usable.
            pass
        if not os.path.isdir(outputpath):
            print('FATAL: Output path is not a directory or does not exist')
            sys.exit(1)

        if not getgnuplotversion():
            print('gnuplot not found')
            sys.exit(1)

        print('Output path: %s' % outputpath)
        cachefile = os.path.join(outputpath, 'gitstats.cache')

        data = GitDataCollector()
        data.loadCache(cachefile)

        for gitpath in args[0:-1]:
            print('Git path: %s' % gitpath)

            prevdir = os.getcwd()
            os.chdir(gitpath)
            try:
                print('Collecting data...')
                data.collect(gitpath)
            finally:
                # Restore the working directory even if collection fails, so
                # later relative git paths still resolve.
                os.chdir(prevdir)

        print('Refining data...')
        data.saveCache(cachefile)
        data.refine()

        os.chdir(rundir)

        print('Generating report...')
        report = HTMLReportCreator()
        report.create(data, outputpath)

        time_end = time.time()
        # Local value only; the module-level exectime_internal is not updated.
        exectime_internal = time_end - time_start
        print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal))
        if sys.stdin.isatty():
            print('You may now run:')
            print()
            # single quotes inside the path are escaped for the shell
            print('   sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''"))
            print()

if __name__ == '__main__':
    # Executed as a script: hand the command-line arguments (without the
    # program name) to the GitStats driver.
    g = GitStats()
    g.run(sys.argv[1:])

