#!/usr/bin/python3

import collections
import os.path
import pickle
import re
from statistics import mean
import sys

from termcolor import colored

from monkey.action import parse as parse_trace
from monkey.patterns import get_patterns
from prolog.util import parse as prolog_parse, rename_vars_list, stringify, tokenize

# script arguments:
#   1. path to the pickled list of solutions
#   2. numeric problem id to analyse
#   3. directory containing 'attributes', 'rules', 'users-test' and
#      'programs.pickle'
solutions_file = sys.argv[1]
pid = int(sys.argv[2])
data_dir = sys.argv[3]

attributes_file = os.path.join(data_dir, 'attributes')
rules_file = os.path.join(data_dir, 'rules')
users_file = os.path.join(data_dir, 'users-test')
programs_file = os.path.join(data_dir, 'programs.pickle')

# NOTE(security): pickle.load can execute arbitrary code while loading, so
# both pickled inputs below must come from a trusted source.

# read test results for known programs
with open(programs_file, 'rb') as programs_fh:
    test = pickle.load(programs_fh)

# read traces
# One user id per line; blank lines (e.g. a trailing newline at the end of
# the file) are skipped instead of crashing int().
with open(users_file, 'r') as users_fh:
    users = [int(line.strip()) for line in users_fh if line.strip()]
# Set copy of the ids so the membership test below does not rescan the list
# for every solution.
user_ids = set(users)

with open(solutions_file, 'rb') as solutions_fh:
    solutions = pickle.load(solutions_fh)

# Keep one trace per selected user for the requested problem; if a user has
# several matching solutions, the last one read wins.
traces = {}
for solution in solutions:
    if solution.problem_id == pid and solution.codeq_user_id in user_ids:
        traces[solution.codeq_user_id] = solution.trace

# read attributes
# Each non-blank line holds two tab-separated fields: the attribute name used
# in the rules file and the value it maps to.
with open(attributes_file, 'r') as attributes_fh:
    attributes = dict(
        line.strip().split('\t') for line in attributes_fh if line.strip())

class Rule(collections.namedtuple('Rule', 'klass condition distribution quality')):
    """A single rule read from the rules file.

    Fields:
        klass: class label of the rule as read from the file ('T' or 'F').
        condition: sequence of (pattern, yes) pairs.
        distribution: pair of integers taken from the rule line.
        quality: float taken from the rule line.
    """

    def __str__(self):
        """Return a header line followed by one line per condition pattern."""
        header = 'Rule: class = {}, distribution = {}, quality = {}\n'.format(
            self.klass, self.distribution, self.quality)
        lines = [header]
        # Only the pattern of each condition is shown; the yes/no flag is not.
        for pattern, _ in self.condition:
            lines.append(str(pattern) + '\n')
        return ''.join(lines)

# read rules
# Expected line format:
#   IF <field> AND <field> ... THEN correct=<T|F> [ <int> <int>] <float>
# Compiled once because the same pattern is applied to every line.
rule_regex = re.compile(
    r'IF ((?:a[0-9]*[^ ]*(?: AND )*)*) THEN correct=([TF]) *\[ *([0-9]*) *([0-9]*)\] *([0-9.]*)')

rules = []
with open(rules_file, 'r') as rules_fh:
    for line in rules_fh:
        line = line.strip()
        match = rule_regex.match(line)
        if match:
            condition_text, klass, count_first, count_second, quality = match.groups()
            # Each field is an attribute name followed by a three-character
            # suffix; the name is mapped through `attributes` and the flag
            # records whether the suffix is '!=F'.
            condition = tuple(
                (attributes[field[:-3]], field.endswith('!=F'))
                for field in condition_text.split(' AND '))
            rules.append(Rule(
                klass,
                condition,
                (int(count_first), int(count_second)),
                float(quality)))
        else:
            # Unparseable lines are reported and skipped, not treated as fatal.
            print('Did not understand rule:', line)

def color_print(text, ranges):
    """Print `text` on one line with the given ranges colored.

    Args:
        text: the string to print.
        ranges: iterable of (start, length, color) triples; `color` is passed
            to `colored` for the slice text[start:start+length].

    Ranges are processed in sorted order. A range that starts before the end
    of the previously printed range is skipped.
    """
    pieces = []
    cursor = 0  # index of the first character not yet emitted
    for start, length, color in sorted(ranges):
        # ignore overlapping ranges
        if start < cursor:
            continue
        stop = start + length
        pieces.append(text[cursor:start])
        pieces.append(colored(text[start:stop], color))
        cursor = stop
    # Remainder of the text after the last colored range.
    pieces.append(text[cursor:])
    print(''.join(pieces))

def check_rule(rule, patterns):
    """Check the conditions of `rule` against the patterns of a program.

    Args:
        rule: object whose `condition` is a sequence of (pattern, yes) pairs.
        patterns: list of (pattern, nodes) pairs found in the program.

    Returns:
        A list with one (rule_pattern, nodes) entry for every entry of
        `patterns` that equals a condition pattern with yes == True. An
        empty list is returned as soon as a pattern with yes == False is
        found among `patterns`. A missing yes == True pattern does not
        abort the check; it simply contributes no entries.
    """
    hits = []
    for wanted, must_be_present in rule.condition:
        if not must_be_present:
            # this pattern must not be present
            if wanted in [entry[0] for entry in patterns]:
                return []
            continue
        # this pattern must be present: record every occurrence
        hits.extend(
            (wanted, nodes) for pattern, nodes in patterns if pattern == wanted)
    return hits

# keep track of when each suggestion was applied
all_suggestions = []
# programs with no matching rule
unsuggestable = collections.Counter()

# Main loop: for every selected user, replay the trace, collect the distinct
# tested programs, print a suggestion (first applicable rule) for each failing
# program and record whether that rule matched in later submissions.
for user, trace in traces.items():
    # get submissions from trace
    # `programs` is a list of (normalized_code, correct) pairs in submission order.
    programs = []
    code = ''
    for action in parse_trace(trace):
        # every action updates the current code; only 'test' actions are submissions
        code = action.apply(code)
        if action.type == 'test':
            # skip submissions for which prolog_parse returns None
            # (presumably code that does not parse — confirm against prolog.util)
            if prolog_parse(code) is None:
                continue
            normalized_code = stringify(rename_vars_list(tokenize(code)))
            # skip a submission identical (after normalization) to the previous one
            if programs and normalized_code == programs[-1][0]:
                continue
            # a program is correct when all of its recorded tests passed
            # NOTE(review): if `test` is a plain dict this raises KeyError for
            # a program that has no recorded test results
            correct = test[normalized_code]['n_tests'] == test[normalized_code]['n_passed']
            programs.append((normalized_code, correct))
            # ignore actions after first correct submission
            if correct:
                break

    # ignore traces with no / only correct submissions
    # (this also skips traces without any submissions at all)
    if not any(p[1] for p in programs) or all(p[1] for p in programs):
        continue

    # suggestions made so far for this user, one dict per (program, rule)
    suggested = []
    for i, (program, correct) in enumerate(programs):
        program_patterns = list(get_patterns(program))
        #for p in program_patterns:
        #    print(p[0])
        #print()

        # check if previously suggested rules match
        # This runs before a suggestion is stored for the current program, so
        # each suggestion is only checked against later submissions.
        for s in suggested:
            s['passed'] += 1
            match = check_rule(s['rule'], program_patterns)
            # a 'T' rule counts as matched when the number of matches equals the
            # number of conditions; an 'F' rule when check_rule returns nothing
            if (s['rule'].klass == 'T' and len(match) == len(s['rule'].condition) or
                s['rule'].klass == 'F' and not match):
                s['matched'].append(s['passed'])

        # only check programs until first correct submission
        if correct:
            print(str(i) + ' PASS\t' + program)
            print()
            break

        # check rules in order, buggy rules first
        found = False
        for rule in (
                [r for r in rules if r.klass == 'F'] +
                [r for r in rules if r.klass == 'T']):
            match = check_rule(rule, program_patterns)
            # skip 'F' rules with no match, and 'T' rules unless the number of
            # matches is exactly one less than the number of conditions
            if (rule.klass == 'F' and not match or
                rule.klass == 'T' and len(match) != len(rule.condition)-1):
                continue
            found = True

            # store suggestion to see if it was implemented later
            if not any(s['program'] == program and s['rule'] == rule for s in suggested):
                # found: index of the submission for which the rule was suggested
                # passed: number of later submissions processed so far
                # matched: list of submissions where the suggested rule matched
                #          (the current submission has index 0, the next 1 and so on)
                suggested.append({'program': program, 'rule': rule, 'found': i, 'passed': 0, 'matched': []})

            # get highlights
            # each highlight is a (start, length, color) triple for color_print;
            # presumably n[0] is a token with its position and text — confirm
            highlight = set()
            for m in match:
                for n in m[1]:
                    highlight.add((n[0].pos, len(n[0].val), ('green' if rule.klass == 'T' else 'red')))

            # print highlighted program
            print(str(i) + ' FAIL', end='\t')
            color_print(program, list(highlight))

            # print rule
            # 'T' rules list each pattern as good (present) or missing;
            # 'F' rules list only the patterns present in the program as buggy
            for rule_pattern, yes in rule.condition:
                if rule.klass == 'T':
                    if rule_pattern in [pattern for pattern, nodes in program_patterns]:
                        print('good\t' + str(rule_pattern))
                    else:
                        print('missing\t' + str(rule_pattern))
                else:
                    if rule_pattern in [pattern for pattern, nodes in program_patterns]:
                        print('buggy\t' + str(rule_pattern))
            print()
            # only the first applicable rule is suggested for a program
            break

        if not found:
            print(str(i) + ' FAIL\t' + str(program))
            print()
            unsuggestable[program] += 1

    print('Suggestions and versions in which they were implemented:')
    for s in suggested:
        # convert the relative counters back to absolute submission indices
        index = len(programs) - (s['passed'] + 1)
        print(index, [index + m for m in s['matched']])
    all_suggestions += suggested

    print('-'*30)
    print()

# report
def _average(values):
    """Return the arithmetic mean of `values`, or 'n/a' if there are none.

    Used so that the statistics below can still be printed when no
    suggestion falls into one of the two groups.
    """
    values = list(values)
    if not values:
        return 'n/a'
    return sum(values) / len(values)

# A suggestion counts as implemented when the rule matched in the last
# processed submission, i.e. when its 'passed' counter appears in 'matched'.
matched = []
not_matched = []
for s in all_suggestions:
    (matched if s['passed'] in s['matched'] else not_matched).append(s)

# rules that did / did not match in the solution
good = collections.Counter(s['rule'] for s in matched)
bad = collections.Counter(s['rule'] for s in not_matched)

print('Statistics')
print('----------')
print('# of suggestions that were implemented:', len(matched))
print('# of suggestions that were not implemented:', len(not_matched))
# s['matched'][0] is the first later submission in which the rule matched;
# it always exists for entries of `matched`.
print('avg. # of submissions before suggestion was implemented:',
        _average(s['matched'][0] for s in matched))
print('avg. # of submissions until PASS after suggestion was implemented:',
        _average(s['passed'] - s['matched'][0] for s in matched))
print('avg. # of submissions until PASS if suggestion was not implemented:',
        _average(s['passed'] for s in not_matched))
#print('avg. % of submissions after suggestion where it was not implemented :', 1-mean(len(s['matched'])/s['passed'] for s in matched))
print()

print('Unsuggestable programs')
print('----------------------')
for p, count in unsuggestable.most_common():
    print('{}\t{}'.format(count, p))
print()

print('Good rules')
print('----------')
for r, count in good.most_common():
    print('Suggested for ' + str(count) + ' submissions')
    print(r)

print('Bad rules')
print('---------')
for r, count in bad.most_common():
    print('Suggested for ' + str(count) + ' submissions')
    print(r)