#!/usr/bin/python3

"""Evaluate learned rules against recorded student traces for one problem.

Usage: <script> SOLUTIONS_FILE PROBLEM_ID DATA_DIR

Inputs:
  SOLUTIONS_FILE            pickled iterable of solution objects; the
                            attributes problem_id, codeq_user_id and trace
                            are read from each of them
  DATA_DIR/attributes       one "<name>TAB<pattern>" pair per line
  DATA_DIR/rules            one rule per line, in the
                            "IF ... THEN correct=[TF] [n m] q" format
                            matched by RULE_RE below
  DATA_DIR/users-test       one integer user ID per line
  DATA_DIR/programs.pickle  pickled mapping from normalized program text to
                            a dict with the keys 'n_tests' and 'n_passed'

For every selected trace the script replays the submissions, prints the
first applicable rule for each incorrect submission (with the matching
parts of the program highlighted), and records in which later submissions
the suggestion appears to have been implemented. Summary statistics are
printed at the end.

NOTE: both pickle files are loaded with pickle.load(), which can execute
arbitrary code. Only run this script on files from a trusted source.
"""

import collections
import os.path
import pickle
import re
from statistics import mean
import sys

from termcolor import colored

from monkey.action import parse as parse_trace
from monkey.patterns import get_patterns
from prolog.util import (
    parse as prolog_parse, rename_vars_list, stringify, tokenize)

# script arguments
if len(sys.argv) < 4:
    sys.exit('usage: {} SOLUTIONS_FILE PROBLEM_ID DATA_DIR'.format(sys.argv[0]))
solutions_file = sys.argv[1]
pid = int(sys.argv[2])
data_dir = sys.argv[3]

attributes_file = os.path.join(data_dir, 'attributes')
rules_file = os.path.join(data_dir, 'rules')
users_file = os.path.join(data_dir, 'users-test')
programs_file = os.path.join(data_dir, 'programs.pickle')

# read test results for known programs
with open(programs_file, 'rb') as f:
    test = pickle.load(f)

# read traces (only for the selected problem and the test users)
with open(users_file, 'r') as f:
    # a set, because it is only used for membership tests
    users = {int(line.strip()) for line in f}
traces = {}
with open(solutions_file, 'rb') as f:
    for solution in pickle.load(f):
        if solution.problem_id == pid and solution.codeq_user_id in users:
            traces[solution.codeq_user_id] = solution.trace

# read attributes: maps attribute name (as used in the rules file) to pattern
with open(attributes_file, 'r') as f:
    attributes = dict(line.strip().split('\t') for line in f)


class Rule(collections.namedtuple(
        'Rule', ['klass', 'condition', 'distribution', 'quality'])):
    """A single learned rule.

    klass         'T' or 'F', the value of "correct" predicted by the rule
    condition     tuple of (pattern, yes) pairs; yes is True if the pattern
                  must be present and False if it must be absent
    distribution  pair of integers read from the rule's "[n m]" part
    quality       float read from the end of the rule line
    """

    def __str__(self):
        header = 'Rule: class = {}, distribution = {}, quality = {}\n'.format(
            self.klass, self.distribution, self.quality)
        return header + ''.join(
            str(pattern) + '\n' for pattern, yes in self.condition)


# format of one line in the rules file
RULE_RE = re.compile(
    r'IF ((?:a[0-9]*[^ ]*(?: AND )*)*) THEN correct=([TF]) *\[ *([0-9]*) *([0-9]*)\] *([0-9.]*)')

# read rules
rules = []
with open(rules_file, 'r') as f:
    for line in f:
        line = line.strip()
        match = RULE_RE.match(line)
        if match:
            m = match.groups()
            # each field is "<attribute>!=F" or "<attribute>==F"; the last
            # three characters are the comparison, the rest is the name
            condition = tuple(
                (attributes[field[:-3]], field.endswith('!=F'))
                for field in m[0].split(' AND '))
            rules.append(
                Rule(m[-4], condition, (int(m[-3]), int(m[-2])), float(m[-1])))
        else:
            print('Did not understand rule:', line)

# rules in the order in which they are tried: buggy rules first
ordered_rules = ([r for r in rules if r.klass == 'F'] +
                 [r for r in rules if r.klass == 'T'])


def color_print(text, ranges):
    """Print text with the given ranges colored.

    ranges is an iterable of (start, length, color) triples. Ranges are
    processed in sorted order; a range starting before the end of the
    previously printed range is skipped.
    """
    i = 0
    for start, length, color in sorted(ranges):
        # ignore overlapping ranges
        if start < i:
            continue
        print(text[i:start], end='')
        print(colored(text[start:start+length], color), end='')
        i = start + length
    print(text[i:])


def check_rule(rule, patterns):
    """Check which of the given patterns match the rule's condition.

    patterns is a sequence of (pattern, nodes) pairs. Returns a list with one
    (pattern, nodes) pair for each occurrence in patterns of a pattern that
    the rule requires to be present. Returns an empty list as soon as a
    pattern that the rule requires to be absent is found in patterns.
    """
    present = [p[0] for p in patterns]
    ret_patterns = []
    for rule_pattern, yes in rule.condition:
        if yes:
            # this pattern must be present
            for pattern, nodes in patterns:
                if pattern == rule_pattern:
                    ret_patterns.append((rule_pattern, nodes))
        elif rule_pattern in present:
            # this pattern must not be present
            return []
    return ret_patterns


def get_submissions(trace):
    """Replay a trace and return its submissions as (program, correct) pairs.

    Only 'test' actions with syntactically valid code are considered.
    Programs are normalized, consecutive duplicates are dropped, and
    everything after the first correct submission is ignored.
    """
    programs = []
    code = ''
    for action in parse_trace(trace):
        code = action.apply(code)
        if action.type != 'test':
            continue
        if prolog_parse(code) is None:
            continue
        normalized_code = stringify(rename_vars_list(tokenize(code)))
        if programs and normalized_code == programs[-1][0]:
            continue
        result = test[normalized_code]
        correct = result['n_tests'] == result['n_passed']
        programs.append((normalized_code, correct))
        # ignore actions after first correct submission
        if correct:
            break
    return programs


def average(values):
    """Return the arithmetic mean of values, or 'n/a' if there are none."""
    if not values:
        return 'n/a'
    return sum(values) / len(values)


# keep track of when each suggestion was applied
all_suggestions = []
# programs with no matching rule
unsuggestable = collections.Counter()

for trace in traces.values():
    programs = get_submissions(trace)

    # ignore traces with no / only correct submissions
    if not any(p[1] for p in programs) or all(p[1] for p in programs):
        continue

    suggested = []
    for i, (program, correct) in enumerate(programs):
        program_patterns = list(get_patterns(program))
        present = [pattern for pattern, nodes in program_patterns]

        # check if previously suggested rules match
        for s in suggested:
            s['passed'] += 1
            old_rule = s['rule']
            match = check_rule(old_rule, program_patterns)
            # a 'T' rule counts as implemented once all its patterns match,
            # an 'F' rule once it no longer matches
            if ((old_rule.klass == 'T' and
                 len(match) == len(old_rule.condition)) or
                    (old_rule.klass == 'F' and not match)):
                s['matched'].append(s['passed'])

        # only check programs until first correct submission
        if correct:
            print(str(i) + ' PASS\t' + program)
            print()
            break

        # check rules in order, buggy rules first
        found = False
        for rule in ordered_rules:
            match = check_rule(rule, program_patterns)
            # an 'F' rule is suggested if it matches; a 'T' rule is
            # suggested if the number of matches is one less than the
            # number of its conditions
            if ((rule.klass == 'F' and not match) or
                    (rule.klass == 'T' and
                     len(match) != len(rule.condition) - 1)):
                continue
            found = True

            # store suggestion to see if it was implemented later
            if not any(s['program'] == program and s['rule'] == rule
                       for s in suggested):
                # passed: how many submission before PASS
                # matched: list of submissions where the suggested rule
                #          matched (the current submission has index 0, the
                #          next 1 and so on)
                suggested.append({
                    'program': program,
                    'rule': rule,
                    'found': i,
                    'passed': 0,
                    'matched': [],
                })

            # get highlights
            color = 'green' if rule.klass == 'T' else 'red'
            highlight = set()
            for m in match:
                for n in m[1]:
                    highlight.add((n[0].pos, len(n[0].val), color))

            # print highlighted program
            print(str(i) + ' FAIL', end='\t')
            color_print(program, list(highlight))

            # print rule
            for rule_pattern, yes in rule.condition:
                if rule_pattern in present:
                    label = 'good\t' if rule.klass == 'T' else 'buggy\t'
                    print(label + str(rule_pattern))
                elif rule.klass == 'T':
                    print('missing\t' + str(rule_pattern))
            print()
            break

        if not found:
            print(str(i) + ' FAIL\t' + str(program))
            print()
            unsuggestable[program] += 1

    print('Suggestions and versions in which they were implemented:')
    for s in suggested:
        index = len(programs) - (s['passed'] + 1)
        print(index, [index + m for m in s['matched']])
    all_suggestions += suggested

    print('-'*30)
    print()

# report
# a suggestion counts as implemented if its rule matched in the last
# submission considered for the trace (the one with index s['passed'])
not_matched = [s for s in all_suggestions if s['passed'] not in s['matched']]
matched = [s for s in all_suggestions if s['passed'] in s['matched']]

# rules that did / did not match in the solution
good = collections.Counter()
bad = collections.Counter()
for s in all_suggestions:
    (good if s['passed'] in s['matched'] else bad)[s['rule']] += 1

print('Statistics')
print('----------')
print('# of suggestions that were implemented:', len(matched))
print('# of suggestions that were not implemented:', len(not_matched))
print('avg. # of submissions before suggestion was implemented:',
      average([s['matched'][0] for s in matched]))
print('avg. # of submissions until PASS after suggestion was implemented:',
      average([s['passed'] - s['matched'][0] for s in matched]))
print('avg. # of submissions until PASS if suggestion was not implemented:',
      average([s['passed'] for s in not_matched]))
# disabled statistic, kept for reference:
#print('avg. % of submissions after suggestion where it was not implemented :', 1-mean(len(s['matched'])/s['passed'] for s in matched))
print()

print('Unsuggestable programs')
print('----------------------')
for p, count in unsuggestable.most_common():
    print('{}\t{}'.format(count, p))
print()

print('Good rules')
print('----------')
for r, count in good.most_common():
    print('Suggested for ' + str(count) + ' submissions')
    print(r)

print('Bad rules')
print('---------')
for r, count in bad.most_common():
    print('Suggested for ' + str(count) + ' submissions')
    print(r)