#!/usr/bin/python3
# Origin: commit 27d4458613a5b61f16ad9bf59ca1de460fea3b3a by Timotej Lazar,
# Mon, 9 Jan 2017 18:07:23 +0100, "First commit is the best commit".
# That commit created test-rules.py (mode 100755).
"""Replay student traces and evaluate rule-based suggestions.

Usage: test-rules.py SOLUTIONS_FILE PROBLEM_ID DATA_DIR

SOLUTIONS_FILE is a pickle of solution objects (the attributes
``problem_id``, ``codeq_user_id`` and ``trace`` are read).  DATA_DIR must
contain:

* ``attributes``      -- one ``name<TAB>value`` pair per line,
* ``rules``           -- one rule per line, see ``RULE_RE``,
* ``users-test``      -- one integer user id per line,
* ``programs.pickle`` -- mapping from normalized program text to a dict
                         with the keys ``n_tests`` and ``n_passed``.

For every selected trace the script prints each distinct submission up to
the first correct one, together with the first rule that would be suggested
for it, and finally prints statistics on whether the suggested rules matched
in later submissions.
"""

import collections
import os.path
import pickle
import re
from statistics import mean
import sys

from termcolor import colored

from monkey.action import parse as parse_trace
from monkey.patterns import get_patterns
from prolog.util import parse as prolog_parse, rename_vars_list, stringify, tokenize

# One line of the rules file: the condition (fields joined by ' AND '), the
# predicted class (T or F), two integers in square brackets and a number.
RULE_RE = re.compile(
    r'IF ((?:a[0-9]*[^ ]*(?: AND )*)*) THEN correct=([TF]) *\[ *([0-9]*) *([0-9]*)\] *([0-9.]*)')


class Rule(collections.namedtuple('Rule', ['klass', 'condition', 'distribution', 'quality'])):
    """A parsed rule.

    klass        -- 'T' or 'F' (the ``correct=`` value of the rule)
    condition    -- tuple of ``(pattern, yes)`` pairs; ``yes`` is True when
                    the pattern must be present, False when it must be absent
    distribution -- pair of integers read from the square brackets
    quality      -- float read from the end of the rule line
    """

    def __str__(self):
        s = 'Rule: class = {}, distribution = {}, quality = {}\n'.format(self.klass, self.distribution, self.quality)
        s += ''.join([str(pattern) + '\n' for pattern, yes in self.condition])
        return s


def load_pickle(path):
    """Return the object pickled in the file at *path*.

    NOTE: unpickling can execute arbitrary code; only use trusted data files.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


def read_users(path):
    """Return the set of integer user ids listed one per line in *path*."""
    with open(path, 'r') as f:
        # blank lines are skipped instead of crashing int()
        return {int(line) for line in f if line.strip()}


def read_attributes(path):
    """Return a dict built from the tab-separated pairs in *path*.

    Every non-blank line must consist of exactly two tab-separated fields.
    """
    with open(path, 'r') as f:
        return dict(line.strip().split('\t') for line in f if line.strip())


def read_rules(path, attributes):
    """Parse the rules file at *path* into a list of Rule objects.

    Lines that do not match RULE_RE are reported on stdout and skipped.
    *attributes* maps the attribute names used in the rules to patterns.
    """
    rules = []
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            match = RULE_RE.match(line)
            if not match:
                print('Did not understand rule:', line)
                continue
            condition_text, klass, count_a, count_b, quality = match.groups()
            # The last three characters of each field are the comparison;
            # the rest is the attribute name.  A field ending in '!=F'
            # requires the pattern to be present.
            condition = tuple(
                (attributes[field[:-3]], field.endswith('!=F'))
                for field in condition_text.split(' AND '))
            rules.append(Rule(klass, condition, (int(count_a), int(count_b)), float(quality)))
    return rules


def color_print(text, ranges):
    """Print *text* with the given ranges colored.

    *ranges* is an iterable of ``(start, length, color)`` triples.  They are
    processed in sorted order; a range starting before the end of the
    previously printed range is ignored.
    """
    i = 0
    for start, length, color in sorted(ranges):
        # ignore overlapping ranges
        if start < i:
            continue
        print(text[i:start], end='')
        print(colored(text[start:start+length], color), end='')
        i = start + length
    print(text[i:])


def check_rule(rule, patterns):
    """Check which required patterns of *rule* occur in *patterns*.

    *patterns* is a list of ``(pattern, nodes)`` pairs.  Returns a list of
    ``(rule_pattern, nodes)`` pairs, one for every occurrence of a pattern
    that the rule requires to be present.  Returns an empty list as soon as
    a pattern that the rule requires to be absent is found.
    """
    ret_patterns = []
    for rule_pattern, yes in rule.condition:
        if yes:
            # this pattern must be present
            for pattern, nodes in patterns:
                if pattern == rule_pattern:
                    ret_patterns.append((rule_pattern, nodes))
        else:
            # this pattern must not be present
            if rule_pattern in [p[0] for p in patterns]:
                return []
    return ret_patterns


def get_submissions(trace, test):
    """Return the list of ``(normalized_code, correct)`` pairs for *trace*.

    Only 'test' actions whose code parses are considered, consecutive
    duplicates are dropped, and the list ends with the first correct
    submission (if any).  *test* maps normalized code to its test results.
    """
    programs = []
    code = ''
    for action in parse_trace(trace):
        code = action.apply(code)
        if action.type != 'test':
            continue
        if prolog_parse(code) is None:
            continue
        normalized_code = stringify(rename_vars_list(tokenize(code)))
        if programs and normalized_code == programs[-1][0]:
            continue
        # NOTE(review): raises KeyError for a program missing from the test
        # results -- presumably all submissions are known; confirm.
        result = test[normalized_code]
        correct = result['n_tests'] == result['n_passed']
        programs.append((normalized_code, correct))
        # ignore actions after first correct submission
        if correct:
            break
    return programs


def replay_submissions(programs, ordered_rules, unsuggestable):
    """Print the submissions in *programs* with the rule suggested for each.

    *ordered_rules* are tried in order and the first applicable one is
    suggested.  Programs for which no rule applies are counted in the
    Counter *unsuggestable*.  Returns the list of suggestions; each is a dict
    with the keys 'program', 'rule', 'found', 'passed' and 'matched'.
    """
    suggested = []
    for i, (program, correct) in enumerate(programs):
        program_patterns = list(get_patterns(program))
        present = [pattern for pattern, nodes in program_patterns]

        # check if previously suggested rules match
        for s in suggested:
            s['passed'] += 1
            earlier_rule = s['rule']
            match = check_rule(earlier_rule, program_patterns)
            if (earlier_rule.klass == 'T' and len(match) == len(earlier_rule.condition) or
                    earlier_rule.klass == 'F' and not match):
                s['matched'].append(s['passed'])

        # only check programs until first correct submission
        if correct:
            print(str(i) + ' PASS\t' + program)
            print()
            break

        for rule in ordered_rules:
            match = check_rule(rule, program_patterns)
            # An 'F' rule applies when it matches; a 'T' rule applies when
            # the number of matched entries is one less than its conditions.
            if (rule.klass == 'F' and not match or
                    rule.klass == 'T' and len(match) != len(rule.condition)-1):
                continue

            # store suggestion to see if it was implemented later
            if not any(s['program'] == program and s['rule'] == rule for s in suggested):
                # passed: how many submission before PASS
                # matched: list of submissions where the suggested rule matched
                #          (the current submission has index 0, the next 1 and so on)
                suggested.append({'program': program, 'rule': rule, 'found': i, 'passed': 0, 'matched': []})

            # get highlights
            color = 'green' if rule.klass == 'T' else 'red'
            highlight = set()
            for _, nodes in match:
                for n in nodes:
                    highlight.add((n[0].pos, len(n[0].val), color))

            # print highlighted program
            print(str(i) + ' FAIL', end='\t')
            color_print(program, list(highlight))

            # print rule
            for rule_pattern, yes in rule.condition:
                if rule.klass == 'T':
                    if rule_pattern in present:
                        print('good\t' + str(rule_pattern))
                    else:
                        print('missing\t' + str(rule_pattern))
                else:
                    if rule_pattern in present:
                        print('buggy\t' + str(rule_pattern))
            print()
            break
        else:
            # no rule applied to this program
            print(str(i) + ' FAIL\t' + str(program))
            print()
            unsuggestable[program] += 1

    print('Suggestions and versions in which they were implemented:')
    for s in suggested:
        index = len(programs) - (s['passed'] + 1)
        print(index, [index + m for m in s['matched']])
    return suggested


def average(values):
    """Return the arithmetic mean of *values*, or 'n/a' if there are none."""
    values = list(values)
    if not values:
        return 'n/a'
    return sum(values)/len(values)


def report(all_suggestions, unsuggestable):
    """Print the final statistics for all suggestions and unmatched programs."""
    # A suggestion counts as implemented when its rule matched in the last
    # submission that was checked against it.
    matched = []
    not_matched = []
    # rules that did / did not match in the solution
    good = collections.Counter()
    bad = collections.Counter()
    for s in all_suggestions:
        implemented = s['passed'] in s['matched']
        (matched if implemented else not_matched).append(s)
        (good if implemented else bad)[s['rule']] += 1

    print('Statistics')
    print('----------')
    print('# of suggestions that were implemented:', len(matched))
    print('# of suggestions that were not implemented:', len(not_matched))
    print('avg. # of submissions before suggestion was implemented:',
          average(s['matched'][0] for s in matched))
    print('avg. # of submissions until PASS after suggestion was implemented:',
          average(s['passed'] - s['matched'][0] for s in matched))
    print('avg. # of submissions until PASS if suggestion was not implemented:',
          average(s['passed'] for s in not_matched))
    print()

    print('Unsuggestable programs')
    print('----------------------')
    for p, count in unsuggestable.most_common():
        print('{}\t{}'.format(count, p))
    print()

    print('Good rules')
    print('----------')
    for r, count in good.most_common():
        print('Suggested for ' + str(count) + ' submissions')
        print(r)

    print('Bad rules')
    print('---------')
    for r, count in bad.most_common():
        print('Suggested for ' + str(count) + ' submissions')
        print(r)


def main(argv):
    """Run the evaluation; *argv* is the full argument vector (sys.argv)."""
    if len(argv) < 4:
        sys.exit('usage: test-rules.py SOLUTIONS_FILE PROBLEM_ID DATA_DIR')

    # script arguments
    solutions_file = argv[1]
    pid = int(argv[2])
    data_dir = argv[3]

    attributes_file = os.path.join(data_dir, 'attributes')
    rules_file = os.path.join(data_dir, 'rules')
    users_file = os.path.join(data_dir, 'users-test')
    programs_file = os.path.join(data_dir, 'programs.pickle')

    # read test results for known programs
    test = load_pickle(programs_file)

    # read traces (a later solution of the same user replaces an earlier one)
    users = read_users(users_file)
    traces = {}
    for solution in load_pickle(solutions_file):
        if solution.problem_id == pid and solution.codeq_user_id in users:
            traces[solution.codeq_user_id] = solution.trace

    # read attributes and rules
    attributes = read_attributes(attributes_file)
    rules = read_rules(rules_file, attributes)

    # check rules in order, buggy rules first
    ordered_rules = ([r for r in rules if r.klass == 'F'] +
                     [r for r in rules if r.klass == 'T'])

    # keep track of when each suggestion was applied
    all_suggestions = []
    # programs with no matching rule
    unsuggestable = collections.Counter()

    for trace in traces.values():
        # get submissions from trace
        programs = get_submissions(trace, test)

        # ignore traces with no / only correct submissions
        if not any(p[1] for p in programs) or all(p[1] for p in programs):
            continue

        all_suggestions += replay_submissions(programs, ordered_rules, unsuggestable)

        print('-'*30)
        print()

    report(all_suggestions, unsuggestable)


if __name__ == '__main__':
    main(sys.argv)