#!/usr/bin/python3
"""Evaluate classification rules on student program traces.

Reads attributes/rules produced by the rule learner, exports them as JSON
for the tutor, replays each student's submission trace, generates a hint for
every submission, and finally reports how often the hinted patterns actually
appear (or disappear) in the student's eventual solution.
"""

import argparse
import collections
import json
import os.path
import re
from statistics import mean

from termcolor import colored

from monkey.patterns import get_patterns

parser = argparse.ArgumentParser(description='Evaluate rules on student programs.')
parser.add_argument('path', help='path to data directory')
args = parser.parse_args()
data_dir = args.path


# klass: T/F
# condition: list of patterns
# distribution: rule class distribution
# quality: rule quality
class Rule(collections.namedtuple('Rule', ['klass', 'condition', 'distribution', 'quality'])):
    def __str__(self):
        s = 'Rule: class = {}, distribution = {}, quality = {}\n'.format(
            self.klass, self.distribution, self.quality)
        s += ''.join([str(pattern) + '\n' for pattern in self.condition])
        return s


# program: submitted code
# correct: does this submission pass all tests?
# patterns: patterns in this submission
# hint: suggested hint
class Submission(collections.namedtuple('Submission', ['program', 'correct', 'patterns', 'hint'])):
    pass


# ok: required patterns already in program (unused)
# remove: patterns that should be removed
# add: patterns that should be added (intersection from all relevant rules)
# add_alternatives: patterns that should be added (union from all relevant rules)
# NOTE currently either (only remove is set) or (both add and add_alternatives are set)
class Hint(collections.namedtuple('Hint', ['ok', 'remove', 'add', 'add_alternatives'])):
    pass


# read attributes (single pass; each line is "<id>\t<pattern>")
attributes_file = os.path.join(data_dir, 'attributes.tab')
with open(attributes_file, 'r') as f:
    attribute_rows = [line.strip().split('\t') for line in f]
attributes = dict(attribute_rows)                      # id -> pattern
attributes_ordered = [row[1] for row in attribute_rows]  # patterns in file order

# read rules
# Example line: "IF a1!=F AND a2!=F THEN correct=T [ 12  3] 0.85"
rule_re = re.compile(
    r'IF ((?:a[0-9]*!=F(?: AND )*)*) THEN correct=([TF]) *\[ *([0-9]*) *([0-9]*)\] *([0-9.]*)')
rules_file = os.path.join(data_dir, 'rules.txt')
rules = []
with open(rules_file, 'r') as f:
    for line in f:
        match = rule_re.match(line.strip())
        if match:
            cond_str, klass, dist_t, dist_f, quality = match.groups()
            # each condition field looks like "a12!=F"; strip the "!=F" suffix
            # to get the attribute id, then map it to the pattern itself
            condition = tuple(attributes[field[:-3]] for field in cond_str.split(' AND '))
            rules.append(Rule(klass, condition, (int(dist_t), int(dist_f)), float(quality)))
        else:
            print('Did not understand rule:', line.strip())

# export rules for tutor
json_file = os.path.join(data_dir, 'bugs.json')
json_data = {
    'patterns': attributes_ordered,
    'rules': [{
        'condition': r.condition,
        'class': r.klass == 'T',
        'distribution': r.distribution,
        'quality': r.quality,
    } for r in rules],
}
with open(json_file, 'w') as f:
    json.dump(json_data, f, sort_keys=True, indent=2)


def color_print(text, ranges):
    """Print *text*, coloring each (start, length, color) span in *ranges*.

    Overlapping spans are skipped (first-come, by sorted start position).
    """
    i = 0
    for start, length, color in sorted(ranges):
        # ignore overlapping ranges
        if start < i:
            continue
        print(text[i:start], end='')
        print(colored(text[start:start + length], color), end='')
        i = start + length
    print(text[i:])


# generate marks for selected patterns for color_print
def mark(patterns, selected, color):
    """Return a set of (position, length, color) spans for patterns in *selected*."""
    marks = set()
    for pattern, nodes in patterns:
        if pattern in selected:
            # n[0] is the pattern's anchor node; skip nodes without a position
            marks |= set((n[0].pos, len(n[0].val), color) for n in nodes if n[0].pos)
    return marks


# return a hint for the best applicable buggy rule
def suggest_buggy(rules, patterns):
    """Return a "remove these patterns" Hint for the first matching F-rule, or None."""
    present = [p[0] for p in patterns]  # hoisted: patterns found in the program
    for rule in (r for r in rules if r.klass == 'F'):
        # suggest this rule if all patterns in condition are found in the program
        if all(rule_pattern in present for rule_pattern in rule.condition):
            return Hint(ok=[], remove=rule.condition, add=[], add_alternatives=[])
    return None


# return a hint for the best applicable true rule
def suggest_true(rules, patterns):
    """Return an "add these patterns" Hint from the best partially-matched T-rule, or None."""
    # get match info for all true rules, grouped by how many condition
    # patterns are already present in the program
    rule_matches = collections.defaultdict(list)
    for rule in (r for r in rules if r.klass == 'T'):
        found = set()
        missing = set()
        for rule_pattern in rule.condition:
            if any(pattern == rule_pattern for pattern, nodes in patterns):
                found.add(rule_pattern)
            else:
                missing.add(rule_pattern)
        if missing:
            rule_matches[len(found)].append((found, missing))

    # return rules with most matching patterns; iterate actual match counts in
    # descending order (the original hard-coded range(10, 0, -1), silently
    # dropping rules with more than 10 matches); still skip count 0, where
    # nothing from the rule is present yet
    for i in sorted((k for k in rule_matches if k > 0), reverse=True):
        # count how often each pattern is missing across the best rules
        missing_patterns = collections.Counter()
        for found, missing in rule_matches[i]:
            for pattern in missing:
                missing_patterns[pattern] += 1

        # all patterns tied for the highest missing-count
        ranked = missing_patterns.most_common()
        top_count = ranked[0][1]
        best_missing_patterns = []
        for missing_pattern, count in ranked:
            if count != top_count:
                break
            best_missing_patterns.append(missing_pattern)

        # pick the single best pattern by attribute-file order
        add = []
        for pattern in attributes_ordered:
            if pattern in best_missing_patterns:
                add = [pattern]
                break
        add_alternatives = [pattern for pattern, _ in ranked if pattern not in add]
        return Hint(ok=[], remove=[], add=add, add_alternatives=add_alternatives)
    return None


# read traces
users_file = os.path.join(data_dir, 'users-test.txt')
with open(users_file, 'r') as f:
    users = [int(line.strip()) for line in f]

# evaluate hints on student traces
submissions = collections.defaultdict(list)
for user in users:
    user_subs = []
    user_dir = os.path.join(data_dir, 'submissions', str(user))
    # each submission is in a file named <seq>-<total>-<passed>
    for submission in sorted(os.listdir(user_dir), key=lambda x: int(x.split('-')[0])):
        seq, total, passed = submission.split('-')
        correct = total == passed
        with open(os.path.join(user_dir, submission), 'r') as f:
            code = f.read()

        # check rules for this submission: buggy rules take precedence
        program_patterns = list(get_patterns(code))
        hint = suggest_buggy(rules, program_patterns)
        if not hint:
            hint = suggest_true(rules, program_patterns)
        user_subs.append(Submission(code, correct, program_patterns, hint))

        # skip submissions after the first correct program
        if correct:
            break

    # ignore traces with no / only correct submissions
    if (not any(s.correct for s in user_subs) or
            all(s.correct for s in user_subs)):
        continue
    submissions[user] = user_subs

    # print submissions with hints for debugging
    for s in user_subs:
        print('PASS' if s.correct else 'FAIL', end='\t')
        marks = []
        if s.hint and s.hint.remove:
            marks = mark(s.patterns, s.hint.remove, 'red')
        color_print(s.program, marks)
        if s.hint:
            for x in s.hint.remove:
                print('buggy\t', x)
            for x in s.hint.add:
                print('missing\t', x)
            for x in s.hint.add_alternatives:
                print('alternative\t', x)
        print()
    print('-'*30)
    print()

# submissions where hint pattern was implemented in the solution
good_hint = []
# submissions where one of the alternative hint patterns was implemented in the solution
medium_hint = []
# submissions where none of the hint patterns were implemented in the solution
bad_hint = []
# submissions with no suggestions
no_hint = []
# total number of submissions
n_subs = 0

for user, subs in submissions.items():
    # the last submission in each kept trace is the (correct) solution
    solution = subs[-1]
    solution_patterns = [p[0] for p in solution.patterns]
    for s in subs[:-1]:
        n_subs += 1
        if s.hint:
            if s.hint.remove:
                # buggy rule: at least one pattern should not be present in solution
                if any(pattern not in solution_patterns for pattern in s.hint.remove):
                    good_hint.append(s)
                else:
                    bad_hint.append(s)
            else:
                # true rule: all patterns should be present in solution
                if all(pattern in solution_patterns for pattern in s.hint.add):
                    # best suggested pattern(s) match
                    good_hint.append(s)
                elif any(pattern in solution_patterns for pattern in s.hint.add_alternatives):
                    # some suggested pattern(s) match
                    medium_hint.append(s)
                else:
                    bad_hint.append(s)
        else:
            no_hint.append(s)

print('Statistics')
print('----------')
print('avg. submissions per trace:', mean(len(subs) for subs in submissions.values()))
print('avg. clauses in solution:', mean(subs[-1].program.count('.') for subs in submissions.values()))
print('total submissions:', n_subs)
print('positive hints (best implemented):', len([s for s in good_hint if s.hint.add]))
print('positive hints (alternative implemented):', len([s for s in medium_hint if s.hint.add_alternatives]))
print('positive hints (not implemented):', len([s for s in bad_hint if s.hint.add]))
print('buggy hints (implemented):', len([s for s in good_hint if s.hint.remove]))
print('buggy hints (not implemented):', len([s for s in bad_hint if s.hint.remove]))
print('no hints:', len(no_hint))