diff options
Diffstat (limited to 'monkey')
-rw-r--r--[-rwxr-xr-x] | monkey/__init__.py | 333 | ||||
-rw-r--r-- | monkey/edits.py | 332 | ||||
-rwxr-xr-x | monkey/test.py | 61 |
3 files changed, 240 insertions, 486 deletions
diff --git a/monkey/__init__.py b/monkey/__init__.py index 09f77b0..db00493 100755..100644 --- a/monkey/__init__.py +++ b/monkey/__init__.py @@ -18,8 +18,7 @@ import math import time import prolog.engine -from .edits import classify_edits -from prolog.util import Token, annotate, compose, map_vars, normalized, rename_vars, stringify +from prolog.util import Token, tokenize, rename_vars, stringify, parse, interesting_ranges, rename_vars_list from .util import damerau_levenshtein, PQueue # Starting from [code], find a sequence of edits that transforms it into a @@ -27,216 +26,45 @@ from .util import damerau_levenshtein, PQueue # programs. # Return (solution, edits, time spent, #programs checked). If no solution is # found within [timeout] seconds, return solution='' and edits=[]. -def fix(name, code, edits, test, timeout=30, debug=False): - # A dictionary of edits with costs for each edit type (insert, remove or - # change a line). Edits are tuples (before, after), where before and after - # are sequences of tokens. Variable names are normalized to A0, A1, A2,…. - inserts, removes, changes = classify_edits(edits) - - # Generate states that can be reached from the given program with one edit. - # The program is given as a list of tokens. +def fix(code, edits, test, timeout=30, debug=False): def step(program, path=None): - # Apply edits to program in order; skip tokens with index lower than - # last step. - start = path[-1][4] if path else 0 - - for i, token in enumerate(program): - # Get variable names in the current rule. - if i == 0 or program[i-1].type == 'PERIOD': - variables = [] - for t in program[i:]: - if t.type == 'VARIABLE' and not t.val.startswith('_'): - if t.val not in variables: - variables.append(t.val) - elif t.type == 'PERIOD': - break - - # Remember the first token in the current part. - if i == 0 or program[i-1].stop: - first = i - - # Don't edit already modified parts of the program. - if i < start: - continue - - # Add a new fact at beginning or after another rule. - if i == 0 or token.type == 'PERIOD': - new_start = i+1 if token.type == 'PERIOD' else 0 - n_rules = program.count(Token('PERIOD', '.')) - rule = 0 if i == 0 else program[i-1].rule+1 # index of new rule - - for new, cost in inserts.items(): - # New rule must start with correct predicate name. - if not (new[0].type == 'NAME' and new[0].val in name): - continue - - # Here we always insert a fact, so replace trailing :- with .. - if new[-1].type == 'FROM': - new = new[:-1] - - new_real = tuple([t.clone(rule=rule, part=0) for t in new + (Token('PERIOD', '.', stop=True),)]) - new_after = tuple([t.clone(rule=t.rule+1) for t in program[new_start:]]) - - new_program = program[:new_start] + new_real + new_after - new_step = ('add_rule', new_start, (), new_real, new_start) - new_cost = cost * math.pow(0.3, n_rules) - yield (new_program, new_step, new_cost) - - if token.stop and i > first: - real_last = last = i+1 - if token.type != 'FROM': - real_last -= 1 - part = program[first:real_last] - part_whole = program[first:last] - part_normal = tuple(rename_vars(part)) - - # Apply each edit a→b where a matches this part. - seen = False # has there been such an edit? - for (a, b), cost in changes.items(): - if part_normal == a: - seen = True - if b[-1].type == 'FROM': - b = b[:-1] + (b[-1].clone(stop=True),) - b_real = tuple([t.clone(rule=program[first].rule, - part=program[first].part) - for t in map_vars(a, b, part, variables)]) - - new_program = program[:first] + b_real + program[real_last:] - new_step = ('change_part', first, part, b_real, first) - yield (new_program, new_step, cost) - - - # Remove this part. - if token.type in ('COMMA', 'SEMI'): - if part_normal in removes.keys() or not seen: - new_after = list(program[last:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(part=t.part-1) - new_program = program[:first] + tuple(new_after) - new_step = ('remove_part', first, part_whole, (), first-1) - new_cost = removes.get(part_normal, 1.0) * 0.99 - yield (new_program, new_step, new_cost) - - # Remove this part at the end of the current rule. - if token.type == 'PERIOD' and token.part > 0: - if part_normal in removes.keys() or not seen: - if token.part == 0: # part is fact, remove rule - new_after = list(program[last+1:]) - for j, t in enumerate(new_after): - new_after[j] = t.clone(rule=t.rule-1) - new_program = program[:first] + tuple(new_after) - new_step = ('remove_rule', first, part, (), first) - else: # part is subgoal, remove part - new_after = list(program[last-1:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(part=t.part-1) - new_program = program[:first-1] + tuple(new_after) - new_step = ('remove_part', first-1, (program[first-1],) + part, (), first-1) - - new_cost = removes.get(part_normal, 1.0) * 0.99 - yield (new_program, new_step, new_cost) - - - # Insert a new part (goal) after this part. - if token.type in ('COMMA', 'FROM'): - for new, cost in inserts.items(): - # Don't try to insert a head into the body. - if new[-1].type == 'FROM': - continue - - new_real = tuple([t.clone(rule=program[first].rule, - part=program[first].part+1) - for t in map_vars([], new, [], variables) + [Token('COMMA', ',', stop=True)]]) - - new_after = list(program[last:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(rule=program[first].rule, part=t.part+1) - - new_program = program[:last] + new_real + tuple(new_after) - new_step = ('add_part', last, (), new_real, last) - new_cost = cost * 0.4 - yield (new_program, new_step, new_cost) - - # Insert a new part (goal) at the end of current rule. - if token.type == 'PERIOD': - for new, cost in inserts.items(): - # Don't try to insert a head into the body. - if new[-1].type == 'FROM': - continue - - prepend = Token('FROM', ':-') if token.part == 0 else Token('COMMA', ',') - new_real = (prepend.clone(stop=True, rule=token.rule, part=token.part),) + \ - tuple([t.clone(rule=token.rule, part=token.part+1) - for t in map_vars([], new, [], variables)]) - - new_after = list(program[last-1:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(rule=t.rule, part=t.part+1) - - new_program = program[:last-1] + new_real + tuple(new_after) - new_step = ('add_part', last-1, (), new_real, last) - new_cost = cost * 0.4 - yield (new_program, new_step, new_cost) - - # Return a cleaned-up list of steps. - def postprocess(steps): - new_steps = [] - for step in steps: - # Remove the last field from each step as it is unnecessary after a - # path has been found. - step = step[:4] - if new_steps: - prev = new_steps[-1] - if prev[0] == 'remove_part' and step[0] == 'remove_part' and \ - prev[1] == step[1]: - new_steps[-1] = ('remove_part', prev[1], prev[2]+step[2], ()) - continue - - if prev[0] == 'remove_part' and step[0] == 'add_part' and \ - prev[1] == step[1]: - new_steps[-1] = ('change_part', prev[1], prev[2], step[3]) - continue - - if prev[0] == 'change_part' and step[0] == 'change_part' and \ - prev[1] == step[1] and step[2] == prev[3]: - new_steps[-1] = ('change_part', prev[1], prev[2], step[3]) - continue - - if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \ - prev[1] == step[1] and step[2] == prev[3][:-1]: - new_steps[-1] = (prev[0], prev[1], prev[2], step[3]+(prev[3][-1],)) - continue - - if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \ - step[1] == prev[1]+1 and step[2] == prev[3][1:]: - new_steps[-1] = (prev[0], prev[1], prev[2], (prev[3][0],)+step[3]) - continue - new_steps.append(step) - for step in new_steps: - print('index {}: {} {} → {}'.format( - step[1], step[0], stringify(step[2]), stringify(step[3]))) - return new_steps + tokens = program.leaves() + for part, range_path in interesting_ranges(program): + names = {} + part_normal = tuple(rename_vars_list(part, names)) + for (path, a, b), p in edits.items(): + if path == range_path and a == part_normal: + reverse_names = {v: k for k, v in names.items()} + b_real = tuple(rename_vars(b, reverse_names)) + new_tokens = [] + idx = None + for i, t in enumerate(tokens): + if t.pos >= part[0].pos and t.pos+len(t.val) <= part[-1].pos+len(part[-1].val): + if idx is None: + idx = i + new_tokens.extend(b_real) + else: + new_tokens.append(t) + new_code = stringify(new_tokens) + new_program = parse(new_code) + new_step = (idx, tuple(part), b_real) + if new_program is not None: + yield (new_program.freeze(), new_step, p) # Main loop: best-first search through generated programs. todo = PQueue() # priority queue of candidate solutions done = set() # programs we have already visited + n_tested = 0 + start_time = time.monotonic() + total_time = 0 # Each program gets a task with the sequence of edits that generated it and # the associated cost. First candidate with cost 1 is the initial program. - program = tuple(annotate(code)) - todo.push((program, (), 1.0), -1.0) + program = parse(code) + if program is None: + return '', [], total_time, n_tested + todo.push((program.freeze(), (), 1.0), -1.0) - n_tested = 0 - start_time = time.monotonic() - total_time = 0 while total_time < timeout: # Get next candidate. task = todo.pop() @@ -245,7 +73,7 @@ def fix(name, code, edits, test, timeout=30, debug=False): program, path, path_cost = task # If we have already seen this code, skip it. - code = compose(program) + code = stringify(program) if code in done: continue done.add(code) @@ -253,12 +81,11 @@ def fix(name, code, edits, test, timeout=30, debug=False): # Print some info about the current task. if debug: print('Cost {:.12f}'.format(path_cost)) - for step_type, idx, a, b, _ in path: - print('index {}: {} {} → {}'.format(idx, step_type, stringify(a), stringify(b))) + for idx, a, b in path: + print('{}: {} → {}'.format(idx, stringify(a), stringify(b))) # If the code is correct, we are done. if test(code): - path = postprocess(path) return code, path, total_time, n_tested n_tested += 1 @@ -274,66 +101,32 @@ def fix(name, code, edits, test, timeout=30, debug=False): return '', [], total_time, n_tested -# Return tuples (type, start, end, message) describing edits in [path]. -def fix_hints(code, path): - program = list(annotate(code)) - - for step_type, idx, a, b in path: - if step_type == 'add_rule': - fix_type = 'insert' - msg = 'Add another rule.' - start = program[idx].pos-1 - end = start+2 - elif step_type == 'add_part': - fix_type = 'insert' - msg = 'Add another goal to this rule.' - start = program[idx].pos-1 - end = start+2 - elif step_type == 'remove_rule': - fix_type = 'remove' - msg = 'Remove this rule.' - start = program[idx].pos - end = program[idx + len(a) - 1].pos - elif step_type == 'remove_part': - fix_type = 'remove' - msg = 'Remove this goal.' - start = program[idx].pos - end = idx + len(a) - 1 - if program[end].type in ('COMMA', 'PERIOD', 'SEMI'): - end -= 1 - end = program[end].pos + len(program[end].val) - elif step_type == 'change_part': - fix_type = 'change' - msg = 'Check this part.' - first = 0 - while idx+first < len(program)-1 and first < len(a) and first < len(b) and a[first] == b[first]: - first += 1 - last = len(a)-1 - while last < len(b) and last >= first and a[last] == b[last]: - last -= 1 - start = program[idx+first].pos - end = program[idx+last].pos + len(program[idx+last].val) - - program[idx:idx+len(a)] = [t.clone(pos=program[idx].pos) for t in b] - yield fix_type, start, end, msg - - -# Checks for typos in the code and suggest the nearst uploaded term by other users. -def check_typos(code, names): - for token in annotate(code): - if token.type == 'NAME': - nearest_name = ' ' - nearest_dist = 1000 - own_count = names.get(token.val, 0) # count of the token.val which is compared with the - # each name in the names - for name in names.items(): - if name[0] == token.val: # If the names are the skip the code - continue - - distance = damerau_levenshtein(token.val, name[0]) - - if distance < nearest_dist and distance > 0 and own_count < name[1]: - nearest_dist = distance # Set best_dist and best_name if the less one is found - nearest_name = name[0] - if nearest_dist > 0 and nearest_dist/len(nearest_name) <= 1/3: - yield 'typo', token.pos, token.pos + len(token.val) , 'Did you mean "{}"?'.format(nearest_name) +def min_diff(a, b): + first = 0 + while first < len(a) and first < len(b) and a[first] == b[first]: + first += 1 + last = 0 + while first-last < len(a) and first-last < len(b) and a[last-1] == b[last-1]: + last -= 1 + return first, last + +def fix_hints(code, steps): + hints = [] + tokens = tokenize(code) + for idx, a, b in steps: + start, end = min_diff(a, b) + + if start >= len(a)+end: + hint_id = 'monkey_insert' + elif start >= len(b)+end: + hint_id = 'monkey_remove' + else: + hint_id = 'monkey_change' + + pos_start = tokens[idx+start].pos + pos_end = tokens[idx+len(a)+end-1].pos + len(tokens[idx+len(a)+end-1].val) + pos_end = max(pos_end, pos_start+1) + + hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}] + tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b] + return hints diff --git a/monkey/edits.py b/monkey/edits.py index 17f13d3..7744fa2 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -19,26 +19,20 @@ import math from .action import expand, parse from .graph import Node -from prolog.util import annotate, normalized, rename_vars, stringify, tokenize +from prolog.util import Token, normalized, parse as prolog_parse, rename_vars, rename_vars_ast2, rename_vars_list, interesting_ranges, stringify, tokenize from .util import get_line, avg, logistic -# Parse the sequence of actions in [trace] and return a directed acyclic graph -# representing development history. Each node represents a particular version -# of some line, and each edge represents a "line edit" (contiguous sequence of -# inserts/removes within a single line). -# Return a list of nodes with root as the first element. Also return sets of -# submissions (user-tested program versions) and queries in this attempt. -def trace_graph(trace): - # Return values. - nodes = [Node([0, 0, ()])] # Node data: rank (Y), line no. (X), and tokens. +def get_edits_from_trace(trace, test, id): submissions = set() # Program versions at 'test' actions. queries = set() # Queries run by the student. - # State variables. - leaves = {0: nodes[0]} # Current leaf node for each line. - rank = 1 # Rank (order / y-position) for the next node. - code_next = '' # Program code after applying the current action. - done = False # Set to True on first correct version. + # For each observed edit, store a list of features (with repeats) of ASTs + # where they were observed. + edits = collections.defaultdict(list) + def add_edit(path, start, end, tree): + if start == end: + return + edits[(path, start, end)].append(id) # Parse trace actions and ensure there is a separate action for each # inserted/removed character. @@ -49,194 +43,139 @@ def trace_graph(trace): # Only a few traces fail to parse, so just skip them. actions = [] + # State variables. + open_edits = [] + code_next = '' # Program code after applying the current action. + done = False # Set to True on first correct version. + prev_tree = None + prev_action = None + for action_id, action in enumerate(actions): code = code_next code_next = action.apply(code) - if action.type == 'test': - submissions.add((code, action.total == action.passed)) - if action.total == action.passed: - done = True - - elif action.type == 'solve' or action.type == 'solve_all': - queries.add(action.query.rstrip(' .')) - - elif action.type == 'insert' or action.type == 'remove': - # Ignore edits after the first correct version. - if done: - continue - - # Number of the changed line. - line = code[:action.offset].count('\n') - # Current leaf node for this line. - parent = leaves[line] - # Tokens in this line after applying [action]. - tokens_next = tuple(tokenize(get_line(code_next, line))) - - # If a new node is inserted, clone each leaf into the next rank. - # This makes it easier to print the graph for graphviz; when - # analyzing the graph, duplicate nodes without siblings should be - # ignored. - new_leaves = {} - - if action.text == '\n': - if action.type == 'insert': - tokens_next_right = tuple(tokenize(get_line(code_next, line+1))) - - child_left = Node([rank, line, tokens_next]) - parent.add_out(child_left) - - child_right = Node([rank, line+1, tokens_next_right]) - parent.add_out(child_right) - - # Create new leaf nodes. - for i, leaf in leaves.items(): - if i < line: - new_leaves[i] = Node([rank, i, leaf.data[2]]) - leaf.add_out(new_leaves[i]) - elif i > line: - new_leaves[i+1] = Node([rank, i+1, leaf.data[2]]) - leaf.add_out(new_leaves[i+1]) - new_leaves[line] = child_left - new_leaves[line+1] = child_right - - elif action.type == 'remove': - parent_right = leaves[line+1] - - child = Node([rank, line, tokens_next]) - parent_right.add_out(child) - parent.add_out(child) - - # Create new leaf nodes. - for i, leaf in leaves.items(): - if i < line: - new_leaves[i] = Node([rank, i, leaf.data[2]]) - leaf.add_out(new_leaves[i]) - elif i > line+1: - new_leaves[i-1] = Node([rank, i-1, leaf.data[2]]) - leaf.add_out(new_leaves[i-1]) - new_leaves[line] = child - else: - # Skip the node if the next action is insert/remove (except \n) - # on the same line. - if action_id < len(actions)-1: - action_next = actions[action_id+1] - if action_next.type in ('insert', 'remove'): - line_next = code_next[:action_next.offset].count('\n') - if action_next.text != '\n' and line == line_next: - continue - - # Skip the node if it is the same as the parent. - if tokens_next == parent.data[2]: - continue - - child = Node([rank, line, tokens_next]) - parent.add_out(child) - - # Create new leaf nodes. - for i, leaf in leaves.items(): - if i != line: - new_leaves[i] = Node([rank, i, leaf.data[2]]) - leaf.add_out(new_leaves[i]) - new_leaves[line] = child - - leaves = new_leaves - nodes += leaves.values() - rank += 1 - - return nodes, submissions, queries - -# Return a set of edits that appear in the trace_graph given by [nodes]. -def graph_edits(nodes): - edits = set() - for a in nodes: - a_data = a.data[2] - for b in a.eout: - b_data = b.data[2] - - # Normalize a → b into start → end. Reuse variable names from a - # when normalizing b. - var_names = {} - start = normalized(a_data, var_names) - end = normalized(b_data, var_names) - - if start == end: - continue - - # An edit start → ε happens each time the user inserts \n; ignore - # such cases. - if not end and len(a.eout) > 1: - continue - - # Skip edits where start/end is composed of more than one part. - # TODO improve trace_graph to handle this instead. - if [t for t in annotate(stringify(start)) if t.part > 0]: - continue - if [t for t in annotate(stringify(end)) if t.part > 0]: - continue - - edits.add((start, end)) - return edits - -# Build an edit graph for each trace and find "meaningful" (to be defined) -# edits. Return a dictionary of edits and their frequencies, and also -# submissions and queries in [traces]. -def get_edits_from_traces(traces): - # Return values: counts for observed edits, lines, submissions and queries. - edits = collections.Counter() + if action.type in {'solve', 'solve_all', 'test'}: + if action.type == 'solve' or action.type == 'solve_all': + queries.add(action.query.rstrip(' .')) + elif action.type == 'test': + correct = test(code) + submissions.add((code, correct)) + if correct: + # Ignore actions after the first correct version. + done = True + break + + tree = prolog_parse(code) + if tree and tree.leaves() and tree != prev_tree: + for terminals, path in interesting_ranges(tree): + pos_start = terminals[0].pos + pos_end = terminals[-1].pos + len(terminals[-1].val) + # If there is an open edit with the same range, don't add a new one. + found = False + for e_start_tree, e_start_tokens, e_path, e_pos_start, e_pos_end in open_edits: + if e_pos_start == pos_start and e_pos_end == pos_end: + found = True + break + if not found: + #print('OPENING {}'.format(terminals)) + open_edits.append([tree, terminals, path, pos_start, pos_end]) + prev_tree = tree + + if action.type in {'insert', 'remove'}: + new_open_edits = [] + for start_tree, start_tokens, path, pos_start, pos_end in open_edits: + new_pos_start, new_pos_end = pos_start, pos_end + if action.type == 'remove': + if action.offset < pos_end: + new_pos_end -= 1 + if action.offset < pos_start: + new_pos_start -= 1 + elif action.type == 'insert': + if action.offset < pos_start: + new_pos_start += 1 + new_pos_end += 1 + elif action.offset == pos_start: + new_pos_end += 1 + elif action.offset < pos_end: + new_pos_end += 1 + elif action.offset == pos_end: + if (prev_action is None or + prev_action.type == 'insert' and prev_action.offset == action.offset-1 or + prev_action.type == 'remove' and prev_action.offset == action.offset-1): + orig_next = None + for terminal in start_tree.leaves(): + if terminal.pos >= start_tokens[-1].pos + len(start_tokens[-1].val): + orig_next = terminal + break + if not (orig_next and orig_next.val[0] == action.text): + new_pos_end += 1 + pass + if new_pos_start != new_pos_end: + new_open_edits.append([start_tree, start_tokens, path, new_pos_start, new_pos_end]) + open_edits = new_open_edits + prev_action = action + + if done: + for start_tree, start_tokens, path, pos_start, pos_end in open_edits: + end_tokens = tokenize(code[pos_start:pos_end]) + names = {} + start_normal = rename_vars_list(start_tokens, names) + end_normal = rename_vars_list(end_tokens, names) + norm_tree = rename_vars_ast2(start_tree, names) + add_edit(path, tuple(start_normal), tuple(end_normal), norm_tree) + + return edits, submissions, queries + +def get_edits_from_solutions(solutions, test): + # For each observed edit, store a list of features (with repeats) of ASTs + # where they were observed. submissions = collections.Counter() queries = collections.Counter() - names = collections.Counter() + edits = collections.defaultdict(list) + + for solution in solutions: + trace = solution.trace + uid = solution.codeq_user_id + trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid) - # Counts of traces where each line appears as a leaf / any node. - n_leaf = collections.Counter() - n_all = collections.Counter() + # Update edits. + for edit, features in trace_edits.items(): + edits[edit].extend(features) - for trace in traces: - nodes, trace_submissions, trace_queries = trace_graph(trace) - counted_tokens = [] - # Update the submissions/queries counters (use normalized variables). - for (submission, correct) in trace_submissions: - if correct: - tokens = list(tokenize(submission)) - for token in tokens: - if token.type == 'NAME' and token.val not in counted_tokens: - names[token.val] += 1 - counted_tokens.append(token.val) - code = stringify(rename_vars(tokens)) - submissions[code] += 1 + # Update submission/query counters (use normalized variables). + for code, correct in trace_submissions: + code = stringify(rename_vars(tokenize(code))) + submissions[(code, correct)] += 1 for query in trace_queries: code = stringify(rename_vars(tokenize(query))) queries[code] += 1 - # Update the edit and leaf/node counters. - edits.update(graph_edits(nodes)) - n_leaf.update(set([normalized(n.data[2]) for n in nodes if n.data[2] and not n.eout])) - n_all.update(set([normalized(n.data[2]) for n in nodes if n.data[2]])) - # Discard edits that only occur in one trace. - singletons = [edit for edit in edits if edits[edit] < 2] + singletons = [edit for edit in edits if len(edits[edit]) < 2] for edit in singletons: del edits[edit] + n_start = collections.Counter() + n_start_all = 0 + for (path, a, b), features in edits.items(): + edits[(path, a, b)] = len(features) + n_start[(path, a)] += len(features) + n_start_all += len(features) + # Find the probability of each edit a → b. - for (a, b), count in edits.items(): - p = 1.0 - if a: - p *= 1 - (n_leaf[a] / (n_all[a]+1)) - if b: - b_normal = normalized(b) - p *= n_leaf[b_normal] / (n_all[b_normal]+1) - if a and b: - p = math.sqrt(p) - edits[(a, b)] = p + new_edits = {} + for (path, a, b), count in edits.items(): + if a != b: + p = count / n_start[(path, a)] + new_edits[(path, a, b)] = p + edits = new_edits # Tweak the edit distribution to improve search. avg_p = avg(edits.values()) for edit, p in edits.items(): edits[edit] = logistic(p, k=3, x_0=avg_p) - return edits, submissions, queries, names + return edits, submissions, queries def classify_edits(edits): inserts = {} @@ -255,7 +194,9 @@ def classify_edits(edits): # Extract edits and other data from existing traces for each problem. if __name__ == '__main__': import pickle - from db.models import Problem, Solution + from db.models import CodeqUser, Problem, Solution + import server.problems + from termcolor import colored # Ignore traces from these users. ignored_users = [ @@ -265,21 +206,32 @@ if __name__ == '__main__': 358, # sasha ] - edits, submissions, queries, names = {}, {}, {}, {} + edits, submissions, queries = {}, {}, {} for problem in Problem.list(): pid = problem.id - solutions = Solution.filter(problem_id=pid, done=True) - traces = [s.trace for s in solutions if s.codeq_user_id not in ignored_users] - if not traces: + solutions = [s for s in Solution.filter(problem_id=pid, done=True) + if s.codeq_user_id not in ignored_users] + if not solutions: print('No traces for {}'.format(problem.identifier)) continue + # Testing function. + solved_problems = [p for p in CodeqUser.solved_problems(1, problem.language) + if p != (problem.group, problem.identifier)] + other_solutions = server.problems.solutions_for_problems(problem.language, solved_problems) + problem_module = server.problems.load_problem(problem.language, problem.group, problem.identifier, 'common') + def test(code): + correct, hints = problem_module.test(code, solved_problems) + return correct + print('Analyzing traces for {}… '.format(problem.identifier), end='', flush=True) + print('{} traces… '.format(len(solutions)), end='', flush=True) try: - edits[pid], submissions[pid], queries[pid], names[pid] = get_edits_from_traces(traces) - print('{} edits, {} submissions, {} queries, {} names'.format( - len(edits[pid]), len(submissions[pid]), len(queries[pid]), len(names[pid]))) + edits[pid], submissions[pid], queries[pid] = get_edits_from_solutions(solutions, test) + print('{} edits, {} submissions, {} queries'.format( + len(edits[pid]), len(submissions[pid]), len(queries[pid]))) except Exception as ex: - print('error: {}'.format(ex)) + import traceback + traceback.print_exc() - pickle.dump((edits, submissions, queries, names), open('edits.pickle', 'wb')) + pickle.dump((edits, submissions, queries), open('edits.pickle', 'wb')) diff --git a/monkey/test.py b/monkey/test.py index bca55d8..9eb91e1 100755 --- a/monkey/test.py +++ b/monkey/test.py @@ -23,7 +23,7 @@ from termcolor import colored from db.models import CodeqUser, Problem, Solution from .graph import graphviz from . import fix, fix_hints -from prolog.util import annotate, compose, stringify +from prolog.util import parse, tokenize, stringify import server.problems from .util import indent @@ -48,8 +48,8 @@ def test(code): traces = [s.trace for s in Solution.filter(problem_id=problem.id)] # Load hint database stored in edits.pickle. -edits, submissions, queries, names = pickle.load(open('edits.pickle', 'rb')) -edits, submissions, queries, names = edits[problem.id], submissions[problem.id], queries[problem.id], names[problem.id] +edits, submissions, queries = pickle.load(open('edits.pickle', 'rb')) +edits, submissions, queries = edits[problem.id], submissions[problem.id], queries[problem.id] # Load current status (programs for which a hint was found). try: @@ -61,13 +61,13 @@ def print_hint(code, solution, steps, fix_time, n_tested): if solution: print(colored('Hint found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'green')) print(colored(' Edits', 'blue')) - for step_type, pos, a, b in steps: - print(' {}: {} {} → {}'.format(pos, step_type, stringify(a), stringify(b))) + for idx, a, b in steps: + print(' {}: {} → {}'.format(idx, stringify(a), stringify(b))) print(colored(' Hints', 'blue')) - for fix_type, start, end, msg in fix_hints(code, steps): - print(' {}-{}: {} (fix type: {})'.format(start, end, msg, fix_type)) + for hint in fix_hints(code, steps): + print(' {}'.format(hint)) print(colored(' Final version', 'blue')) - print(indent(compose(annotate(solution)), 2)) + print(indent(stringify(parse(solution)), 2)) else: print(colored('Hint not found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'red')) @@ -88,36 +88,41 @@ if len(sys.argv) == 2: # Try finding a fix. print(colored('Analyzing program…', 'yellow')) - solution, steps, fix_time, n_tested = fix(name, code, edits, test, debug=True) + solution, steps, fix_time, n_tested = fix(code, edits, test, debug=True) print_hint(code, solution, steps, fix_time, n_tested) # Test fix() on incorrect student submissions. elif sys.argv[2] == 'test': timeout = int(sys.argv[3]) if len(sys.argv) == 4 else 10 - - # Find incorrect submissions. - incorrect_all = [] - for submission, count in sorted(submissions.items()): - if not test(submission): - # This incorrect submission appeared in [count] traces. - incorrect_all += [submission]*count - incorrect = set(incorrect_all) + incorrect = [] + for (code, correct), count in sorted(submissions.items()): + # Skip syntactically-incorrect submissions. + if parse(code) is None: + continue + if not correct: + incorrect += [code] * count print('Fixing {}/{} programs (timeout={})…'.format( - len([p for p in incorrect if p not in done]), len(incorrect), timeout)) + len([code for code in incorrect if code not in done]), + len(incorrect), timeout)) + undone = [] for i, program in enumerate(sorted(incorrect)): if program in done: + done.append(program) + continue + if program in undone: continue - print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow')) - print(indent(compose(annotate(program)), 2)) - solution, steps, fix_time, n_tested = fix(name, program, edits, test, timeout=timeout) - if solution: - done.append(program) + print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow')) + solution, steps, fix_time, n_tested = fix(program, edits, test, timeout=timeout, debug=True) print_hint(program, solution, steps, fix_time, n_tested) print() + if solution: + done.append(program) + else: + undone.append(program) pickle.dump(done, open('status-'+str(problem.id)+'.pickle', 'wb')) print('Found hints for ' + str(len(done)) + ' of ' + str(len(incorrect)) + ' incorrect programs') @@ -126,7 +131,7 @@ elif sys.argv[2] == 'test': elif sys.argv[2] == 'info': # With no additional arguments, print some stats. if len(sys.argv) == 3: - print('Problem {} ({}): {} edits and {} unique submissions in {} traces'.format( + print('Problem {} ({}): {} edits and {} different submissions in {} traces'.format( problem.id, colored(name, 'yellow'), colored(str(len(edits)), 'yellow'), colored(str(len(submissions)), 'yellow'), @@ -149,5 +154,9 @@ elif sys.argv[2] == 'info': # Print all student submissions and their counts. elif sys.argv[3] == 'submissions': - for submission, count in submissions.most_common(): - print('{}\t{}'.format(count, submission)) + which = None + if len(sys.argv) > 4: + which = sys.argv[4] == 'good' + for (code, correct), count in submissions.most_common(): + if which is None or correct == which: + print('{}\t{}'.format(count, code)) |