From fa39fe7bfedd0b2e615d369adb5b510ceb9b857f Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Wed, 18 Nov 2015 13:20:00 +0100 Subject: Use a more general method for extracting edits This is a large overhaul of monkey code. Before, only edits within individual lines were tracked, which required a Prolog-specific method for splitting a program into a list of lines for every rule. In this version, modifications can be tracked within arbitrary code ranges. Ranges to be tracked are determined by selecting "interesting" subtrees in the AST of the starting code version. The new method is simpler, less language-dependent and easier to extend. The downside is that a program must be syntactically correct before we can attempt to fix it (the previous approach could handle programs with syntax errors in some cases). This commit also integrates a call to monkey.fix in prolog_session.hint, by running it if no other hint is found. --- monkey/__init__.py | 333 +++++++++-------------------------------------- monkey/edits.py | 332 ++++++++++++++++++++-------------------------- monkey/test.py | 61 +++++---- prolog/util.py | 187 +++++++++++++------------- server/prolog_session.py | 20 +++ 5 files changed, 349 insertions(+), 584 deletions(-) mode change 100755 => 100644 monkey/__init__.py diff --git a/monkey/__init__.py b/monkey/__init__.py old mode 100755 new mode 100644 index 09f77b0..db00493 --- a/monkey/__init__.py +++ b/monkey/__init__.py @@ -18,8 +18,7 @@ import math import time import prolog.engine -from .edits import classify_edits -from prolog.util import Token, annotate, compose, map_vars, normalized, rename_vars, stringify +from prolog.util import Token, tokenize, rename_vars, stringify, parse, interesting_ranges, rename_vars_list from .util import damerau_levenshtein, PQueue # Starting from [code], find a sequence of edits that transforms it into a @@ -27,216 +26,45 @@ from .util import damerau_levenshtein, PQueue # programs. # Return (solution, edits, time spent, #programs checked). If no solution is # found within [timeout] seconds, return solution='' and edits=[]. -def fix(name, code, edits, test, timeout=30, debug=False): - # A dictionary of edits with costs for each edit type (insert, remove or - # change a line). Edits are tuples (before, after), where before and after - # are sequences of tokens. Variable names are normalized to A0, A1, A2,…. - inserts, removes, changes = classify_edits(edits) - - # Generate states that can be reached from the given program with one edit. - # The program is given as a list of tokens. +def fix(code, edits, test, timeout=30, debug=False): def step(program, path=None): - # Apply edits to program in order; skip tokens with index lower than - # last step. - start = path[-1][4] if path else 0 - - for i, token in enumerate(program): - # Get variable names in the current rule. - if i == 0 or program[i-1].type == 'PERIOD': - variables = [] - for t in program[i:]: - if t.type == 'VARIABLE' and not t.val.startswith('_'): - if t.val not in variables: - variables.append(t.val) - elif t.type == 'PERIOD': - break - - # Remember the first token in the current part. - if i == 0 or program[i-1].stop: - first = i - - # Don't edit already modified parts of the program. - if i < start: - continue - - # Add a new fact at beginning or after another rule. 
- if i == 0 or token.type == 'PERIOD': - new_start = i+1 if token.type == 'PERIOD' else 0 - n_rules = program.count(Token('PERIOD', '.')) - rule = 0 if i == 0 else program[i-1].rule+1 # index of new rule - - for new, cost in inserts.items(): - # New rule must start with correct predicate name. - if not (new[0].type == 'NAME' and new[0].val in name): - continue - - # Here we always insert a fact, so replace trailing :- with .. - if new[-1].type == 'FROM': - new = new[:-1] - - new_real = tuple([t.clone(rule=rule, part=0) for t in new + (Token('PERIOD', '.', stop=True),)]) - new_after = tuple([t.clone(rule=t.rule+1) for t in program[new_start:]]) - - new_program = program[:new_start] + new_real + new_after - new_step = ('add_rule', new_start, (), new_real, new_start) - new_cost = cost * math.pow(0.3, n_rules) - yield (new_program, new_step, new_cost) - - if token.stop and i > first: - real_last = last = i+1 - if token.type != 'FROM': - real_last -= 1 - part = program[first:real_last] - part_whole = program[first:last] - part_normal = tuple(rename_vars(part)) - - # Apply each edit a→b where a matches this part. - seen = False # has there been such an edit? - for (a, b), cost in changes.items(): - if part_normal == a: - seen = True - if b[-1].type == 'FROM': - b = b[:-1] + (b[-1].clone(stop=True),) - b_real = tuple([t.clone(rule=program[first].rule, - part=program[first].part) - for t in map_vars(a, b, part, variables)]) - - new_program = program[:first] + b_real + program[real_last:] - new_step = ('change_part', first, part, b_real, first) - yield (new_program, new_step, cost) - - - # Remove this part. - if token.type in ('COMMA', 'SEMI'): - if part_normal in removes.keys() or not seen: - new_after = list(program[last:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(part=t.part-1) - new_program = program[:first] + tuple(new_after) - new_step = ('remove_part', first, part_whole, (), first-1) - new_cost = removes.get(part_normal, 1.0) * 0.99 - yield (new_program, new_step, new_cost) - - # Remove this part at the end of the current rule. - if token.type == 'PERIOD' and token.part > 0: - if part_normal in removes.keys() or not seen: - if token.part == 0: # part is fact, remove rule - new_after = list(program[last+1:]) - for j, t in enumerate(new_after): - new_after[j] = t.clone(rule=t.rule-1) - new_program = program[:first] + tuple(new_after) - new_step = ('remove_rule', first, part, (), first) - else: # part is subgoal, remove part - new_after = list(program[last-1:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(part=t.part-1) - new_program = program[:first-1] + tuple(new_after) - new_step = ('remove_part', first-1, (program[first-1],) + part, (), first-1) - - new_cost = removes.get(part_normal, 1.0) * 0.99 - yield (new_program, new_step, new_cost) - - - # Insert a new part (goal) after this part. - if token.type in ('COMMA', 'FROM'): - for new, cost in inserts.items(): - # Don't try to insert a head into the body. 
- if new[-1].type == 'FROM': - continue - - new_real = tuple([t.clone(rule=program[first].rule, - part=program[first].part+1) - for t in map_vars([], new, [], variables) + [Token('COMMA', ',', stop=True)]]) - - new_after = list(program[last:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(rule=program[first].rule, part=t.part+1) - - new_program = program[:last] + new_real + tuple(new_after) - new_step = ('add_part', last, (), new_real, last) - new_cost = cost * 0.4 - yield (new_program, new_step, new_cost) - - # Insert a new part (goal) at the end of current rule. - if token.type == 'PERIOD': - for new, cost in inserts.items(): - # Don't try to insert a head into the body. - if new[-1].type == 'FROM': - continue - - prepend = Token('FROM', ':-') if token.part == 0 else Token('COMMA', ',') - new_real = (prepend.clone(stop=True, rule=token.rule, part=token.part),) + \ - tuple([t.clone(rule=token.rule, part=token.part+1) - for t in map_vars([], new, [], variables)]) - - new_after = list(program[last-1:]) - for j, t in enumerate(new_after): - if t.rule > token.rule: - break - new_after[j] = t.clone(rule=t.rule, part=t.part+1) - - new_program = program[:last-1] + new_real + tuple(new_after) - new_step = ('add_part', last-1, (), new_real, last) - new_cost = cost * 0.4 - yield (new_program, new_step, new_cost) - - # Return a cleaned-up list of steps. - def postprocess(steps): - new_steps = [] - for step in steps: - # Remove the last field from each step as it is unnecessary after a - # path has been found. - step = step[:4] - if new_steps: - prev = new_steps[-1] - if prev[0] == 'remove_part' and step[0] == 'remove_part' and \ - prev[1] == step[1]: - new_steps[-1] = ('remove_part', prev[1], prev[2]+step[2], ()) - continue - - if prev[0] == 'remove_part' and step[0] == 'add_part' and \ - prev[1] == step[1]: - new_steps[-1] = ('change_part', prev[1], prev[2], step[3]) - continue - - if prev[0] == 'change_part' and step[0] == 'change_part' and \ - prev[1] == step[1] and step[2] == prev[3]: - new_steps[-1] = ('change_part', prev[1], prev[2], step[3]) - continue - - if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \ - prev[1] == step[1] and step[2] == prev[3][:-1]: - new_steps[-1] = (prev[0], prev[1], prev[2], step[3]+(prev[3][-1],)) - continue - - if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \ - step[1] == prev[1]+1 and step[2] == prev[3][1:]: - new_steps[-1] = (prev[0], prev[1], prev[2], (prev[3][0],)+step[3]) - continue - new_steps.append(step) - for step in new_steps: - print('index {}: {} {} → {}'.format( - step[1], step[0], stringify(step[2]), stringify(step[3]))) - return new_steps + tokens = program.leaves() + for part, range_path in interesting_ranges(program): + names = {} + part_normal = tuple(rename_vars_list(part, names)) + for (path, a, b), p in edits.items(): + if path == range_path and a == part_normal: + reverse_names = {v: k for k, v in names.items()} + b_real = tuple(rename_vars(b, reverse_names)) + new_tokens = [] + idx = None + for i, t in enumerate(tokens): + if t.pos >= part[0].pos and t.pos+len(t.val) <= part[-1].pos+len(part[-1].val): + if idx is None: + idx = i + new_tokens.extend(b_real) + else: + new_tokens.append(t) + new_code = stringify(new_tokens) + new_program = parse(new_code) + new_step = (idx, tuple(part), b_real) + if new_program is not None: + yield (new_program.freeze(), new_step, p) # Main loop: best-first search through generated programs. 
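
The loop below is a best-first search: each candidate program is queued with
the negated product of its edit probabilities, so the likeliest edit sequence
is expanded first. As an illustration of the pattern only (monkey.util's
PQueue is assumed to behave like a binary heap; step and test are stand-ins
for the functions used here), a self-contained sketch:

    import heapq
    import itertools

    def best_first(start, step, test, max_programs=10000):
        # Keys are negated path costs (products of edit probabilities in
        # (0, 1]); heapq pops the smallest key, i.e. the likeliest path.
        tiebreak = itertools.count()   # avoids ever comparing two programs
        todo = [(-1.0, next(tiebreak), start, ())]
        done = set()
        while todo and len(done) < max_programs:
            key, _, program, path = heapq.heappop(todo)
            if program in done:
                continue
            done.add(program)
            if test(program):
                return program, path
            for new_program, new_step, p in step(program, path):
                # Extending a path scales its cost by the edit probability.
                heapq.heappush(todo, (key * p, next(tiebreak),
                                      new_program, path + (new_step,)))
        return None, ()
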
todo = PQueue() # priority queue of candidate solutions done = set() # programs we have already visited + n_tested = 0 + start_time = time.monotonic() + total_time = 0 # Each program gets a task with the sequence of edits that generated it and # the associated cost. First candidate with cost 1 is the initial program. - program = tuple(annotate(code)) - todo.push((program, (), 1.0), -1.0) + program = parse(code) + if program is None: + return '', [], total_time, n_tested + todo.push((program.freeze(), (), 1.0), -1.0) - n_tested = 0 - start_time = time.monotonic() - total_time = 0 while total_time < timeout: # Get next candidate. task = todo.pop() @@ -245,7 +73,7 @@ def fix(name, code, edits, test, timeout=30, debug=False): program, path, path_cost = task # If we have already seen this code, skip it. - code = compose(program) + code = stringify(program) if code in done: continue done.add(code) @@ -253,12 +81,11 @@ def fix(name, code, edits, test, timeout=30, debug=False): # Print some info about the current task. if debug: print('Cost {:.12f}'.format(path_cost)) - for step_type, idx, a, b, _ in path: - print('index {}: {} {} → {}'.format(idx, step_type, stringify(a), stringify(b))) + for idx, a, b in path: + print('{}: {} → {}'.format(idx, stringify(a), stringify(b))) # If the code is correct, we are done. if test(code): - path = postprocess(path) return code, path, total_time, n_tested n_tested += 1 @@ -274,66 +101,32 @@ def fix(name, code, edits, test, timeout=30, debug=False): return '', [], total_time, n_tested -# Return tuples (type, start, end, message) describing edits in [path]. -def fix_hints(code, path): - program = list(annotate(code)) - - for step_type, idx, a, b in path: - if step_type == 'add_rule': - fix_type = 'insert' - msg = 'Add another rule.' - start = program[idx].pos-1 - end = start+2 - elif step_type == 'add_part': - fix_type = 'insert' - msg = 'Add another goal to this rule.' - start = program[idx].pos-1 - end = start+2 - elif step_type == 'remove_rule': - fix_type = 'remove' - msg = 'Remove this rule.' - start = program[idx].pos - end = program[idx + len(a) - 1].pos - elif step_type == 'remove_part': - fix_type = 'remove' - msg = 'Remove this goal.' - start = program[idx].pos - end = idx + len(a) - 1 - if program[end].type in ('COMMA', 'PERIOD', 'SEMI'): - end -= 1 - end = program[end].pos + len(program[end].val) - elif step_type == 'change_part': - fix_type = 'change' - msg = 'Check this part.' - first = 0 - while idx+first < len(program)-1 and first < len(a) and first < len(b) and a[first] == b[first]: - first += 1 - last = len(a)-1 - while last < len(b) and last >= first and a[last] == b[last]: - last -= 1 - start = program[idx+first].pos - end = program[idx+last].pos + len(program[idx+last].val) - - program[idx:idx+len(a)] = [t.clone(pos=program[idx].pos) for t in b] - yield fix_type, start, end, msg - - -# Checks for typos in the code and suggest the nearst uploaded term by other users. 
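
check_typos, removed below, suggested the nearest predicate name observed in
other submissions. The underlying recipe: compare each NAME token against
more frequently used names and suggest the closest one if it is only a small
edit away. A rough sketch of that idea, assuming the damerau_levenshtein
helper from monkey.util and a dict mapping names to usage counts:

    from monkey.util import damerau_levenshtein

    def suggest_name(val, names, max_ratio=1/3):
        # Only consider names used more often than [val] itself.
        own_count = names.get(val, 0)
        best, best_dist = None, None
        for name, count in names.items():
            if name == val or count <= own_count:
                continue
            dist = damerau_levenshtein(val, name)
            if dist > 0 and (best_dist is None or dist < best_dist):
                best, best_dist = name, dist
        # Suggest only if the difference is small relative to name length.
        if best is not None and best_dist / len(best) <= max_ratio:
            return 'Did you mean "{}"?'.format(best)
        return None
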
-def check_typos(code, names): - for token in annotate(code): - if token.type == 'NAME': - nearest_name = ' ' - nearest_dist = 1000 - own_count = names.get(token.val, 0) # count of the token.val which is compared with the - # each name in the names - for name in names.items(): - if name[0] == token.val: # If the names are the skip the code - continue - - distance = damerau_levenshtein(token.val, name[0]) - - if distance < nearest_dist and distance > 0 and own_count < name[1]: - nearest_dist = distance # Set best_dist and best_name if the less one is found - nearest_name = name[0] - if nearest_dist > 0 and nearest_dist/len(nearest_name) <= 1/3: - yield 'typo', token.pos, token.pos + len(token.val) , 'Did you mean "{}"?'.format(nearest_name) +def min_diff(a, b): + first = 0 + while first < len(a) and first < len(b) and a[first] == b[first]: + first += 1 + last = 0 + while first-last < len(a) and first-last < len(b) and a[last-1] == b[last-1]: + last -= 1 + return first, last + +def fix_hints(code, steps): + hints = [] + tokens = tokenize(code) + for idx, a, b in steps: + start, end = min_diff(a, b) + + if start >= len(a)+end: + hint_id = 'monkey_insert' + elif start >= len(b)+end: + hint_id = 'monkey_remove' + else: + hint_id = 'monkey_change' + + pos_start = tokens[idx+start].pos + pos_end = tokens[idx+len(a)+end-1].pos + len(tokens[idx+len(a)+end-1].val) + pos_end = max(pos_end, pos_start+1) + + hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}] + tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b] + return hints diff --git a/monkey/edits.py b/monkey/edits.py index 17f13d3..7744fa2 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -19,26 +19,20 @@ import math from .action import expand, parse from .graph import Node -from prolog.util import annotate, normalized, rename_vars, stringify, tokenize +from prolog.util import Token, normalized, parse as prolog_parse, rename_vars, rename_vars_ast2, rename_vars_list, interesting_ranges, stringify, tokenize from .util import get_line, avg, logistic -# Parse the sequence of actions in [trace] and return a directed acyclic graph -# representing development history. Each node represents a particular version -# of some line, and each edge represents a "line edit" (contiguous sequence of -# inserts/removes within a single line). -# Return a list of nodes with root as the first element. Also return sets of -# submissions (user-tested program versions) and queries in this attempt. -def trace_graph(trace): - # Return values. - nodes = [Node([0, 0, ()])] # Node data: rank (Y), line no. (X), and tokens. +def get_edits_from_trace(trace, test, id): submissions = set() # Program versions at 'test' actions. queries = set() # Queries run by the student. - # State variables. - leaves = {0: nodes[0]} # Current leaf node for each line. - rank = 1 # Rank (order / y-position) for the next node. - code_next = '' # Program code after applying the current action. - done = False # Set to True on first correct version. + # For each observed edit, store a list of features (with repeats) of ASTs + # where they were observed. + edits = collections.defaultdict(list) + def add_edit(path, start, end, tree): + if start == end: + return + edits[(path, start, end)].append(id) # Parse trace actions and ensure there is a separate action for each # inserted/removed character. @@ -49,194 +43,139 @@ def trace_graph(trace): # Only a few traces fail to parse, so just skip them. actions = [] + # State variables. 
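
An "open edit" remembers a character range of the starting parse tree and
follows that range while the student types. Stripped of the boundary cases
handled in the loop below, the offset bookkeeping reduces to this sketch
(action objects are assumed to carry type, offset and single-character text,
as produced by expand above):

    def shift_range(pos_start, pos_end, action):
        # Return the tracked (start, end) range after one keystroke.
        if action.type == 'remove':
            if action.offset < pos_start:
                pos_start -= 1
            if action.offset < pos_end:
                pos_end -= 1
        elif action.type == 'insert':
            if action.offset < pos_start:
                pos_start += 1
                pos_end += 1
            elif action.offset < pos_end:
                pos_end += 1   # typing inside the range grows it
        return pos_start, pos_end

The real loop additionally decides whether typing exactly at a range's
endpoints should extend it, and drops ranges that shrink to nothing.
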
+ open_edits = [] + code_next = '' # Program code after applying the current action. + done = False # Set to True on first correct version. + prev_tree = None + prev_action = None + for action_id, action in enumerate(actions): code = code_next code_next = action.apply(code) - if action.type == 'test': - submissions.add((code, action.total == action.passed)) - if action.total == action.passed: - done = True - - elif action.type == 'solve' or action.type == 'solve_all': - queries.add(action.query.rstrip(' .')) - - elif action.type == 'insert' or action.type == 'remove': - # Ignore edits after the first correct version. - if done: - continue - - # Number of the changed line. - line = code[:action.offset].count('\n') - # Current leaf node for this line. - parent = leaves[line] - # Tokens in this line after applying [action]. - tokens_next = tuple(tokenize(get_line(code_next, line))) - - # If a new node is inserted, clone each leaf into the next rank. - # This makes it easier to print the graph for graphviz; when - # analyzing the graph, duplicate nodes without siblings should be - # ignored. - new_leaves = {} - - if action.text == '\n': - if action.type == 'insert': - tokens_next_right = tuple(tokenize(get_line(code_next, line+1))) - - child_left = Node([rank, line, tokens_next]) - parent.add_out(child_left) - - child_right = Node([rank, line+1, tokens_next_right]) - parent.add_out(child_right) - - # Create new leaf nodes. - for i, leaf in leaves.items(): - if i < line: - new_leaves[i] = Node([rank, i, leaf.data[2]]) - leaf.add_out(new_leaves[i]) - elif i > line: - new_leaves[i+1] = Node([rank, i+1, leaf.data[2]]) - leaf.add_out(new_leaves[i+1]) - new_leaves[line] = child_left - new_leaves[line+1] = child_right - - elif action.type == 'remove': - parent_right = leaves[line+1] - - child = Node([rank, line, tokens_next]) - parent_right.add_out(child) - parent.add_out(child) - - # Create new leaf nodes. - for i, leaf in leaves.items(): - if i < line: - new_leaves[i] = Node([rank, i, leaf.data[2]]) - leaf.add_out(new_leaves[i]) - elif i > line+1: - new_leaves[i-1] = Node([rank, i-1, leaf.data[2]]) - leaf.add_out(new_leaves[i-1]) - new_leaves[line] = child - else: - # Skip the node if the next action is insert/remove (except \n) - # on the same line. - if action_id < len(actions)-1: - action_next = actions[action_id+1] - if action_next.type in ('insert', 'remove'): - line_next = code_next[:action_next.offset].count('\n') - if action_next.text != '\n' and line == line_next: - continue - - # Skip the node if it is the same as the parent. - if tokens_next == parent.data[2]: - continue - - child = Node([rank, line, tokens_next]) - parent.add_out(child) - - # Create new leaf nodes. - for i, leaf in leaves.items(): - if i != line: - new_leaves[i] = Node([rank, i, leaf.data[2]]) - leaf.add_out(new_leaves[i]) - new_leaves[line] = child - - leaves = new_leaves - nodes += leaves.values() - rank += 1 - - return nodes, submissions, queries - -# Return a set of edits that appear in the trace_graph given by [nodes]. -def graph_edits(nodes): - edits = set() - for a in nodes: - a_data = a.data[2] - for b in a.eout: - b_data = b.data[2] - - # Normalize a → b into start → end. Reuse variable names from a - # when normalizing b. - var_names = {} - start = normalized(a_data, var_names) - end = normalized(b_data, var_names) - - if start == end: - continue - - # An edit start → ε happens each time the user inserts \n; ignore - # such cases. 
- if not end and len(a.eout) > 1: - continue - - # Skip edits where start/end is composed of more than one part. - # TODO improve trace_graph to handle this instead. - if [t for t in annotate(stringify(start)) if t.part > 0]: - continue - if [t for t in annotate(stringify(end)) if t.part > 0]: - continue - - edits.add((start, end)) - return edits - -# Build an edit graph for each trace and find "meaningful" (to be defined) -# edits. Return a dictionary of edits and their frequencies, and also -# submissions and queries in [traces]. -def get_edits_from_traces(traces): - # Return values: counts for observed edits, lines, submissions and queries. - edits = collections.Counter() + if action.type in {'solve', 'solve_all', 'test'}: + if action.type == 'solve' or action.type == 'solve_all': + queries.add(action.query.rstrip(' .')) + elif action.type == 'test': + correct = test(code) + submissions.add((code, correct)) + if correct: + # Ignore actions after the first correct version. + done = True + break + + tree = prolog_parse(code) + if tree and tree.leaves() and tree != prev_tree: + for terminals, path in interesting_ranges(tree): + pos_start = terminals[0].pos + pos_end = terminals[-1].pos + len(terminals[-1].val) + # If there is an open edit with the same range, don't add a new one. + found = False + for e_start_tree, e_start_tokens, e_path, e_pos_start, e_pos_end in open_edits: + if e_pos_start == pos_start and e_pos_end == pos_end: + found = True + break + if not found: + #print('OPENING {}'.format(terminals)) + open_edits.append([tree, terminals, path, pos_start, pos_end]) + prev_tree = tree + + if action.type in {'insert', 'remove'}: + new_open_edits = [] + for start_tree, start_tokens, path, pos_start, pos_end in open_edits: + new_pos_start, new_pos_end = pos_start, pos_end + if action.type == 'remove': + if action.offset < pos_end: + new_pos_end -= 1 + if action.offset < pos_start: + new_pos_start -= 1 + elif action.type == 'insert': + if action.offset < pos_start: + new_pos_start += 1 + new_pos_end += 1 + elif action.offset == pos_start: + new_pos_end += 1 + elif action.offset < pos_end: + new_pos_end += 1 + elif action.offset == pos_end: + if (prev_action is None or + prev_action.type == 'insert' and prev_action.offset == action.offset-1 or + prev_action.type == 'remove' and prev_action.offset == action.offset-1): + orig_next = None + for terminal in start_tree.leaves(): + if terminal.pos >= start_tokens[-1].pos + len(start_tokens[-1].val): + orig_next = terminal + break + if not (orig_next and orig_next.val[0] == action.text): + new_pos_end += 1 + pass + if new_pos_start != new_pos_end: + new_open_edits.append([start_tree, start_tokens, path, new_pos_start, new_pos_end]) + open_edits = new_open_edits + prev_action = action + + if done: + for start_tree, start_tokens, path, pos_start, pos_end in open_edits: + end_tokens = tokenize(code[pos_start:pos_end]) + names = {} + start_normal = rename_vars_list(start_tokens, names) + end_normal = rename_vars_list(end_tokens, names) + norm_tree = rename_vars_ast2(start_tree, names) + add_edit(path, tuple(start_normal), tuple(end_normal), norm_tree) + + return edits, submissions, queries + +def get_edits_from_solutions(solutions, test): + # For each observed edit, store a list of features (with repeats) of ASTs + # where they were observed. 
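
Each key is a triple (path, a, b): an AST path such as ('clause', 'and'),
plus the normalized token sequences before and after the edit. The counters
below turn the collected per-trace lists into conditional probabilities
P(b | path, a). A toy illustration with invented counts, using plain strings
instead of token tuples for brevity:

    import collections

    # Two observed rewrites of the same range, seen 6 and 2 times.
    edits = {
        (('clause', 'and'), 'parent(A0,A1)', 'parent(A1,A0)'): 6,
        (('clause', 'and'), 'parent(A0,A1)', 'mother(A0,A1)'): 2,
    }

    n_start = collections.Counter()
    for (path, a, b), count in edits.items():
        n_start[(path, a)] += count

    probs = {key: count / n_start[key[:2]] for key, count in edits.items()}
    # probs: 0.75 for the argument swap, 0.25 for the rename
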
submissions = collections.Counter() queries = collections.Counter() - names = collections.Counter() + edits = collections.defaultdict(list) + + for solution in solutions: + trace = solution.trace + uid = solution.codeq_user_id + trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid) - # Counts of traces where each line appears as a leaf / any node. - n_leaf = collections.Counter() - n_all = collections.Counter() + # Update edits. + for edit, features in trace_edits.items(): + edits[edit].extend(features) - for trace in traces: - nodes, trace_submissions, trace_queries = trace_graph(trace) - counted_tokens = [] - # Update the submissions/queries counters (use normalized variables). - for (submission, correct) in trace_submissions: - if correct: - tokens = list(tokenize(submission)) - for token in tokens: - if token.type == 'NAME' and token.val not in counted_tokens: - names[token.val] += 1 - counted_tokens.append(token.val) - code = stringify(rename_vars(tokens)) - submissions[code] += 1 + # Update submission/query counters (use normalized variables). + for code, correct in trace_submissions: + code = stringify(rename_vars(tokenize(code))) + submissions[(code, correct)] += 1 for query in trace_queries: code = stringify(rename_vars(tokenize(query))) queries[code] += 1 - # Update the edit and leaf/node counters. - edits.update(graph_edits(nodes)) - n_leaf.update(set([normalized(n.data[2]) for n in nodes if n.data[2] and not n.eout])) - n_all.update(set([normalized(n.data[2]) for n in nodes if n.data[2]])) - # Discard edits that only occur in one trace. - singletons = [edit for edit in edits if edits[edit] < 2] + singletons = [edit for edit in edits if len(edits[edit]) < 2] for edit in singletons: del edits[edit] + n_start = collections.Counter() + n_start_all = 0 + for (path, a, b), features in edits.items(): + edits[(path, a, b)] = len(features) + n_start[(path, a)] += len(features) + n_start_all += len(features) + # Find the probability of each edit a → b. - for (a, b), count in edits.items(): - p = 1.0 - if a: - p *= 1 - (n_leaf[a] / (n_all[a]+1)) - if b: - b_normal = normalized(b) - p *= n_leaf[b_normal] / (n_all[b_normal]+1) - if a and b: - p = math.sqrt(p) - edits[(a, b)] = p + new_edits = {} + for (path, a, b), count in edits.items(): + if a != b: + p = count / n_start[(path, a)] + new_edits[(path, a, b)] = p + edits = new_edits # Tweak the edit distribution to improve search. avg_p = avg(edits.values()) for edit, p in edits.items(): edits[edit] = logistic(p, k=3, x_0=avg_p) - return edits, submissions, queries, names + return edits, submissions, queries def classify_edits(edits): inserts = {} @@ -255,7 +194,9 @@ def classify_edits(edits): # Extract edits and other data from existing traces for each problem. if __name__ == '__main__': import pickle - from db.models import Problem, Solution + from db.models import CodeqUser, Problem, Solution + import server.problems + from termcolor import colored # Ignore traces from these users. 
ignored_users = [ @@ -265,21 +206,32 @@ if __name__ == '__main__': 358, # sasha ] - edits, submissions, queries, names = {}, {}, {}, {} + edits, submissions, queries = {}, {}, {} for problem in Problem.list(): pid = problem.id - solutions = Solution.filter(problem_id=pid, done=True) - traces = [s.trace for s in solutions if s.codeq_user_id not in ignored_users] - if not traces: + solutions = [s for s in Solution.filter(problem_id=pid, done=True) + if s.codeq_user_id not in ignored_users] + if not solutions: print('No traces for {}'.format(problem.identifier)) continue + # Testing function. + solved_problems = [p for p in CodeqUser.solved_problems(1, problem.language) + if p != (problem.group, problem.identifier)] + other_solutions = server.problems.solutions_for_problems(problem.language, solved_problems) + problem_module = server.problems.load_problem(problem.language, problem.group, problem.identifier, 'common') + def test(code): + correct, hints = problem_module.test(code, solved_problems) + return correct + print('Analyzing traces for {}… '.format(problem.identifier), end='', flush=True) + print('{} traces… '.format(len(solutions)), end='', flush=True) try: - edits[pid], submissions[pid], queries[pid], names[pid] = get_edits_from_traces(traces) - print('{} edits, {} submissions, {} queries, {} names'.format( - len(edits[pid]), len(submissions[pid]), len(queries[pid]), len(names[pid]))) + edits[pid], submissions[pid], queries[pid] = get_edits_from_solutions(solutions, test) + print('{} edits, {} submissions, {} queries'.format( + len(edits[pid]), len(submissions[pid]), len(queries[pid]))) except Exception as ex: - print('error: {}'.format(ex)) + import traceback + traceback.print_exc() - pickle.dump((edits, submissions, queries, names), open('edits.pickle', 'wb')) + pickle.dump((edits, submissions, queries), open('edits.pickle', 'wb')) diff --git a/monkey/test.py b/monkey/test.py index bca55d8..9eb91e1 100755 --- a/monkey/test.py +++ b/monkey/test.py @@ -23,7 +23,7 @@ from termcolor import colored from db.models import CodeqUser, Problem, Solution from .graph import graphviz from . import fix, fix_hints -from prolog.util import annotate, compose, stringify +from prolog.util import parse, tokenize, stringify import server.problems from .util import indent @@ -48,8 +48,8 @@ def test(code): traces = [s.trace for s in Solution.filter(problem_id=problem.id)] # Load hint database stored in edits.pickle. -edits, submissions, queries, names = pickle.load(open('edits.pickle', 'rb')) -edits, submissions, queries, names = edits[problem.id], submissions[problem.id], queries[problem.id], names[problem.id] +edits, submissions, queries = pickle.load(open('edits.pickle', 'rb')) +edits, submissions, queries = edits[problem.id], submissions[problem.id], queries[problem.id] # Load current status (programs for which a hint was found). try: @@ -61,13 +61,13 @@ def print_hint(code, solution, steps, fix_time, n_tested): if solution: print(colored('Hint found! 
Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'green')) print(colored(' Edits', 'blue')) - for step_type, pos, a, b in steps: - print(' {}: {} {} → {}'.format(pos, step_type, stringify(a), stringify(b))) + for idx, a, b in steps: + print(' {}: {} → {}'.format(idx, stringify(a), stringify(b))) print(colored(' Hints', 'blue')) - for fix_type, start, end, msg in fix_hints(code, steps): - print(' {}-{}: {} (fix type: {})'.format(start, end, msg, fix_type)) + for hint in fix_hints(code, steps): + print(' {}'.format(hint)) print(colored(' Final version', 'blue')) - print(indent(compose(annotate(solution)), 2)) + print(indent(stringify(parse(solution)), 2)) else: print(colored('Hint not found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'red')) @@ -88,36 +88,41 @@ if len(sys.argv) == 2: # Try finding a fix. print(colored('Analyzing program…', 'yellow')) - solution, steps, fix_time, n_tested = fix(name, code, edits, test, debug=True) + solution, steps, fix_time, n_tested = fix(code, edits, test, debug=True) print_hint(code, solution, steps, fix_time, n_tested) # Test fix() on incorrect student submissions. elif sys.argv[2] == 'test': timeout = int(sys.argv[3]) if len(sys.argv) == 4 else 10 - - # Find incorrect submissions. - incorrect_all = [] - for submission, count in sorted(submissions.items()): - if not test(submission): - # This incorrect submission appeared in [count] traces. - incorrect_all += [submission]*count - incorrect = set(incorrect_all) + incorrect = [] + for (code, correct), count in sorted(submissions.items()): + # Skip syntactically-incorrect submissions. + if parse(code) is None: + continue + if not correct: + incorrect += [code] * count print('Fixing {}/{} programs (timeout={})…'.format( - len([p for p in incorrect if p not in done]), len(incorrect), timeout)) + len([code for code in incorrect if code not in done]), + len(incorrect), timeout)) + undone = [] for i, program in enumerate(sorted(incorrect)): if program in done: + done.append(program) + continue + if program in undone: continue - print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow')) - print(indent(compose(annotate(program)), 2)) - solution, steps, fix_time, n_tested = fix(name, program, edits, test, timeout=timeout) - if solution: - done.append(program) + print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow')) + solution, steps, fix_time, n_tested = fix(program, edits, test, timeout=timeout, debug=True) print_hint(program, solution, steps, fix_time, n_tested) print() + if solution: + done.append(program) + else: + undone.append(program) pickle.dump(done, open('status-'+str(problem.id)+'.pickle', 'wb')) print('Found hints for ' + str(len(done)) + ' of ' + str(len(incorrect)) + ' incorrect programs') @@ -126,7 +131,7 @@ elif sys.argv[2] == 'test': elif sys.argv[2] == 'info': # With no additional arguments, print some stats. if len(sys.argv) == 3: - print('Problem {} ({}): {} edits and {} unique submissions in {} traces'.format( + print('Problem {} ({}): {} edits and {} different submissions in {} traces'.format( problem.id, colored(name, 'yellow'), colored(str(len(edits)), 'yellow'), colored(str(len(submissions)), 'yellow'), @@ -149,5 +154,9 @@ elif sys.argv[2] == 'info': # Print all student submissions and their counts. 
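
Submissions are now counted per (code, correct) pair, so correct and
incorrect versions of the same program are kept apart, and the optional
'good' filter below becomes a plain test on the key. With hypothetical data:

    from collections import Counter

    submissions = Counter({
        ('sister(A0, A1) :- female(A0).', False): 4,
        ('sister(A0, A1) :- female(A0), parent(A2, A0), parent(A2, A1).', True): 7,
    })

    for (code, correct), count in submissions.most_common():
        if correct:                      # keep only the 'good' versions
            print('{}\t{}'.format(count, code))
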
elif sys.argv[3] == 'submissions': - for submission, count in submissions.most_common(): - print('{}\t{}'.format(count, submission)) + which = None + if len(sys.argv) > 4: + which = sys.argv[4] == 'good' + for (code, correct), count in submissions.most_common(): + if which is None or correct == which: + print('{}\t{}'.format(count, code)) diff --git a/prolog/util.py b/prolog/util.py index ba61da0..4b316d5 100644 --- a/prolog/util.py +++ b/prolog/util.py @@ -20,12 +20,12 @@ from nltk import Tree # Stores a token's type and value, and optionally the position of the first # character in the lexed stream. -class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop'])): +class Token(namedtuple('Token', ['type', 'val', 'pos'])): __slots__ = () # Custom constructor to support default parameters. - def __new__(cls, type, val='', pos=None, rule=None, part=None, stop=False): - return super(Token, cls).__new__(cls, type, val, pos, rule, part, stop) + def __new__(cls, type, val='', pos=None): + return super(Token, cls).__new__(cls, type, val, pos) def __str__(self): return self.val @@ -45,13 +45,10 @@ class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop'])) return hash(self[1]) # Return a copy of this token, possibly modifying some fields. - def clone(self, type=None, val=None, pos=None, rule=None, part=None, stop=None): + def clone(self, type=None, val=None, pos=None): return Token(self.type if type is None else type, self.val if val is None else val, - self.pos if pos is None else pos, - self.rule if rule is None else rule, - self.part if part is None else part, - self.stop if stop is None else stop) + self.pos if pos is None else pos) from .lexer import lexer, operators from .parser import parser @@ -81,94 +78,6 @@ def stringify(obj): return ''.join([stringify(child) for child in obj]) + '\n' return ''.join([stringify(child) for child in obj]) -# Lex [code] into tokens with rule indexes and stop markers. -def annotate(code): - rule = 0 - part = 0 - parens = [] # stack of currently open parens/brackets/braces - in_parens = 0 # COMMA means a new part if this is 0 - - token = None - lexer.input(code) - for t in lexer: - tok_rule = rule - tok_part = part - tok_stop = True - - if t.type == 'PERIOD': # . - rule += 1 - part = 0 - in_parens = 0 - parens = [] - elif t.type in ('FROM', 'SEMI'): # :- ; - part += 1 - elif t.type == 'COMMA': # , - if not parens or in_parens == 0: - part += 1 - else: - tok_stop = False - - # Handle left parens. - elif t.type == 'LPAREN': # ( - if token and token.type == 'NAME': # name( - tok_stop = False - parens.append('COMPOUND') - in_parens += 1 - else: - parens.append(t.type) # …, ( - elif t.type == 'LBRACKET': # [ - tok_stop = False - parens.append(t.type) - in_parens += 1 - elif t.type == 'LBRACE': # { - parens.append(t.type) - - # Handle right parens. - elif t.type == 'RPAREN': # ) - if parens: - if parens[-1] == 'COMPOUND': # name(…) - tok_stop = False - parens.pop() - in_parens -= 1 - elif parens[-1] == 'LPAREN': # (…) - parens.pop() - elif t.type == 'RBRACKET': # ] - if parens and parens[-1] == 'LBRACKET': # […] - tok_stop = False - parens.pop() - in_parens -= 1 - elif t.type == 'RBRACE': # } - if parens and parens[-1] == 'LBRACE': # {…} - parens.pop() - - # Normal tokens. - else: - tok_stop = False - - token = Token(t.type, t.value, t.lexpos, tok_rule, tok_part, tok_stop) - yield token - -# Format a list of annotated [tokens]. 
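
With the rule/part/stop fields gone, a Token is again a plain
(type, val, pos) record, which is why annotate() and compose() below can be
dropped: formatting now goes through parse() and stringify() instead. A quick
illustration of the slimmed-down API (values are hypothetical):

    from prolog.util import Token, parse, stringify

    t = Token('VARIABLE', 'X', pos=4)
    t2 = t.clone(val='A0')         # new value, same type and position
    assert (t2.type, t2.val, t2.pos) == ('VARIABLE', 'A0', 4)

    # Round-trip a clause through the parser instead of compose().
    print(stringify(parse('sister(X, Y) :- female(X).')))
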
-def compose(tokens): - code = '' - prev = None - for t in tokens: - if t.type == 'SEMI': - code += '\n ' - if prev and (prev.part != t.part or prev.rule != t.rule): - code += '\n' - if t.part: - code += ' ' - - if t.type in ('PERIOD', 'COMMA'): - code += t.val + ' ' - elif t.type in operators.values(): - code += ' ' + t.val + ' ' - else: - code += t.val - prev = t - return code.strip() - # Rename variables in [tokens] to A0, A1, A2,… in order of appearance. def rename_vars(tokens, names=None): if names is None: @@ -179,8 +88,9 @@ def rename_vars(tokens, names=None): tokens = list(tokens) for i, t in enumerate(tokens): if t.type == 'PERIOD': - names.clear() - next_id = 0 + pass +# names.clear() +# next_id = 0 elif t.type == 'VARIABLE': if t.val.startswith('_'): tokens[i] = t.clone(val='A{}'.format(next_id)) @@ -193,6 +103,87 @@ def rename_vars(tokens, names=None): tokens[i] = t.clone(val=names[cur_name]) return tokens +# Rename variables in [tokens] to A0, A1, A2,… in order of appearance. +def rename_vars_list(tokens, names=None): + if names is None: + names = {} + next_id = len(names) + + # Return a new list. + tokens = list(tokens) + for i, t in enumerate(tokens): + if t.type == 'VARIABLE': + if t.val.startswith('_'): + tokens[i] = t.clone(val='A{}'.format(next_id)) + next_id += 1 + else: + cur_name = t.val + if cur_name not in names: + names[cur_name] = 'A{}'.format(next_id) + next_id += 1 + tokens[i] = t.clone(val=names[cur_name]) + return tokens + +# Rename variables in AST rooted at [root] to A0, A1, A2,… in order of +# appearance. +def rename_vars_ast2(root, fixed_names=None): + if fixed_names is None: + fixed_names = {} + names = {} + next_id = len(fixed_names) + len(names) + + def rename_aux(node): + nonlocal fixed_names, names, next_id + if isinstance(node, Tree): + if node.label() == 'clause': + names = {} + next_id = len(fixed_names) + len(names) + new_children = [rename_aux(child) for child in node] + new_node = Tree(node.label(), new_children) + elif isinstance(node, Token): + if node.type == 'VARIABLE': + token = node + if token.val.startswith('_'): + new_node = token.clone(val='A{}'.format(next_id)) + next_id += 1 + else: + cur_name = token.val + if cur_name in fixed_names: + new_name = fixed_names[cur_name] + else: + if cur_name not in names: + names[cur_name] = 'A{}'.format(next_id) + next_id += 1 + new_name = names[cur_name] + new_node = token.clone(val=new_name) + else: + new_node = node + return new_node + return rename_aux(root) + +# Yield "interesting" parts of a Prolog AST as lists of tokens. +def interesting_ranges(ast, path=()): + if ast.label() in {'clause', 'head', 'or', 'if', 'and'}: + if ast.label() == 'and': + for i in range(0, len(ast), 2): + for j in range(i, len(ast), 2): + subs = ast[i:j+1] + terminals = [] + for s in subs: + terminals.extend([s] if isinstance(s, Token) else s.leaves()) + # We want at least some context. + if len(terminals) > 1: + yield terminals, path + (ast.label(),) + else: + terminals = ast.leaves() + # We want at least some context. + if len(terminals) > 1: + yield terminals, path + (ast.label(),) + + for subtree in ast: + if isinstance(subtree, Tree): + yield from interesting_ranges(subtree, path + (ast.label(),)) + # Helper function to remove trailing punctuation from lines and rename # variables to A1,A2,A3,… (potentially using [var_names]). Return a tuple. 
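
normalized() relies on the same renaming scheme. rename_vars_list maps
variables to A0, A1, … in order of first appearance and can share a names
dict across calls, so related token ranges keep a consistent mapping; this is
how edit endpoints are normalized above. For example:

    from prolog.util import rename_vars_list, stringify, tokenize

    names = {}
    head = rename_vars_list(tokenize('sister(X, Y)'), names)
    body = rename_vars_list(tokenize('parent(P, X)'), names)
    print(stringify(head))   # sister(A0,A1)
    print(stringify(body))   # parent(A2,A0)
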
 def normalized(line, var_names=None):
diff --git a/server/prolog_session.py b/server/prolog_session.py
index 7e0af90..1a76130 100644
--- a/server/prolog_session.py
+++ b/server/prolog_session.py
@@ -15,8 +15,11 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import operator
+import os.path
+import pickle
 import threading
 
+import monkey
 import prolog.engine
 import server
 import server.user_session
@@ -126,6 +129,15 @@ class PrologSession(server.LanguageSession):
             hints = language_module.hint(program, solved_problems)
             if not hints and hasattr(problem_module, 'hint'):
                 hints = problem_module.hint(program, solved_problems)
+            if not hints:
+                # Testing function for monkey.
+                def tester(code):
+                    passed, hints = problem_module.test(code, solved_problems)
+                    return passed
+                solution, steps, fix_time, n_tested = monkey.fix(
+                    program, _edits[problem_id], tester, timeout=5, debug=True)
+                if solution and steps:
+                    hints = [{'id': 'monkey_main'}] + monkey.fix_hints(program, steps)
             if not hints:
                 hints = [{'id': 'no_hint'}]
         except Exception as ex:
@@ -186,3 +198,11 @@ class PrologSession(server.LanguageSession):
         return messages, status, have_more
 
 server.language_session_handlers['prolog'] = lambda user_session, problem_id, language_identifier, group_identifier, problem_identifier: PrologSession()
+
+try:
+    _edits, _submissions, _queries = pickle.load(
+        open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'edits.pickle'), 'rb'))
+except:
+    _edits = {}
+    _submissions = {}
+    _queries = {}
-- 
cgit v1.2.1
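
Taken together, the server-side flow added above amounts to: load the pickled
edit statistics once at import time, and when no other hint applies, run
monkey.fix with a per-problem tester and turn the returned steps into hint
dictionaries. A condensed sketch (program, problem_id, problem_module and
solved_problems are assumed to be in scope, as in hint() above):

    import pickle
    import monkey

    with open('edits.pickle', 'rb') as f:
        edits, submissions, queries = pickle.load(f)

    def tester(code):
        passed, hints = problem_module.test(code, solved_problems)
        return passed

    solution, steps, fix_time, n_tested = monkey.fix(
        program, edits[problem_id], tester, timeout=5)
    if solution and steps:
        hints = [{'id': 'monkey_main'}] + monkey.fix_hints(program, steps)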