summaryrefslogtreecommitdiff
path: root/monkey
diff options
context:
space:
mode:
authorTimotej Lazar <timotej.lazar@fri.uni-lj.si>2015-11-18 13:20:00 +0100
committerTimotej Lazar <timotej.lazar@fri.uni-lj.si>2015-12-11 16:11:15 +0100
commitfa39fe7bfedd0b2e615d369adb5b510ceb9b857f (patch)
treeabf381b007d1816247cab534ac8e2152695ad596 /monkey
parentdd723bd01634fa5ffc85402ea10947e472b257af (diff)
Use a more general method for extracting edits
This is a large overhaul of monkey code. Before, only edits within individual lines were tracked, which required a Prolog-specific method for splitting a program into a list of lines for every rule. In this version, modifications can be tracked within arbitrary code ranges. Ranges to be tracked are determined by selecting "interesting" subtrees in the AST of the starting code version. The new method is simpler, less language-dependent and easier to extend. The downside is that a program must be syntactically correct before we can attempt to fix it (the previous approach could handle programs with syntax errors in some cases). This commit also integrates a call to monkey.fix in prolog_session.hint, by running it if no other hint is found.
Diffstat (limited to 'monkey')
-rw-r--r--[-rwxr-xr-x]monkey/__init__.py333
-rw-r--r--monkey/edits.py332
-rwxr-xr-xmonkey/test.py61
3 files changed, 240 insertions, 486 deletions
diff --git a/monkey/__init__.py b/monkey/__init__.py
index 09f77b0..db00493 100755..100644
--- a/monkey/__init__.py
+++ b/monkey/__init__.py
@@ -18,8 +18,7 @@ import math
import time
import prolog.engine
-from .edits import classify_edits
-from prolog.util import Token, annotate, compose, map_vars, normalized, rename_vars, stringify
+from prolog.util import Token, tokenize, rename_vars, stringify, parse, interesting_ranges, rename_vars_list
from .util import damerau_levenshtein, PQueue
# Starting from [code], find a sequence of edits that transforms it into a
@@ -27,216 +26,45 @@ from .util import damerau_levenshtein, PQueue
# programs.
# Return (solution, edits, time spent, #programs checked). If no solution is
# found within [timeout] seconds, return solution='' and edits=[].
-def fix(name, code, edits, test, timeout=30, debug=False):
- # A dictionary of edits with costs for each edit type (insert, remove or
- # change a line). Edits are tuples (before, after), where before and after
- # are sequences of tokens. Variable names are normalized to A0, A1, A2,….
- inserts, removes, changes = classify_edits(edits)
-
- # Generate states that can be reached from the given program with one edit.
- # The program is given as a list of tokens.
+def fix(code, edits, test, timeout=30, debug=False):
def step(program, path=None):
- # Apply edits to program in order; skip tokens with index lower than
- # last step.
- start = path[-1][4] if path else 0
-
- for i, token in enumerate(program):
- # Get variable names in the current rule.
- if i == 0 or program[i-1].type == 'PERIOD':
- variables = []
- for t in program[i:]:
- if t.type == 'VARIABLE' and not t.val.startswith('_'):
- if t.val not in variables:
- variables.append(t.val)
- elif t.type == 'PERIOD':
- break
-
- # Remember the first token in the current part.
- if i == 0 or program[i-1].stop:
- first = i
-
- # Don't edit already modified parts of the program.
- if i < start:
- continue
-
- # Add a new fact at beginning or after another rule.
- if i == 0 or token.type == 'PERIOD':
- new_start = i+1 if token.type == 'PERIOD' else 0
- n_rules = program.count(Token('PERIOD', '.'))
- rule = 0 if i == 0 else program[i-1].rule+1 # index of new rule
-
- for new, cost in inserts.items():
- # New rule must start with correct predicate name.
- if not (new[0].type == 'NAME' and new[0].val in name):
- continue
-
- # Here we always insert a fact, so replace trailing :- with ..
- if new[-1].type == 'FROM':
- new = new[:-1]
-
- new_real = tuple([t.clone(rule=rule, part=0) for t in new + (Token('PERIOD', '.', stop=True),)])
- new_after = tuple([t.clone(rule=t.rule+1) for t in program[new_start:]])
-
- new_program = program[:new_start] + new_real + new_after
- new_step = ('add_rule', new_start, (), new_real, new_start)
- new_cost = cost * math.pow(0.3, n_rules)
- yield (new_program, new_step, new_cost)
-
- if token.stop and i > first:
- real_last = last = i+1
- if token.type != 'FROM':
- real_last -= 1
- part = program[first:real_last]
- part_whole = program[first:last]
- part_normal = tuple(rename_vars(part))
-
- # Apply each edit a→b where a matches this part.
- seen = False # has there been such an edit?
- for (a, b), cost in changes.items():
- if part_normal == a:
- seen = True
- if b[-1].type == 'FROM':
- b = b[:-1] + (b[-1].clone(stop=True),)
- b_real = tuple([t.clone(rule=program[first].rule,
- part=program[first].part)
- for t in map_vars(a, b, part, variables)])
-
- new_program = program[:first] + b_real + program[real_last:]
- new_step = ('change_part', first, part, b_real, first)
- yield (new_program, new_step, cost)
-
-
- # Remove this part.
- if token.type in ('COMMA', 'SEMI'):
- if part_normal in removes.keys() or not seen:
- new_after = list(program[last:])
- for j, t in enumerate(new_after):
- if t.rule > token.rule:
- break
- new_after[j] = t.clone(part=t.part-1)
- new_program = program[:first] + tuple(new_after)
- new_step = ('remove_part', first, part_whole, (), first-1)
- new_cost = removes.get(part_normal, 1.0) * 0.99
- yield (new_program, new_step, new_cost)
-
- # Remove this part at the end of the current rule.
- if token.type == 'PERIOD' and token.part > 0:
- if part_normal in removes.keys() or not seen:
- if token.part == 0: # part is fact, remove rule
- new_after = list(program[last+1:])
- for j, t in enumerate(new_after):
- new_after[j] = t.clone(rule=t.rule-1)
- new_program = program[:first] + tuple(new_after)
- new_step = ('remove_rule', first, part, (), first)
- else: # part is subgoal, remove part
- new_after = list(program[last-1:])
- for j, t in enumerate(new_after):
- if t.rule > token.rule:
- break
- new_after[j] = t.clone(part=t.part-1)
- new_program = program[:first-1] + tuple(new_after)
- new_step = ('remove_part', first-1, (program[first-1],) + part, (), first-1)
-
- new_cost = removes.get(part_normal, 1.0) * 0.99
- yield (new_program, new_step, new_cost)
-
-
- # Insert a new part (goal) after this part.
- if token.type in ('COMMA', 'FROM'):
- for new, cost in inserts.items():
- # Don't try to insert a head into the body.
- if new[-1].type == 'FROM':
- continue
-
- new_real = tuple([t.clone(rule=program[first].rule,
- part=program[first].part+1)
- for t in map_vars([], new, [], variables) + [Token('COMMA', ',', stop=True)]])
-
- new_after = list(program[last:])
- for j, t in enumerate(new_after):
- if t.rule > token.rule:
- break
- new_after[j] = t.clone(rule=program[first].rule, part=t.part+1)
-
- new_program = program[:last] + new_real + tuple(new_after)
- new_step = ('add_part', last, (), new_real, last)
- new_cost = cost * 0.4
- yield (new_program, new_step, new_cost)
-
- # Insert a new part (goal) at the end of current rule.
- if token.type == 'PERIOD':
- for new, cost in inserts.items():
- # Don't try to insert a head into the body.
- if new[-1].type == 'FROM':
- continue
-
- prepend = Token('FROM', ':-') if token.part == 0 else Token('COMMA', ',')
- new_real = (prepend.clone(stop=True, rule=token.rule, part=token.part),) + \
- tuple([t.clone(rule=token.rule, part=token.part+1)
- for t in map_vars([], new, [], variables)])
-
- new_after = list(program[last-1:])
- for j, t in enumerate(new_after):
- if t.rule > token.rule:
- break
- new_after[j] = t.clone(rule=t.rule, part=t.part+1)
-
- new_program = program[:last-1] + new_real + tuple(new_after)
- new_step = ('add_part', last-1, (), new_real, last)
- new_cost = cost * 0.4
- yield (new_program, new_step, new_cost)
-
- # Return a cleaned-up list of steps.
- def postprocess(steps):
- new_steps = []
- for step in steps:
- # Remove the last field from each step as it is unnecessary after a
- # path has been found.
- step = step[:4]
- if new_steps:
- prev = new_steps[-1]
- if prev[0] == 'remove_part' and step[0] == 'remove_part' and \
- prev[1] == step[1]:
- new_steps[-1] = ('remove_part', prev[1], prev[2]+step[2], ())
- continue
-
- if prev[0] == 'remove_part' and step[0] == 'add_part' and \
- prev[1] == step[1]:
- new_steps[-1] = ('change_part', prev[1], prev[2], step[3])
- continue
-
- if prev[0] == 'change_part' and step[0] == 'change_part' and \
- prev[1] == step[1] and step[2] == prev[3]:
- new_steps[-1] = ('change_part', prev[1], prev[2], step[3])
- continue
-
- if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \
- prev[1] == step[1] and step[2] == prev[3][:-1]:
- new_steps[-1] = (prev[0], prev[1], prev[2], step[3]+(prev[3][-1],))
- continue
-
- if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \
- step[1] == prev[1]+1 and step[2] == prev[3][1:]:
- new_steps[-1] = (prev[0], prev[1], prev[2], (prev[3][0],)+step[3])
- continue
- new_steps.append(step)
- for step in new_steps:
- print('index {}: {} {} → {}'.format(
- step[1], step[0], stringify(step[2]), stringify(step[3])))
- return new_steps
+ tokens = program.leaves()
+ for part, range_path in interesting_ranges(program):
+ names = {}
+ part_normal = tuple(rename_vars_list(part, names))
+ for (path, a, b), p in edits.items():
+ if path == range_path and a == part_normal:
+ reverse_names = {v: k for k, v in names.items()}
+ b_real = tuple(rename_vars(b, reverse_names))
+ new_tokens = []
+ idx = None
+ for i, t in enumerate(tokens):
+ if t.pos >= part[0].pos and t.pos+len(t.val) <= part[-1].pos+len(part[-1].val):
+ if idx is None:
+ idx = i
+ new_tokens.extend(b_real)
+ else:
+ new_tokens.append(t)
+ new_code = stringify(new_tokens)
+ new_program = parse(new_code)
+ new_step = (idx, tuple(part), b_real)
+ if new_program is not None:
+ yield (new_program.freeze(), new_step, p)
# Main loop: best-first search through generated programs.
todo = PQueue() # priority queue of candidate solutions
done = set() # programs we have already visited
+ n_tested = 0
+ start_time = time.monotonic()
+ total_time = 0
# Each program gets a task with the sequence of edits that generated it and
# the associated cost. First candidate with cost 1 is the initial program.
- program = tuple(annotate(code))
- todo.push((program, (), 1.0), -1.0)
+ program = parse(code)
+ if program is None:
+ return '', [], total_time, n_tested
+ todo.push((program.freeze(), (), 1.0), -1.0)
- n_tested = 0
- start_time = time.monotonic()
- total_time = 0
while total_time < timeout:
# Get next candidate.
task = todo.pop()
@@ -245,7 +73,7 @@ def fix(name, code, edits, test, timeout=30, debug=False):
program, path, path_cost = task
# If we have already seen this code, skip it.
- code = compose(program)
+ code = stringify(program)
if code in done:
continue
done.add(code)
@@ -253,12 +81,11 @@ def fix(name, code, edits, test, timeout=30, debug=False):
# Print some info about the current task.
if debug:
print('Cost {:.12f}'.format(path_cost))
- for step_type, idx, a, b, _ in path:
- print('index {}: {} {} → {}'.format(idx, step_type, stringify(a), stringify(b)))
+ for idx, a, b in path:
+ print('{}: {} → {}'.format(idx, stringify(a), stringify(b)))
# If the code is correct, we are done.
if test(code):
- path = postprocess(path)
return code, path, total_time, n_tested
n_tested += 1
@@ -274,66 +101,32 @@ def fix(name, code, edits, test, timeout=30, debug=False):
return '', [], total_time, n_tested
-# Return tuples (type, start, end, message) describing edits in [path].
-def fix_hints(code, path):
- program = list(annotate(code))
-
- for step_type, idx, a, b in path:
- if step_type == 'add_rule':
- fix_type = 'insert'
- msg = 'Add another rule.'
- start = program[idx].pos-1
- end = start+2
- elif step_type == 'add_part':
- fix_type = 'insert'
- msg = 'Add another goal to this rule.'
- start = program[idx].pos-1
- end = start+2
- elif step_type == 'remove_rule':
- fix_type = 'remove'
- msg = 'Remove this rule.'
- start = program[idx].pos
- end = program[idx + len(a) - 1].pos
- elif step_type == 'remove_part':
- fix_type = 'remove'
- msg = 'Remove this goal.'
- start = program[idx].pos
- end = idx + len(a) - 1
- if program[end].type in ('COMMA', 'PERIOD', 'SEMI'):
- end -= 1
- end = program[end].pos + len(program[end].val)
- elif step_type == 'change_part':
- fix_type = 'change'
- msg = 'Check this part.'
- first = 0
- while idx+first < len(program)-1 and first < len(a) and first < len(b) and a[first] == b[first]:
- first += 1
- last = len(a)-1
- while last < len(b) and last >= first and a[last] == b[last]:
- last -= 1
- start = program[idx+first].pos
- end = program[idx+last].pos + len(program[idx+last].val)
-
- program[idx:idx+len(a)] = [t.clone(pos=program[idx].pos) for t in b]
- yield fix_type, start, end, msg
-
-
-# Checks for typos in the code and suggest the nearst uploaded term by other users.
-def check_typos(code, names):
- for token in annotate(code):
- if token.type == 'NAME':
- nearest_name = ' '
- nearest_dist = 1000
- own_count = names.get(token.val, 0) # count of the token.val which is compared with the
- # each name in the names
- for name in names.items():
- if name[0] == token.val: # If the names are the skip the code
- continue
-
- distance = damerau_levenshtein(token.val, name[0])
-
- if distance < nearest_dist and distance > 0 and own_count < name[1]:
- nearest_dist = distance # Set best_dist and best_name if the less one is found
- nearest_name = name[0]
- if nearest_dist > 0 and nearest_dist/len(nearest_name) <= 1/3:
- yield 'typo', token.pos, token.pos + len(token.val) , 'Did you mean "{}"?'.format(nearest_name)
+def min_diff(a, b):
+ first = 0
+ while first < len(a) and first < len(b) and a[first] == b[first]:
+ first += 1
+ last = 0
+ while first-last < len(a) and first-last < len(b) and a[last-1] == b[last-1]:
+ last -= 1
+ return first, last
+
+def fix_hints(code, steps):
+ hints = []
+ tokens = tokenize(code)
+ for idx, a, b in steps:
+ start, end = min_diff(a, b)
+
+ if start >= len(a)+end:
+ hint_id = 'monkey_insert'
+ elif start >= len(b)+end:
+ hint_id = 'monkey_remove'
+ else:
+ hint_id = 'monkey_change'
+
+ pos_start = tokens[idx+start].pos
+ pos_end = tokens[idx+len(a)+end-1].pos + len(tokens[idx+len(a)+end-1].val)
+ pos_end = max(pos_end, pos_start+1)
+
+ hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}]
+ tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b]
+ return hints
diff --git a/monkey/edits.py b/monkey/edits.py
index 17f13d3..7744fa2 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -19,26 +19,20 @@ import math
from .action import expand, parse
from .graph import Node
-from prolog.util import annotate, normalized, rename_vars, stringify, tokenize
+from prolog.util import Token, normalized, parse as prolog_parse, rename_vars, rename_vars_ast2, rename_vars_list, interesting_ranges, stringify, tokenize
from .util import get_line, avg, logistic
-# Parse the sequence of actions in [trace] and return a directed acyclic graph
-# representing development history. Each node represents a particular version
-# of some line, and each edge represents a "line edit" (contiguous sequence of
-# inserts/removes within a single line).
-# Return a list of nodes with root as the first element. Also return sets of
-# submissions (user-tested program versions) and queries in this attempt.
-def trace_graph(trace):
- # Return values.
- nodes = [Node([0, 0, ()])] # Node data: rank (Y), line no. (X), and tokens.
+def get_edits_from_trace(trace, test, id):
submissions = set() # Program versions at 'test' actions.
queries = set() # Queries run by the student.
- # State variables.
- leaves = {0: nodes[0]} # Current leaf node for each line.
- rank = 1 # Rank (order / y-position) for the next node.
- code_next = '' # Program code after applying the current action.
- done = False # Set to True on first correct version.
+ # For each observed edit, store a list of features (with repeats) of ASTs
+ # where they were observed.
+ edits = collections.defaultdict(list)
+ def add_edit(path, start, end, tree):
+ if start == end:
+ return
+ edits[(path, start, end)].append(id)
# Parse trace actions and ensure there is a separate action for each
# inserted/removed character.
@@ -49,194 +43,139 @@ def trace_graph(trace):
# Only a few traces fail to parse, so just skip them.
actions = []
+ # State variables.
+ open_edits = []
+ code_next = '' # Program code after applying the current action.
+ done = False # Set to True on first correct version.
+ prev_tree = None
+ prev_action = None
+
for action_id, action in enumerate(actions):
code = code_next
code_next = action.apply(code)
- if action.type == 'test':
- submissions.add((code, action.total == action.passed))
- if action.total == action.passed:
- done = True
-
- elif action.type == 'solve' or action.type == 'solve_all':
- queries.add(action.query.rstrip(' .'))
-
- elif action.type == 'insert' or action.type == 'remove':
- # Ignore edits after the first correct version.
- if done:
- continue
-
- # Number of the changed line.
- line = code[:action.offset].count('\n')
- # Current leaf node for this line.
- parent = leaves[line]
- # Tokens in this line after applying [action].
- tokens_next = tuple(tokenize(get_line(code_next, line)))
-
- # If a new node is inserted, clone each leaf into the next rank.
- # This makes it easier to print the graph for graphviz; when
- # analyzing the graph, duplicate nodes without siblings should be
- # ignored.
- new_leaves = {}
-
- if action.text == '\n':
- if action.type == 'insert':
- tokens_next_right = tuple(tokenize(get_line(code_next, line+1)))
-
- child_left = Node([rank, line, tokens_next])
- parent.add_out(child_left)
-
- child_right = Node([rank, line+1, tokens_next_right])
- parent.add_out(child_right)
-
- # Create new leaf nodes.
- for i, leaf in leaves.items():
- if i < line:
- new_leaves[i] = Node([rank, i, leaf.data[2]])
- leaf.add_out(new_leaves[i])
- elif i > line:
- new_leaves[i+1] = Node([rank, i+1, leaf.data[2]])
- leaf.add_out(new_leaves[i+1])
- new_leaves[line] = child_left
- new_leaves[line+1] = child_right
-
- elif action.type == 'remove':
- parent_right = leaves[line+1]
-
- child = Node([rank, line, tokens_next])
- parent_right.add_out(child)
- parent.add_out(child)
-
- # Create new leaf nodes.
- for i, leaf in leaves.items():
- if i < line:
- new_leaves[i] = Node([rank, i, leaf.data[2]])
- leaf.add_out(new_leaves[i])
- elif i > line+1:
- new_leaves[i-1] = Node([rank, i-1, leaf.data[2]])
- leaf.add_out(new_leaves[i-1])
- new_leaves[line] = child
- else:
- # Skip the node if the next action is insert/remove (except \n)
- # on the same line.
- if action_id < len(actions)-1:
- action_next = actions[action_id+1]
- if action_next.type in ('insert', 'remove'):
- line_next = code_next[:action_next.offset].count('\n')
- if action_next.text != '\n' and line == line_next:
- continue
-
- # Skip the node if it is the same as the parent.
- if tokens_next == parent.data[2]:
- continue
-
- child = Node([rank, line, tokens_next])
- parent.add_out(child)
-
- # Create new leaf nodes.
- for i, leaf in leaves.items():
- if i != line:
- new_leaves[i] = Node([rank, i, leaf.data[2]])
- leaf.add_out(new_leaves[i])
- new_leaves[line] = child
-
- leaves = new_leaves
- nodes += leaves.values()
- rank += 1
-
- return nodes, submissions, queries
-
-# Return a set of edits that appear in the trace_graph given by [nodes].
-def graph_edits(nodes):
- edits = set()
- for a in nodes:
- a_data = a.data[2]
- for b in a.eout:
- b_data = b.data[2]
-
- # Normalize a → b into start → end. Reuse variable names from a
- # when normalizing b.
- var_names = {}
- start = normalized(a_data, var_names)
- end = normalized(b_data, var_names)
-
- if start == end:
- continue
-
- # An edit start → ε happens each time the user inserts \n; ignore
- # such cases.
- if not end and len(a.eout) > 1:
- continue
-
- # Skip edits where start/end is composed of more than one part.
- # TODO improve trace_graph to handle this instead.
- if [t for t in annotate(stringify(start)) if t.part > 0]:
- continue
- if [t for t in annotate(stringify(end)) if t.part > 0]:
- continue
-
- edits.add((start, end))
- return edits
-
-# Build an edit graph for each trace and find "meaningful" (to be defined)
-# edits. Return a dictionary of edits and their frequencies, and also
-# submissions and queries in [traces].
-def get_edits_from_traces(traces):
- # Return values: counts for observed edits, lines, submissions and queries.
- edits = collections.Counter()
+ if action.type in {'solve', 'solve_all', 'test'}:
+ if action.type == 'solve' or action.type == 'solve_all':
+ queries.add(action.query.rstrip(' .'))
+ elif action.type == 'test':
+ correct = test(code)
+ submissions.add((code, correct))
+ if correct:
+ # Ignore actions after the first correct version.
+ done = True
+ break
+
+ tree = prolog_parse(code)
+ if tree and tree.leaves() and tree != prev_tree:
+ for terminals, path in interesting_ranges(tree):
+ pos_start = terminals[0].pos
+ pos_end = terminals[-1].pos + len(terminals[-1].val)
+ # If there is an open edit with the same range, don't add a new one.
+ found = False
+ for e_start_tree, e_start_tokens, e_path, e_pos_start, e_pos_end in open_edits:
+ if e_pos_start == pos_start and e_pos_end == pos_end:
+ found = True
+ break
+ if not found:
+ #print('OPENING {}'.format(terminals))
+ open_edits.append([tree, terminals, path, pos_start, pos_end])
+ prev_tree = tree
+
+ if action.type in {'insert', 'remove'}:
+ new_open_edits = []
+ for start_tree, start_tokens, path, pos_start, pos_end in open_edits:
+ new_pos_start, new_pos_end = pos_start, pos_end
+ if action.type == 'remove':
+ if action.offset < pos_end:
+ new_pos_end -= 1
+ if action.offset < pos_start:
+ new_pos_start -= 1
+ elif action.type == 'insert':
+ if action.offset < pos_start:
+ new_pos_start += 1
+ new_pos_end += 1
+ elif action.offset == pos_start:
+ new_pos_end += 1
+ elif action.offset < pos_end:
+ new_pos_end += 1
+ elif action.offset == pos_end:
+ if (prev_action is None or
+ prev_action.type == 'insert' and prev_action.offset == action.offset-1 or
+ prev_action.type == 'remove' and prev_action.offset == action.offset-1):
+ orig_next = None
+ for terminal in start_tree.leaves():
+ if terminal.pos >= start_tokens[-1].pos + len(start_tokens[-1].val):
+ orig_next = terminal
+ break
+ if not (orig_next and orig_next.val[0] == action.text):
+ new_pos_end += 1
+ pass
+ if new_pos_start != new_pos_end:
+ new_open_edits.append([start_tree, start_tokens, path, new_pos_start, new_pos_end])
+ open_edits = new_open_edits
+ prev_action = action
+
+ if done:
+ for start_tree, start_tokens, path, pos_start, pos_end in open_edits:
+ end_tokens = tokenize(code[pos_start:pos_end])
+ names = {}
+ start_normal = rename_vars_list(start_tokens, names)
+ end_normal = rename_vars_list(end_tokens, names)
+ norm_tree = rename_vars_ast2(start_tree, names)
+ add_edit(path, tuple(start_normal), tuple(end_normal), norm_tree)
+
+ return edits, submissions, queries
+
+def get_edits_from_solutions(solutions, test):
+ # For each observed edit, store a list of features (with repeats) of ASTs
+ # where they were observed.
submissions = collections.Counter()
queries = collections.Counter()
- names = collections.Counter()
+ edits = collections.defaultdict(list)
+
+ for solution in solutions:
+ trace = solution.trace
+ uid = solution.codeq_user_id
+ trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid)
- # Counts of traces where each line appears as a leaf / any node.
- n_leaf = collections.Counter()
- n_all = collections.Counter()
+ # Update edits.
+ for edit, features in trace_edits.items():
+ edits[edit].extend(features)
- for trace in traces:
- nodes, trace_submissions, trace_queries = trace_graph(trace)
- counted_tokens = []
- # Update the submissions/queries counters (use normalized variables).
- for (submission, correct) in trace_submissions:
- if correct:
- tokens = list(tokenize(submission))
- for token in tokens:
- if token.type == 'NAME' and token.val not in counted_tokens:
- names[token.val] += 1
- counted_tokens.append(token.val)
- code = stringify(rename_vars(tokens))
- submissions[code] += 1
+ # Update submission/query counters (use normalized variables).
+ for code, correct in trace_submissions:
+ code = stringify(rename_vars(tokenize(code)))
+ submissions[(code, correct)] += 1
for query in trace_queries:
code = stringify(rename_vars(tokenize(query)))
queries[code] += 1
- # Update the edit and leaf/node counters.
- edits.update(graph_edits(nodes))
- n_leaf.update(set([normalized(n.data[2]) for n in nodes if n.data[2] and not n.eout]))
- n_all.update(set([normalized(n.data[2]) for n in nodes if n.data[2]]))
-
# Discard edits that only occur in one trace.
- singletons = [edit for edit in edits if edits[edit] < 2]
+ singletons = [edit for edit in edits if len(edits[edit]) < 2]
for edit in singletons:
del edits[edit]
+ n_start = collections.Counter()
+ n_start_all = 0
+ for (path, a, b), features in edits.items():
+ edits[(path, a, b)] = len(features)
+ n_start[(path, a)] += len(features)
+ n_start_all += len(features)
+
# Find the probability of each edit a → b.
- for (a, b), count in edits.items():
- p = 1.0
- if a:
- p *= 1 - (n_leaf[a] / (n_all[a]+1))
- if b:
- b_normal = normalized(b)
- p *= n_leaf[b_normal] / (n_all[b_normal]+1)
- if a and b:
- p = math.sqrt(p)
- edits[(a, b)] = p
+ new_edits = {}
+ for (path, a, b), count in edits.items():
+ if a != b:
+ p = count / n_start[(path, a)]
+ new_edits[(path, a, b)] = p
+ edits = new_edits
# Tweak the edit distribution to improve search.
avg_p = avg(edits.values())
for edit, p in edits.items():
edits[edit] = logistic(p, k=3, x_0=avg_p)
- return edits, submissions, queries, names
+ return edits, submissions, queries
def classify_edits(edits):
inserts = {}
@@ -255,7 +194,9 @@ def classify_edits(edits):
# Extract edits and other data from existing traces for each problem.
if __name__ == '__main__':
import pickle
- from db.models import Problem, Solution
+ from db.models import CodeqUser, Problem, Solution
+ import server.problems
+ from termcolor import colored
# Ignore traces from these users.
ignored_users = [
@@ -265,21 +206,32 @@ if __name__ == '__main__':
358, # sasha
]
- edits, submissions, queries, names = {}, {}, {}, {}
+ edits, submissions, queries = {}, {}, {}
for problem in Problem.list():
pid = problem.id
- solutions = Solution.filter(problem_id=pid, done=True)
- traces = [s.trace for s in solutions if s.codeq_user_id not in ignored_users]
- if not traces:
+ solutions = [s for s in Solution.filter(problem_id=pid, done=True)
+ if s.codeq_user_id not in ignored_users]
+ if not solutions:
print('No traces for {}'.format(problem.identifier))
continue
+ # Testing function.
+ solved_problems = [p for p in CodeqUser.solved_problems(1, problem.language)
+ if p != (problem.group, problem.identifier)]
+ other_solutions = server.problems.solutions_for_problems(problem.language, solved_problems)
+ problem_module = server.problems.load_problem(problem.language, problem.group, problem.identifier, 'common')
+ def test(code):
+ correct, hints = problem_module.test(code, solved_problems)
+ return correct
+
print('Analyzing traces for {}… '.format(problem.identifier), end='', flush=True)
+ print('{} traces… '.format(len(solutions)), end='', flush=True)
try:
- edits[pid], submissions[pid], queries[pid], names[pid] = get_edits_from_traces(traces)
- print('{} edits, {} submissions, {} queries, {} names'.format(
- len(edits[pid]), len(submissions[pid]), len(queries[pid]), len(names[pid])))
+ edits[pid], submissions[pid], queries[pid] = get_edits_from_solutions(solutions, test)
+ print('{} edits, {} submissions, {} queries'.format(
+ len(edits[pid]), len(submissions[pid]), len(queries[pid])))
except Exception as ex:
- print('error: {}'.format(ex))
+ import traceback
+ traceback.print_exc()
- pickle.dump((edits, submissions, queries, names), open('edits.pickle', 'wb'))
+ pickle.dump((edits, submissions, queries), open('edits.pickle', 'wb'))
diff --git a/monkey/test.py b/monkey/test.py
index bca55d8..9eb91e1 100755
--- a/monkey/test.py
+++ b/monkey/test.py
@@ -23,7 +23,7 @@ from termcolor import colored
from db.models import CodeqUser, Problem, Solution
from .graph import graphviz
from . import fix, fix_hints
-from prolog.util import annotate, compose, stringify
+from prolog.util import parse, tokenize, stringify
import server.problems
from .util import indent
@@ -48,8 +48,8 @@ def test(code):
traces = [s.trace for s in Solution.filter(problem_id=problem.id)]
# Load hint database stored in edits.pickle.
-edits, submissions, queries, names = pickle.load(open('edits.pickle', 'rb'))
-edits, submissions, queries, names = edits[problem.id], submissions[problem.id], queries[problem.id], names[problem.id]
+edits, submissions, queries = pickle.load(open('edits.pickle', 'rb'))
+edits, submissions, queries = edits[problem.id], submissions[problem.id], queries[problem.id]
# Load current status (programs for which a hint was found).
try:
@@ -61,13 +61,13 @@ def print_hint(code, solution, steps, fix_time, n_tested):
if solution:
print(colored('Hint found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'green'))
print(colored(' Edits', 'blue'))
- for step_type, pos, a, b in steps:
- print(' {}: {} {} → {}'.format(pos, step_type, stringify(a), stringify(b)))
+ for idx, a, b in steps:
+ print(' {}: {} → {}'.format(idx, stringify(a), stringify(b)))
print(colored(' Hints', 'blue'))
- for fix_type, start, end, msg in fix_hints(code, steps):
- print(' {}-{}: {} (fix type: {})'.format(start, end, msg, fix_type))
+ for hint in fix_hints(code, steps):
+ print(' {}'.format(hint))
print(colored(' Final version', 'blue'))
- print(indent(compose(annotate(solution)), 2))
+ print(indent(stringify(parse(solution)), 2))
else:
print(colored('Hint not found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'red'))
@@ -88,36 +88,41 @@ if len(sys.argv) == 2:
# Try finding a fix.
print(colored('Analyzing program…', 'yellow'))
- solution, steps, fix_time, n_tested = fix(name, code, edits, test, debug=True)
+ solution, steps, fix_time, n_tested = fix(code, edits, test, debug=True)
print_hint(code, solution, steps, fix_time, n_tested)
# Test fix() on incorrect student submissions.
elif sys.argv[2] == 'test':
timeout = int(sys.argv[3]) if len(sys.argv) == 4 else 10
-
- # Find incorrect submissions.
- incorrect_all = []
- for submission, count in sorted(submissions.items()):
- if not test(submission):
- # This incorrect submission appeared in [count] traces.
- incorrect_all += [submission]*count
- incorrect = set(incorrect_all)
+ incorrect = []
+ for (code, correct), count in sorted(submissions.items()):
+ # Skip syntactically-incorrect submissions.
+ if parse(code) is None:
+ continue
+ if not correct:
+ incorrect += [code] * count
print('Fixing {}/{} programs (timeout={})…'.format(
- len([p for p in incorrect if p not in done]), len(incorrect), timeout))
+ len([code for code in incorrect if code not in done]),
+ len(incorrect), timeout))
+ undone = []
for i, program in enumerate(sorted(incorrect)):
if program in done:
+ done.append(program)
+ continue
+ if program in undone:
continue
- print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
- print(indent(compose(annotate(program)), 2))
- solution, steps, fix_time, n_tested = fix(name, program, edits, test, timeout=timeout)
- if solution:
- done.append(program)
+ print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
+ solution, steps, fix_time, n_tested = fix(program, edits, test, timeout=timeout, debug=True)
print_hint(program, solution, steps, fix_time, n_tested)
print()
+ if solution:
+ done.append(program)
+ else:
+ undone.append(program)
pickle.dump(done, open('status-'+str(problem.id)+'.pickle', 'wb'))
print('Found hints for ' + str(len(done)) + ' of ' + str(len(incorrect)) + ' incorrect programs')
@@ -126,7 +131,7 @@ elif sys.argv[2] == 'test':
elif sys.argv[2] == 'info':
# With no additional arguments, print some stats.
if len(sys.argv) == 3:
- print('Problem {} ({}): {} edits and {} unique submissions in {} traces'.format(
+ print('Problem {} ({}): {} edits and {} different submissions in {} traces'.format(
problem.id, colored(name, 'yellow'),
colored(str(len(edits)), 'yellow'),
colored(str(len(submissions)), 'yellow'),
@@ -149,5 +154,9 @@ elif sys.argv[2] == 'info':
# Print all student submissions and their counts.
elif sys.argv[3] == 'submissions':
- for submission, count in submissions.most_common():
- print('{}\t{}'.format(count, submission))
+ which = None
+ if len(sys.argv) > 4:
+ which = sys.argv[4] == 'good'
+ for (code, correct), count in submissions.most_common():
+ if which is None or correct == which:
+ print('{}\t{}'.format(count, code))