From 92066f4993343037c79c93ecbedd2fdb22011320 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Wed, 4 Feb 2015 19:56:30 +0100 Subject: Clean up monkey.edits.get_paths --- monkey/edits.py | 52 +++++++++++++++++++++++++--------------------------- monkey/monkey.py | 2 +- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/monkey/edits.py b/monkey/edits.py index a614e4e..333cb12 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -117,31 +117,29 @@ def edit_graph(actions, debug=False): return nodes, submissions, queries -# Return all interesting edit paths in the edit graph rooted at [root]. -def get_paths(root, path=tuple(), done=None): +# Generate all interesting paths in the edit graph rooted at [root]. +def get_paths(root, path=None, done=None): if done is None: done = set() - cur_path = list(path) - if len(path) == 0 or path[-1] != root.data[2]: - cur_path.append(root.data[2]) + # Add [root] to [path] if it is the first node or different than last. + if not path: + path = (root.data[2],) + elif root.data[2] != path[-1]: + path = path + (root.data[2],) - # leaf node - if len(root.eout) == 0: - yield tuple(cur_path) - # empty node - elif len(path) > 1 and len(root.data[2]) == 0: - yield tuple(cur_path) + # Return the current path if [root] is a leaf or an empty node. + if len(path) > 1 and not (root.eout and root.data[2]): + yield path - if len(root.data[2]) > 0: - new_path = cur_path - else: - new_path = [root.data[2]] + # If [root] is an empty node, start a new path. + if not root.data[2]: + path = (root.data[2],) done.add(root) for node in root.eout: if node not in done: - yield from get_paths(node, tuple(new_path), done) + yield from get_paths(node, path, done) # Build an edit graph for each trace and find "meaningful" (to be defined) # edits. Return a dictionary of edits and their frequencies, and also @@ -167,8 +165,7 @@ def get_edits_from_traces(traces): continue nodes, trace_submissions, trace_queries = edit_graph(actions) - # Update the submissions/queries counters; rename variables first to - # remove trivial differences. + # Update the submissions/queries counters (use normalized variables). for submission in trace_submissions: code = stringify(rename_vars(tokenize(submission))) submissions[code] += 1 @@ -182,19 +179,19 @@ def get_edits_from_traces(traces): for path in get_paths(nodes[0]): for i in range(len(path)): var_names = {} - start = remove_punct(path[i]) - start_t = tuple(rename_vars(start, var_names)) + start = tuple(rename_vars(remove_punct(path[i]), var_names)) for j in range(len(path[i+1:])): var_names_copy = {k: v for k, v in var_names.items()} - end = list(remove_punct(path[i+1+j])) - end_t = tuple(rename_vars(end, var_names_copy)) + end = tuple(rename_vars(remove_punct(path[i+1+j]), var_names_copy)) + if start == end: + continue - edit = (start_t, end_t) - if start_t != end_t and edit not in done: + edit = (start, end) + if edit not in done: done.add(edit) edits[edit] += 1 - lines[start_t] += 1 + lines[start] += 1 # Discard rarely occurring edits. XXX only for testing singletons = [edit for edit in edits if edits[edit] < 2] @@ -202,13 +199,13 @@ def get_edits_from_traces(traces): lines[edit[0]] -= edits[edit] del edits[edit] - # Get the probability of each edit given its 'before' line. + # Get the probability of each edit given its [before] part. for before, after in edits: edits[(before, after)] /= lines[before] # Normalize line frequencies. if len(lines) > 0: - lines_max = max(lines.values()) + lines_max = max(max(lines.values()), 1) lines = {line: count/lines_max for line, count in lines.items()} return edits, lines, submissions, queries @@ -320,6 +317,7 @@ if __name__ == '__main__': submissions = {} queries = {} for problem in Problem.objects.all(): + print(problem.name) pid = problem.pk traces = [a.trace for a in Attempt.objects.filter(problem=problem, done=True)] edits[pid], lines[pid], submissions[pid], queries[pid] = get_edits_from_traces(traces) diff --git a/monkey/monkey.py b/monkey/monkey.py index 962f25d..07d6b0a 100755 --- a/monkey/monkey.py +++ b/monkey/monkey.py @@ -59,7 +59,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False): yield (new_lines, new_rules, new_step, new_cost) - # Add a line to the current rule. + # Add a line at the end of the current rule. for after, cost in inserts.items(): mapping = map_vars([], after, [], rule_tokens) after_real = [t if t.type != 'VARIABLE' else Token('VARIABLE', mapping[t.val]) for t in after] -- cgit v1.2.1