summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- monkey/edits.py 52
-rwxr-xr-x monkey/monkey.py 2
2 files changed, 26 insertions, 28 deletions
diff --git a/monkey/edits.py b/monkey/edits.py
index a614e4e..333cb12 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -117,31 +117,29 @@ def edit_graph(actions, debug=False):
return nodes, submissions, queries
-# Return all interesting edit paths in the edit graph rooted at [root].
-def get_paths(root, path=tuple(), done=None):
+# Generate all interesting paths in the edit graph rooted at [root].
+def get_paths(root, path=None, done=None):
if done is None:
done = set()
- cur_path = list(path)
- if len(path) == 0 or path[-1] != root.data[2]:
- cur_path.append(root.data[2])
+ # Add [root] to [path] if it is the first node or different than last.
+ if not path:
+ path = (root.data[2],)
+ elif root.data[2] != path[-1]:
+ path = path + (root.data[2],)
- # leaf node
- if len(root.eout) == 0:
- yield tuple(cur_path)
- # empty node
- elif len(path) > 1 and len(root.data[2]) == 0:
- yield tuple(cur_path)
+ # Return the current path if [root] is a leaf or an empty node.
+ if len(path) > 1 and not (root.eout and root.data[2]):
+ yield path
- if len(root.data[2]) > 0:
- new_path = cur_path
- else:
- new_path = [root.data[2]]
+ # If [root] is an empty node, start a new path.
+ if not root.data[2]:
+ path = (root.data[2],)
done.add(root)
for node in root.eout:
if node not in done:
- yield from get_paths(node, tuple(new_path), done)
+ yield from get_paths(node, path, done)
# Build an edit graph for each trace and find "meaningful" (to be defined)
# edits. Return a dictionary of edits and their frequencies, and also
@@ -167,8 +165,7 @@ def get_edits_from_traces(traces):
continue
nodes, trace_submissions, trace_queries = edit_graph(actions)
- # Update the submissions/queries counters; rename variables first to
- # remove trivial differences.
+ # Update the submissions/queries counters (use normalized variables).
for submission in trace_submissions:
code = stringify(rename_vars(tokenize(submission)))
submissions[code] += 1
@@ -182,19 +179,19 @@ def get_edits_from_traces(traces):
for path in get_paths(nodes[0]):
for i in range(len(path)):
var_names = {}
- start = remove_punct(path[i])
- start_t = tuple(rename_vars(start, var_names))
+ start = tuple(rename_vars(remove_punct(path[i]), var_names))
for j in range(len(path[i+1:])):
var_names_copy = {k: v for k, v in var_names.items()}
- end = list(remove_punct(path[i+1+j]))
- end_t = tuple(rename_vars(end, var_names_copy))
+ end = tuple(rename_vars(remove_punct(path[i+1+j]), var_names_copy))
+ if start == end:
+ continue
- edit = (start_t, end_t)
- if start_t != end_t and edit not in done:
+ edit = (start, end)
+ if edit not in done:
done.add(edit)
edits[edit] += 1
- lines[start_t] += 1
+ lines[start] += 1
# Discard rarely occurring edits. XXX only for testing
singletons = [edit for edit in edits if edits[edit] < 2]
@@ -202,13 +199,13 @@ def get_edits_from_traces(traces):
lines[edit[0]] -= edits[edit]
del edits[edit]
- # Get the probability of each edit given its 'before' line.
+ # Get the probability of each edit given its [before] part.
for before, after in edits:
edits[(before, after)] /= lines[before]
# Normalize line frequencies.
if len(lines) > 0:
- lines_max = max(lines.values())
+ lines_max = max(max(lines.values()), 1)
lines = {line: count/lines_max for line, count in lines.items()}
return edits, lines, submissions, queries
@@ -320,6 +317,7 @@ if __name__ == '__main__':
submissions = {}
queries = {}
for problem in Problem.objects.all():
+ print(problem.name)
pid = problem.pk
traces = [a.trace for a in Attempt.objects.filter(problem=problem, done=True)]
edits[pid], lines[pid], submissions[pid], queries[pid] = get_edits_from_traces(traces)
diff --git a/monkey/monkey.py b/monkey/monkey.py
index 962f25d..07d6b0a 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -59,7 +59,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
yield (new_lines, new_rules, new_step, new_cost)
- # Add a line to the current rule.
+ # Add a line at the end of the current rule.
for after, cost in inserts.items():
mapping = map_vars([], after, [], rule_tokens)
after_real = [t if t.type != 'VARIABLE' else Token('VARIABLE', mapping[t.val]) for t in after]