summary | refs | log | tree | commit | diff
path: root/monkey/edits.py
diff options
context:
space:
mode:
author: Timotej Lazar <timotej.lazar@araneo.org>, 2015-02-04 23:48:56 +0100
committer: Aleš Smodiš <aless@guru.si>, 2015-08-11 14:26:01 +0200
commit: 4838e37e26c3fb72ad509d7aef7f307cc7ae3ef2 (patch)
tree: e09c3f476bf0d056cb7ddfcce8a68636327b74a6 /monkey/edits.py
parent: 92066f4993343037c79c93ecbedd2fdb22011320 (diff)
Small cleanups
Diffstat (limited to 'monkey/edits.py')
-rw-r--r-- monkey/edits.py | 22
1 files changed, 13 insertions, 9 deletions
diff --git a/monkey/edits.py b/monkey/edits.py
index 333cb12..ad595b6 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -122,15 +122,16 @@ def get_paths(root, path=None, done=None):
if done is None:
done = set()
- # Add [root] to [path] if it is the first node or different than last.
+ # Add [root] to [path] if it is the first node or different than previous.
if not path:
path = (root.data[2],)
elif root.data[2] != path[-1]:
path = path + (root.data[2],)
# Return the current path if [root] is a leaf or an empty node.
- if len(path) > 1 and not (root.eout and root.data[2]):
- yield path
+ if len(path) > 1:
+ if not root.eout or not root.data[2]:
+ yield path
# If [root] is an empty node, start a new path.
if not root.data[2]:
@@ -175,12 +176,11 @@ def get_edits_from_traces(traces):
queries[code] += 1
# Get edits.
- done = set()
+ seen_edits = set()
for path in get_paths(nodes[0]):
for i in range(len(path)):
var_names = {}
start = tuple(rename_vars(remove_punct(path[i]), var_names))
-
for j in range(len(path[i+1:])):
var_names_copy = {k: v for k, v in var_names.items()}
end = tuple(rename_vars(remove_punct(path[i+1+j]), var_names_copy))
@@ -188,8 +188,8 @@ def get_edits_from_traces(traces):
continue
edit = (start, end)
- if edit not in done:
- done.add(edit)
+ if edit not in seen_edits:
+ seen_edits.add(edit)
edits[edit] += 1
lines[start] += 1
@@ -199,9 +199,13 @@ def get_edits_from_traces(traces):
lines[edit[0]] -= edits[edit]
del edits[edit]
- # Get the probability of each edit given its [before] part.
+ # Get the probability of each edit given its "before" or "after" part.
+ max_insert_count = max([count for (before, after), count in edits.items() if not before])
for before, after in edits:
- edits[(before, after)] /= lines[before]
+ if before:
+ edits[(before, after)] /= max(lines[before], 1)
+ else:
+ edits[(before, after)] /= max_insert_count
# Normalize line frequencies.
if len(lines) > 0: