diff options
Diffstat (limited to 'monkey/edits.py')
-rw-r--r-- | monkey/edits.py | 22 |
1 files changed, 13 insertions, 9 deletions
diff --git a/monkey/edits.py b/monkey/edits.py index 333cb12..ad595b6 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -122,15 +122,16 @@ def get_paths(root, path=None, done=None): if done is None: done = set() - # Add [root] to [path] if it is the first node or different than last. + # Add [root] to [path] if it is the first node or different than previous. if not path: path = (root.data[2],) elif root.data[2] != path[-1]: path = path + (root.data[2],) # Return the current path if [root] is a leaf or an empty node. - if len(path) > 1 and not (root.eout and root.data[2]): - yield path + if len(path) > 1: + if not root.eout or not root.data[2]: + yield path # If [root] is an empty node, start a new path. if not root.data[2]: @@ -175,12 +176,11 @@ def get_edits_from_traces(traces): queries[code] += 1 # Get edits. - done = set() + seen_edits = set() for path in get_paths(nodes[0]): for i in range(len(path)): var_names = {} start = tuple(rename_vars(remove_punct(path[i]), var_names)) - for j in range(len(path[i+1:])): var_names_copy = {k: v for k, v in var_names.items()} end = tuple(rename_vars(remove_punct(path[i+1+j]), var_names_copy)) @@ -188,8 +188,8 @@ def get_edits_from_traces(traces): continue edit = (start, end) - if edit not in done: - done.add(edit) + if edit not in seen_edits: + seen_edits.add(edit) edits[edit] += 1 lines[start] += 1 @@ -199,9 +199,13 @@ def get_edits_from_traces(traces): lines[edit[0]] -= edits[edit] del edits[edit] - # Get the probability of each edit given its [before] part. + # Get the probability of each edit given its "before" or "after" part. + max_insert_count = max([count for (before, after), count in edits.items() if not before]) for before, after in edits: - edits[(before, after)] /= lines[before] + if before: + edits[(before, after)] /= max(lines[before], 1) + else: + edits[(before, after)] /= max_insert_count # Normalize line frequencies. if len(lines) > 0: |