diff options
author | Timotej Lazar <timotej.lazar@araneo.org> | 2015-02-10 00:10:12 +0100 |
---|---|---|
committer | Aleš Smodiš <aless@guru.si> | 2015-08-11 14:26:02 +0200 |
commit | 4204f59b524a447b49dc067d142600e347cc7d88 (patch) | |
tree | 9eb8385a6fdc1ab88731b80160f374ba83784bfa /monkey | |
parent | 12c1e264dbb8ec979c17da362ff120a3fa87039c (diff) |
Move normalize to prolog.util
Diffstat (limited to 'monkey')
-rw-r--r-- | monkey/edits.py | 23 | ||||
-rwxr-xr-x | monkey/monkey.py | 2 |
2 files changed, 7 insertions, 18 deletions
diff --git a/monkey/edits.py b/monkey/edits.py index 8747a6e..15734c4 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -5,7 +5,7 @@ import math from .action import expand, parse from .graph import Node -from prolog.util import rename_vars, stringify, tokenize +from prolog.util import normalized, rename_vars, stringify, tokenize from .util import get_line, avg, logistic # A line edit is a contiguous sequences of actions within a single line. This @@ -154,17 +154,6 @@ def get_paths(root, path=None, done=None): # edits. Return a dictionary of edits and their frequencies, and also # submissions and queries in [traces]. def get_edits_from_traces(traces): - # Helper function to remove trailing punctuation from lines and rename - # variables to A1,A2,A3,… (potentially using [var_names]). Return a tuple. - def normalize(line, var_names=None): - # Remove trailing punctuation. - i = len(line) - while i > 0: - if line[i-1].type not in ('COMMA', 'PERIOD', 'SEMI'): - break - i -= 1 - return tuple(rename_vars(line[:i], var_names)) - # Return values: counts for observed edits, lines, submissions and queries. edits = collections.Counter() submissions = collections.Counter() @@ -198,8 +187,8 @@ def get_edits_from_traces(traces): # Normalize path[i-1] → path[i] into start → end. Reuse # variable names from start when normalizing end. var_names = {} - start = normalize(path[i-1], var_names) - end = normalize(path[i], var_names) + start = normalized(path[i-1], var_names) + end = normalized(path[i], var_names) # Disallow edits that insert a whole rule (a → … :- …). # TODO improve edit_graph to handle this. @@ -213,8 +202,8 @@ def get_edits_from_traces(traces): edits.update(trace_edits) # Update node counts. - n_leaf.update(set([normalize(n.data[2]) for n in nodes if n.data[2] and not n.eout])) - n_all.update(set([normalize(n.data[2]) for n in nodes if n.data[2]])) + n_leaf.update(set([normalized(n.data[2]) for n in nodes if n.data[2] and not n.eout])) + n_all.update(set([normalized(n.data[2]) for n in nodes if n.data[2]])) # Discard edits that only occur in one trace. singletons = [edit for edit in edits if edits[edit] < 2] @@ -227,7 +216,7 @@ def get_edits_from_traces(traces): if a: p *= 1 - (n_leaf[a] / (n_all[a]+1)) if b: - b_normal = normalize(b) + b_normal = normalized(b) p *= n_leaf[b_normal] / (n_all[b_normal]+1) if a and b: p = math.sqrt(p) diff --git a/monkey/monkey.py b/monkey/monkey.py index 0a934bc..080b317 100755 --- a/monkey/monkey.py +++ b/monkey/monkey.py @@ -5,7 +5,7 @@ import time from .edits import classify_edits from prolog.engine import test -from prolog.util import Token, compose, decompose, map_vars, rename_vars, stringify +from prolog.util import Token, compose, decompose, map_vars, normalized, rename_vars, stringify from .util import PQueue # Starting from [code], find a sequence of edits that transforms it into a |