Move normalize to prolog.util

author: Timotej Lazar <timotej.lazar@araneo.org> 2015-02-10 00:10:12 +0100
committer: Aleš Smodiš <aless@guru.si> 2015-08-11 14:26:02 +0200
commit: 4204f59b524a447b49dc067d142600e347cc7d88 (patch)
tree: 9eb8385a6fdc1ab88731b80160f374ba83784bfa /monkey
parent: 12c1e264dbb8ec979c17da362ff120a3fa87039c (diff)
2 files changed, 7 insertions, 18 deletions
diff --git a/monkey/edits.py b/monkey/edits.py
index 8747a6e..15734c4 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -5,7 +5,7 @@ import math
 
 from .action import expand, parse
 from .graph import Node
-from prolog.util import rename_vars, stringify, tokenize
+from prolog.util import normalized, rename_vars, stringify, tokenize
 from .util import get_line, avg, logistic
 
 # A line edit is a contiguous sequence of actions within a single line. This
@@ -154,17 +154,6 @@ def get_paths(root, path=None, done=None):
 # edits. Return a dictionary of edits and their frequencies, and also
 # submissions and queries in [traces].
 def get_edits_from_traces(traces):
-    # Helper function to remove trailing punctuation from lines and rename
-    # variables to A1,A2,A3,… (potentially using [var_names]). Return a tuple.
-    def normalize(line, var_names=None):
-        # Remove trailing punctuation.
-        i = len(line)
-        while i > 0:
-            if line[i-1].type not in ('COMMA', 'PERIOD', 'SEMI'):
-                break
-            i -= 1
-        return tuple(rename_vars(line[:i], var_names))
-
     # Return values: counts for observed edits, lines, submissions and queries.
     edits = collections.Counter()
     submissions = collections.Counter()
@@ -198,8 +187,8 @@ def get_edits_from_traces(traces):
                 # Normalize path[i-1] → path[i] into start → end. Reuse
                 # variable names from start when normalizing end.
                 var_names = {}
-                start = normalize(path[i-1], var_names)
-                end = normalize(path[i], var_names)
+                start = normalized(path[i-1], var_names)
+                end = normalized(path[i], var_names)
 
                 # Disallow edits that insert a whole rule (a → … :- …).
                 # TODO improve edit_graph to handle this.
@@ -213,8 +202,8 @@ def get_edits_from_traces(traces):
         edits.update(trace_edits)
 
         # Update node counts.
-        n_leaf.update(set([normalize(n.data[2]) for n in nodes if n.data[2] and not n.eout]))
-        n_all.update(set([normalize(n.data[2]) for n in nodes if n.data[2]]))
+        n_leaf.update(set([normalized(n.data[2]) for n in nodes if n.data[2] and not n.eout]))
+        n_all.update(set([normalized(n.data[2]) for n in nodes if n.data[2]]))
 
     # Discard edits that only occur in one trace.
     singletons = [edit for edit in edits if edits[edit] < 2]
@@ -227,7 +216,7 @@ def get_edits_from_traces(traces):
         if a:
             p *= 1 - (n_leaf[a] / (n_all[a]+1))
         if b:
-            b_normal = normalize(b)
+            b_normal = normalized(b)
             p *= n_leaf[b_normal] / (n_all[b_normal]+1)
         if a and b:
             p = math.sqrt(p)
diff --git a/monkey/monkey.py b/monkey/monkey.py
index 0a934bc..080b317 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -5,7 +5,7 @@ import time
 
 from .edits import classify_edits
 from prolog.engine import test
-from prolog.util import Token, compose, decompose, map_vars, rename_vars, stringify
+from prolog.util import Token, compose, decompose, map_vars, normalized, rename_vars, stringify
 from .util import PQueue
 
 # Starting from [code], find a sequence of edits that transforms it into a
author	Timotej Lazar <timotej.lazar@araneo.org>	2015-02-10 00:10:12 +0100
committer	Aleš Smodiš <aless@guru.si>	2015-08-11 14:26:02 +0200
commit	4204f59b524a447b49dc067d142600e347cc7d88 (patch)
tree	9eb8385a6fdc1ab88731b80160f374ba83784bfa /monkey
parent	12c1e264dbb8ec979c17da362ff120a3fa87039c (diff)