From 4204f59b524a447b49dc067d142600e347cc7d88 Mon Sep 17 00:00:00 2001
From: Timotej Lazar <timotej.lazar@araneo.org>
Date: Tue, 10 Feb 2015 00:10:12 +0100
Subject: Move normalize to prolog.util

---
 monkey/edits.py  | 23 ++++++-----------------
 monkey/monkey.py |  2 +-
 prolog/util.py   | 11 +++++++++++
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/monkey/edits.py b/monkey/edits.py
index 8747a6e..15734c4 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -5,7 +5,7 @@ import math
 
 from .action import expand, parse
 from .graph import Node
-from prolog.util import rename_vars, stringify, tokenize
+from prolog.util import normalized, rename_vars, stringify, tokenize
 from .util import get_line, avg, logistic
 
 # A line edit is a contiguous sequences of actions within a single line. This
@@ -154,17 +154,6 @@ def get_paths(root, path=None, done=None):
 # edits. Return a dictionary of edits and their frequencies, and also
 # submissions and queries in [traces].
 def get_edits_from_traces(traces):
-    # Helper function to remove trailing punctuation from lines and rename
-    # variables to A1,A2,A3,… (potentially using [var_names]). Return a tuple.
-    def normalize(line, var_names=None):
-        # Remove trailing punctuation.
-        i = len(line)
-        while i > 0:
-            if line[i-1].type not in ('COMMA', 'PERIOD', 'SEMI'):
-                break
-            i -= 1
-        return tuple(rename_vars(line[:i], var_names))
-
     # Return values: counts for observed edits, lines, submissions and queries.
     edits = collections.Counter()
     submissions = collections.Counter()
@@ -198,8 +187,8 @@ def get_edits_from_traces(traces):
                 # Normalize path[i-1] → path[i] into start → end. Reuse
                 # variable names from start when normalizing end.
                 var_names = {}
-                start = normalize(path[i-1], var_names)
-                end = normalize(path[i], var_names)
+                start = normalized(path[i-1], var_names)
+                end = normalized(path[i], var_names)
 
                 # Disallow edits that insert a whole rule (a → … :- …).
                 # TODO improve edit_graph to handle this.
@@ -213,8 +202,8 @@ def get_edits_from_traces(traces):
         edits.update(trace_edits)
 
         # Update node counts.
-        n_leaf.update(set([normalize(n.data[2]) for n in nodes if n.data[2] and not n.eout]))
-        n_all.update(set([normalize(n.data[2]) for n in nodes if n.data[2]]))
+        n_leaf.update(set([normalized(n.data[2]) for n in nodes if n.data[2] and not n.eout]))
+        n_all.update(set([normalized(n.data[2]) for n in nodes if n.data[2]]))
 
     # Discard edits that only occur in one trace.
     singletons = [edit for edit in edits if edits[edit] < 2]
@@ -227,7 +216,7 @@ def get_edits_from_traces(traces):
         if a:
             p *= 1 - (n_leaf[a] / (n_all[a]+1))
         if b:
-            b_normal = normalize(b)
+            b_normal = normalized(b)
             p *= n_leaf[b_normal] / (n_all[b_normal]+1)
         if a and b:
             p = math.sqrt(p)
diff --git a/monkey/monkey.py b/monkey/monkey.py
index 0a934bc..080b317 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -5,7 +5,7 @@ import time
 
 from .edits import classify_edits
 from prolog.engine import test
-from prolog.util import Token, compose, decompose, map_vars, rename_vars, stringify
+from prolog.util import Token, compose, decompose, map_vars, normalized, rename_vars, stringify
 from .util import PQueue
 
 # Starting from [code], find a sequence of edits that transforms it into a
diff --git a/prolog/util.py b/prolog/util.py
index e5a93e2..48e3345 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -156,6 +156,17 @@ def rename_vars(tokens, names=None):
             tokens[i] = Token('VARIABLE', names[cur_name])
     return tokens
 
+# Helper function to remove trailing punctuation from lines and rename
+# variables to A1,A2,A3,… (potentially using [var_names]). Return a tuple.
+def normalized(line, var_names=None):
+    # Remove trailing punctuation.
+    i = len(line)
+    while i > 0:
+        if line[i-1].type not in ('COMMA', 'PERIOD', 'SEMI'):
+            break
+        i -= 1
+    return tuple(rename_vars(line[:i], var_names))
+
 # transformation = before → after; applied on line which is part of rule
 # return mapping from formal vars in before+after to actual vars in rule
 # line and rule should of course not be normalized
-- 
cgit v1.2.1