From 6a104bf8e2baea162d7f9f1d439dd8f671ddd413 Mon Sep 17 00:00:00 2001
From: Timotej Lazar <timotej.lazar@araneo.org>
Date: Wed, 4 Feb 2015 18:17:03 +0100
Subject: Clean up monkey.prolog.util

---
 monkey/edits.py       | 24 ++++++++++------------
 monkey/monkey.py      |  5 +----
 monkey/prolog/util.py | 56 +++++++++++++++++++++++++--------------------------
 3 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/monkey/edits.py b/monkey/edits.py
index 58843f6..3e0ae08 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -170,31 +170,29 @@ def get_edits_from_traces(traces):
         # Update the submissions/queries counters; rename variables first to
         # remove trivial differences.
         for submission in trace_submissions:
-            tokens = tokenize(submission)
-            rename_vars(tokens)
-            code = stringify(tokens)
+            code = stringify(rename_vars(tokenize(submission)))
             submissions[code] += 1
 
         for query in trace_queries:
-            tokens = tokenize(query)
-            rename_vars(tokens)
-            code = stringify(tokens)
+            code = stringify(rename_vars(tokenize(query)))
             queries[code] += 1
 
         # Get edits.
+        done = set()
         for path in get_paths(nodes[0]):
             for i in range(len(path)):
-                start = list(remove_punct(path[i]))
-                var_names = rename_vars(start)
-                start_t = tuple(start)
+                var_names = {}
+                start = remove_punct(path[i])
+                start_t = tuple(rename_vars(start, var_names))
 
                 for j in range(len(path[i+1:])):
+                    var_names_copy = {k: v for k, v in var_names.items()}
                     end = list(remove_punct(path[i+1+j]))
-                    rename_vars(end, var_names)
-                    end_t = tuple(end)
+                    end_t = tuple(rename_vars(end, var_names_copy))
 
-                    if start_t != end_t:
-                        edit = (start_t, end_t)
+                    edit = (start_t, end_t)
+                    if start_t != end_t and edit not in done:
+                        done.add(edit)
                         edits[edit] += 1
                         lines[start_t] += 1
 
diff --git a/monkey/monkey.py b/monkey/monkey.py
index cae42ae..47bca5c 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -31,10 +31,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
                 if line_idx < start_line:
                     continue
                 line = lines[line_idx]
-
-                line_normal = list(line)
-                rename_vars(line_normal)
-                line_normal = tuple(line_normal)
+                line_normal = tuple(rename_vars(line))
 
                 seen = False
                 # Apply each edit that matches this line.
diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py
index 46f6c5c..8d8b266 100644
--- a/monkey/prolog/util.py
+++ b/monkey/prolog/util.py
@@ -1,24 +1,19 @@
 #!/usr/bin/python3
 
-from .lexer import lexer
+from .lexer import lexer, operators
 from ..util import Token
 
+# Return a list of tokens in [text].
 def tokenize(text):
     lexer.input(text)
     return [Token(t.type, t.value, t.lexpos) for t in lexer]
 
-operators = set([
-    'FROM', 'IMPLIES', 'NOT',
-    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
-    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
-    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
-    'POW', 'SEMI'
-])
+# Return a one-line string representation of [tokens].
 def stringify(tokens):
     def token_str(t):
         if t.type in ('PERIOD', 'COMMA'):
             return str(t) + ' '
-        if t.type in operators:
+        if t.type in operators.values():
             return ' ' + str(t) + ' '
         return str(t)
     return ''.join(map(token_str, tokens))
@@ -32,7 +27,7 @@ def split(code):
             yield stringify(tokens[start:idx])
             start = idx + 1
 
-# return a list of lines in 'code', and a list of rule indexes
+# Return a list of lines in [code] and a list of rule ranges.
 def decompose(code):
     lines = []
     rules = []
@@ -68,9 +63,9 @@ def decompose(code):
             elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
                 parens.pop()
         line.append(t)
-    return tuple(lines), tuple(rules)
+    return lines, rules
 
-# pretty-print a list of rules
+# Format a list of [lines] according to [rules] (as returned by decompose).
 def compose(lines, rules):
     code = ''
     for start, end in rules:
@@ -89,25 +84,28 @@ def compose(lines, rules):
                 code += '\n'
     return code.strip()
 
-# standardize variable names in order of appearance
-def rename_vars(tokens, names={}):
-    # copy names so we don't fuck it up
-    names = {k: v for k, v in names.items()}
+# Rename variables in [tokens] to A0, A1, A2,… in order of appearance.
+def rename_vars(tokens, names=None):
+    if names is None:
+        names = {}
     next_id = len(names)
+
+    # Return a new list.
+    tokens = list(tokens)
     for i in range(len(tokens)):
         if tokens[i].type == 'PERIOD':
             names.clear()
             next_id = 0
         elif tokens[i] == Token('VARIABLE', '_'):
-            tokens[i] = Token('VARIABLE', 'A' + str(next_id))
+            tokens[i] = Token('VARIABLE', 'A{}'.format(next_id))
             next_id += 1
         elif tokens[i].type == 'VARIABLE':
             cur_name = tokens[i].val
             if cur_name not in names:
-                names[cur_name] = next_id
+                names[cur_name] = 'A{}'.format(next_id)
                 next_id += 1
-            tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
-    return names
+            tokens[i] = Token('VARIABLE', names[cur_name])
+    return tokens
 
 # transformation = before → after; applied on line which is part of rule
 # return mapping from formal vars in before+after to actual vars in rule
@@ -139,14 +137,16 @@ def map_vars(before, after, line, rule):
 
 # Basic sanity check.
 if __name__ == '__main__':
-    print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))
-
-    rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
-    line = tokenize('dup([H|T], [H1|T1]) :-')
-    before = tokenize("dup([A0|A1], [A2|A3])")
-    after = tokenize("dup([A0|A1], [A5, A4|A3])")
-    var_names = rename_vars(before)
-    rename_vars(after, var_names)
+    code = 'dup([H|T], [H1|T1]) :- dup(T1, T2). '
+    lines, rules = decompose(code)
+    print(compose(lines, rules))
+
+    var_names = {}
+    before = rename_vars(tokenize("dup([A0|A1], [A2|A3])"), var_names)
+    after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names)
+
+    line = lines[0]
+    rule = tokenize(code)
 
     mapping = map_vars(before, after, line, rule)
     print(mapping)
-- 
cgit v1.2.1