2 files changed, 34 insertions, 30 deletions
diff --git a/monkey/monkey.py b/monkey/monkey.py
index f5b69a9..79584e0 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -29,7 +29,8 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
 
         for start, end in rules:
             rule_lines = lines[start:end]
-            rule_tokens = [t for line in rule_lines for t in line]
+            rule_vars = [t.val for line in rule_lines for t in line
+                               if t.type == 'VARIABLE' and t.val != '_']
 
             # Prepend a new rule (fact) before this rule (only if no line in
             # the current rule has been modified yet).
@@ -60,10 +61,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
                 for (before, after), cost in changes.items():
                     if line_normal == before:
                         seen = True
-                        mapping = map_vars(before, after, line, rule_tokens)
-                        after_real = tuple([t if t.type != 'VARIABLE'
-                                              else Token('VARIABLE', mapping[t.val])
-                                              for t in after])
+                        after_real = tuple(map_vars(before, after, line, rule_vars))
                         new_lines = lines[:line_idx] + (after_real,) + lines[line_idx+1:]
                         new_step = ('change_line', line_idx, (tuple(line), after_real))
 
@@ -88,10 +86,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
                     # Don't try to insert a head into the body.
                     if after[-1].type == 'FROM':
                         continue
-                    mapping = map_vars([], after, [], rule_tokens)
-                    after_real = tuple([t if t.type != 'VARIABLE'
-                                          else Token('VARIABLE', mapping[t.val])
-                                          for t in after])
+                    after_real = tuple(map_vars([], after, [], rule_vars))
 
                     idx = line_idx+1
                     new_lines = lines[:idx] + (after_real,) + lines[idx:]
diff --git a/prolog/util.py b/prolog/util.py
index 48e3345..30f12da 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -6,18 +6,18 @@ from .lexer import lexer, operators
 
 # Stores a token's type and value, and optionally the position of the first
 # character in the lexed stream.
-class Token(namedtuple('Token', ['type', 'val', 'pos'])):
+class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop'])):
     __slots__ = ()
 
     # Custom constructor to support default parameters.
-    def __new__(cls, type, val='', pos=None):
-        return super(Token, cls).__new__(cls, type, val, pos)
+    def __new__(cls, type, val='', pos=None, rule=None, part=None, stop=False):
+        return super(Token, cls).__new__(cls, type, val, pos, rule, part, stop)
 
     def __str__(self):
         return self.val
 
-    # Ignore position when comparing tokens. There is probably a cleaner way of
-    # doing these.
+    # Only consider type and value when comparing tokens. There is probably a
+    # cleaner way of doing this.
     __eq__ = lambda x, y: x[0] == y[0] and x[1] == y[1]
     __ne__ = lambda x, y: x[0] != y[0] or x[1] != y[1]
     __lt__ = lambda x, y: tuple.__lt__(x[0:2], y[0:2])
@@ -30,6 +30,15 @@ class Token(namedtuple('Token', ['type', 'val', 'pos'])):
     def __hash__(self):
         return hash(self[1])
 
+# Return a copy of [token]; non-None arguments override fields, stop is kept.
+def clone_token(token, val=None, pos=None, rule=None, part=None):
+    return Token(token.type,
+                 token.val if val is None else val,
+                 token.pos if pos is None else pos,
+                 token.rule if rule is None else rule,
+                 token.part if part is None else part,
+                 token.stop)
+
 # Return a list of tokens in [text].
 def tokenize(text):
     lexer.input(text)
@@ -167,24 +176,25 @@ def normalized(line, var_names=None):
         i -= 1
     return tuple(rename_vars(line[:i], var_names))
 
-# transformation = before → after; applied on line which is part of rule
-# return mapping from formal vars in before+after to actual vars in rule
-# line and rule should of course not be normalized
-def map_vars(before, after, line, rule):
+# Map "formal" variable names in the edit a→b to actual names in code [tokens].
+# The list [variables] contains all variable names in the current scope. These
+# are used, in order, in cases such as [A]→[A,B], where the edit introduces new
+# variables. Return a new version of b with actual variable names.
+def map_vars(a, b, tokens, variables):
     mapping = {}
     new_index = 0
-    for i in range(len(before)):
-        if line[i].type == 'VARIABLE':
-            formal_name = before[i].val
-            if line[i].val != '_':
-                actual_name = line[i].val
+    for i in range(len(a)):
+        if tokens[i].type == 'VARIABLE':
+            formal_name = a[i].val
+            if tokens[i].val != '_':
+                actual_name = tokens[i].val
             else:
                 actual_name = 'New'+str(new_index)
                 new_index += 1
             mapping[formal_name] = actual_name
 
-    remaining_formal = [t.val for t in after if t.type == 'VARIABLE' and t.val not in mapping.keys()]
-    remaining_actual = [t.val for t in rule if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()]
+    remaining_formal = [t.val for t in b if t.type == 'VARIABLE' and t.val not in mapping.keys()]
+    remaining_actual = [var for var in variables if var not in mapping.values()]
 
     while len(remaining_actual) < len(remaining_formal):
         remaining_actual.append('New'+str(new_index))
@@ -193,7 +203,7 @@ def map_vars(before, after, line, rule):
     for i, formal_name in enumerate(remaining_formal):
         mapping[formal_name] = remaining_actual[i]
 
-    return mapping
+    return [t if t.type != 'VARIABLE' else clone_token(t, val=mapping[t.val]) for t in b]
 
 # Basic sanity check.
 if __name__ == '__main__':
@@ -206,7 +216,6 @@ if __name__ == '__main__':
     after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names)
 
     line = lines[0]
-    rule = tokenize(code)
-
-    mapping = map_vars(before, after, line, rule)
-    print(mapping)
+    variables = [t.val for t in tokenize(code) if t.type == 'VARIABLE']
+    mapped = map_vars(before, after, line, variables)
+    print(mapped)