Clean up monkey.prolog.util

author: Timotej Lazar <timotej.lazar@araneo.org> 2015-02-04 18:17:03 +0100
committer: Aleš Smodiš <aless@guru.si> 2015-08-11 14:26:01 +0200
commit: 6a104bf8e2baea162d7f9f1d439dd8f671ddd413 (patch)
tree: dcff17886abf0db6c01107f8a202f344a4e98bfe /monkey/prolog
parent: 127bc6d1c3169a80c735b01588c968b85d2df944 (diff)
1 file changed, 28 insertions, 28 deletions
diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py
index 46f6c5c..8d8b266 100644
--- a/monkey/prolog/util.py
+++ b/monkey/prolog/util.py
@@ -1,24 +1,19 @@
 #!/usr/bin/python3
 
-from .lexer import lexer
+from .lexer import lexer, operators
 from ..util import Token
 
+# Return a list of tokens in [text].
 def tokenize(text):
     lexer.input(text)
     return [Token(t.type, t.value, t.lexpos) for t in lexer]
 
-operators = set([
-    'FROM', 'IMPLIES', 'NOT',
-    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
-    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
-    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
-    'POW', 'SEMI'
-])
+# Return a one-line string representation of [tokens].
 def stringify(tokens):
     def token_str(t):
         if t.type in ('PERIOD', 'COMMA'):
             return str(t) + ' '
-        if t.type in operators:
+        if t.type in operators.values():
             return ' ' + str(t) + ' '
         return str(t)
     return ''.join(map(token_str, tokens))
@@ -32,7 +27,7 @@ def split(code):
             yield stringify(tokens[start:idx])
             start = idx + 1
 
-# return a list of lines in 'code', and a list of rule indexes
+# Return a list of lines in [code] and a list of rule ranges.
 def decompose(code):
     lines = []
     rules = []
@@ -68,9 +63,9 @@ def decompose(code):
             elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
                 parens.pop()
         line.append(t)
-    return tuple(lines), tuple(rules)
+    return lines, rules
 
-# pretty-print a list of rules
+# Format a list of [lines] according to [rules] (as returned by decompose).
 def compose(lines, rules):
     code = ''
     for start, end in rules:
@@ -89,25 +84,28 @@ def compose(lines, rules):
                 code += '\n'
     return code.strip()
 
-# standardize variable names in order of appearance
-def rename_vars(tokens, names={}):
-    # copy names so we don't fuck it up
-    names = {k: v for k, v in names.items()}
+# Rename variables in [tokens] to A0, A1, A2,… in order of appearance.
+def rename_vars(tokens, names=None):
+    if names is None:
+        names = {}
     next_id = len(names)
+
+    # Return a new list.
+    tokens = list(tokens)
     for i in range(len(tokens)):
         if tokens[i].type == 'PERIOD':
             names.clear()
             next_id = 0
         elif tokens[i] == Token('VARIABLE', '_'):
-            tokens[i] = Token('VARIABLE', 'A' + str(next_id))
+            tokens[i] = Token('VARIABLE', 'A{}'.format(next_id))
             next_id += 1
         elif tokens[i].type == 'VARIABLE':
             cur_name = tokens[i].val
             if cur_name not in names:
-                names[cur_name] = next_id
+                names[cur_name] = 'A{}'.format(next_id)
                 next_id += 1
-            tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
-    return names
+            tokens[i] = Token('VARIABLE', names[cur_name])
+    return tokens
 
 # transformation = before → after; applied on line which is part of rule
 # return mapping from formal vars in before+after to actual vars in rule
@@ -139,14 +137,16 @@ def map_vars(before, after, line, rule):
 
 # Basic sanity check.
 if __name__ == '__main__':
-    print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))
-
-    rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
-    line = tokenize('dup([H|T], [H1|T1]) :-')
-    before = tokenize("dup([A0|A1], [A2|A3])")
-    after = tokenize("dup([A0|A1], [A5, A4|A3])")
-    var_names = rename_vars(before)
-    rename_vars(after, var_names)
+    code = 'dup([H|T], [H1|T1]) :- dup(T1, T2). '
+    lines, rules = decompose(code)
+    print(compose(lines, rules))
+
+    var_names = {}
+    before = rename_vars(tokenize("dup([A0|A1], [A2|A3])"), var_names)
+    after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names)
+
+    line = lines[0]
+    rule = tokenize(code)
 
     mapping = map_vars(before, after, line, rule)
     print(mapping)
author	Timotej Lazar <timotej.lazar@araneo.org>	2015-02-04 18:17:03 +0100
committer	Aleš Smodiš <aless@guru.si>	2015-08-11 14:26:01 +0200
commit	6a104bf8e2baea162d7f9f1d439dd8f671ddd413 (patch)
tree	dcff17886abf0db6c01107f8a202f344a4e98bfe /monkey/prolog
parent	127bc6d1c3169a80c735b01588c968b85d2df944 (diff)