From 1a99d21e12dad4c01d4c892ed4e8a0506bcf36aa Mon Sep 17 00:00:00 2001
From: Timotej Lazar <timotej.lazar@araneo.org>
Date: Wed, 29 Jan 2014 13:23:04 +0100
Subject: Initial commit for pymonkey

---
 prolog/util.py | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 prolog/util.py

(limited to 'prolog/util.py')

diff --git a/prolog/util.py b/prolog/util.py
new file mode 100644
index 0000000..0aa0b09
--- /dev/null
+++ b/prolog/util.py
@@ -0,0 +1,114 @@
+#!/usr/bin/python3
+
+import math
+import re
+
+from .lexer import lexer
+
+# Tokenization built on the lexer module.
+def tokenize(text):
+    """Lex `text` and return its tokens as a list of (type, value) pairs."""
+    lexer.input(text)
+    # Only type and value are kept; any other token attributes are dropped.
+    return [(tok.type, tok.value) for tok in lexer]
+
+# Token types that stringify() treats as operators and sets off with spaces.
+operators = {
+    'FROM', 'IMPLIES', 'NOT',
+    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
+    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
+    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD', 'POW', 'SEMI',
+}
+def stringify(tokens, indent=''):
+    """Render (type, value) token pairs as Prolog text prefixed by `indent`.
+
+    Operators are space-padded; ':-' and '.' end the line and the next one
+    starts with `indent` (plus two spaces after ':-'); ',' is followed by ' '.
+    """
+    pieces = [indent]
+    for tok in tokens:
+        kind, is_operator = tok[0], tok[0] in operators
+        if is_operator:
+            pieces.append(' ')
+        if kind == 'FROM':  # also an operator, so the output reads ' :-'
+            pieces.append(':-\n  ' + indent)
+        elif kind in ('PERIOD', 'COMMA'):
+            pieces.append('.\n' + indent if kind == 'PERIOD' else ', ')
+        else:
+            pieces.append(tok[1] + ' ' if is_operator else tok[1])
+    return ''.join(pieces)
+
+# Split 'code' into token lines and group those lines into rules.
+def decompose(code):
+    """Tokenize `code` and return the pair (lines, rules).
+
+    `lines` is a list of token lists, cut at every SEMI token and at each
+    FROM, COMMA or PERIOD token outside brackets; a SEMI gets a line of its
+    own, the other separators are left out.  `rules` has one (start, end)
+    pair of indexes into `lines` per PERIOD outside brackets.
+    """
+    closers = {'RPAREN': 'LPAREN', 'RBRACKET': 'LBRACKET', 'RBRACE': 'LBRACE'}
+    lines, rules = [], []
+    current, open_brackets, first_line = [], [], 0
+    for tok in tokenize(code):
+        kind = tok[0]
+        if kind == 'SEMI':
+            # Flush the pending line (even an empty one), then the SEMI itself.
+            lines.extend([current, [tok]])
+            current = []
+        # A separator outside brackets ends the line; PERIOD also ends the rule.
+        elif not open_brackets and kind in ('PERIOD', 'FROM', 'COMMA'):
+            if current:
+                lines.append(current)
+                current = []
+            if kind == 'PERIOD':
+                rules.append((first_line, len(lines)))
+                first_line = len(lines)
+        else:
+            # Any other token is kept; brackets also update the nesting stack.
+            if kind in closers.values():
+                open_brackets.append(kind)
+            elif open_brackets and open_brackets[-1] == closers.get(kind):
+                # Only a closer matching the innermost opener pops it.
+                open_brackets.pop()
+            current.append(tok)
+    return lines, rules
+
+def compose(lines, rules):
+    """Pretty-print `rules`, given as (start, end) index pairs into `lines`.
+
+    Lines after a rule's first are indented.  The last line ends in '.';
+    before it the first ends in ' :-', others in ',' unless by a SEMI line.
+    """
+    out = []
+    for start, end in rules:
+        for i in range(start, end):
+            text = stringify(lines[i]).replace('\n', ' ')
+            out.append('  ' + text if i > start else text)
+            if i in (start, end - 1):
+                out.append('.\n' if i == end - 1 else ' :-\n')
+            else:
+                # No ',' on an empty line or beside a SEMI (';' support is basic).
+                semi = any(ln and ln[-1][0] == 'SEMI' for ln in lines[i:i + 2])
+                out.append(',\n' if lines[i] and not semi else '\n')
+    return ''.join(out)
+
+def rename_vars(tokens, names=None):
+    """Rename variables in `tokens` (in place) to A0, A1, ... as they appear.
+
+    `names` may map names to numbers already in use; it is copied, not changed.
+    Each '_' gets its own number; PERIOD restarts numbering.  Returns the map.
+    """
+    names = dict(names or {})
+    next_id = len(names)
+    for i, tok in enumerate(tokens):
+        if tok[0] == 'PERIOD':
+            names, next_id = {}, 0
+        elif tok == ('VARIABLE', '_'):
+            tokens[i] = ('VARIABLE', 'A' + str(next_id))
+            next_id += 1
+        elif tok[0] == 'VARIABLE':
+            if tok[1] not in names:
+                names[tok[1]], next_id = next_id, next_id + 1
+            tokens[i] = ('VARIABLE', 'A' + str(names[tok[1]]))
+    return names
-- 
cgit v1.2.1