summaryrefslogtreecommitdiff
path: root/prolog/util.py
diff options
context:
space:
mode:
authorTimotej Lazar <timotej.lazar@araneo.org>2014-01-29 13:23:04 +0100
committerAleš Smodiš <aless@guru.si>2015-08-11 14:25:59 +0200
commit1a99d21e12dad4c01d4c892ed4e8a0506bcf36aa (patch)
tree5b98529f8fd2d546b1a7338fbe6765d4989aa5b3 /prolog/util.py
Initial commit for pymonkey
Diffstat (limited to 'prolog/util.py')
-rw-r--r--prolog/util.py114
1 files changed, 114 insertions, 0 deletions
diff --git a/prolog/util.py b/prolog/util.py
new file mode 100644
index 0000000..0aa0b09
--- /dev/null
+++ b/prolog/util.py
@@ -0,0 +1,114 @@
+#!/usr/bin/python3
+
+import math
+import re
+
+from .lexer import lexer
+
# new lexer stuff
def tokenize(text):
    """Return the tokens of *text* as a list of ``(type, value)`` pairs.

    The module-level ``lexer`` is fed with *text* and then drained by
    iterating over it. Only the ``type`` and ``value`` attributes of each
    token are kept; line numbers and absolute positions are deliberately
    discarded.

    NOTE(review): ``lexer`` is a shared module-level object, so this
    presumably should not be called concurrently -- confirm against the
    lexer implementation.
    """
    lexer.input(text)
    return [(token.type, token.value) for token in lexer]
+
# Token types that stringify() surrounds with spaces.
operators = {
    'FROM', 'IMPLIES', 'NOT',
    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
    'POW', 'SEMI',
}


def stringify(tokens, indent=''):
    """Render a sequence of ``(type, value)`` tokens as source text.

    Args:
        tokens: iterable of tokens; element 0 is the token type and
            element 1 is its text.
        indent: string the result starts with, repeated after every line
            break that this function emits.

    Returns:
        The concatenated text. Operator tokens get a space on both sides,
        ``FROM`` is written as ``:-`` followed by a line break, ``PERIOD``
        as ``.`` followed by a line break, and ``COMMA`` as ``, ``. Every
        other token contributes its text unchanged.
    """
    parts = [indent]
    for token in tokens:
        kind = token[0]
        is_operator = kind in operators
        if is_operator:
            # Leading space; the trailing one is added by the branch below.
            parts.append(' ')

        if kind == 'FROM':
            # NOTE(review): continuation-indent width copied as it appears
            # in the listing this was recovered from -- confirm.
            parts.append(':-\n ' + indent)
        elif kind == 'PERIOD':
            parts.append('.\n' + indent)
        elif kind == 'COMMA':
            parts.append(', ')
        elif is_operator:
            parts.append(token[1] + ' ')
        else:
            parts.append(token[1])

    return ''.join(parts)
+
# return a list of lines in 'code', and a list of rule indexes
def decompose(code):
    """Split Prolog source text into token lines grouped by rule.

    The text is tokenized with ``tokenize()``. Outside of any bracket, the
    ``FROM``, ``COMMA`` and ``PERIOD`` tokens end the current line and are
    themselves dropped. A ``SEMI`` token always ends the current line
    (even inside brackets) and is kept as a one-token line of its own.

    Args:
        code: source text to decompose.

    Returns:
        A ``(lines, rules)`` pair. ``lines`` is a list of token lists;
        ``rules`` is a list of ``(start, end)`` half-open index ranges into
        ``lines``, one per ``PERIOD`` seen outside brackets. Tokens after
        the last such ``PERIOD`` are not returned.

    NOTE(review): the block nesting was reconstructed from a listing whose
    indentation had been flattened -- confirm against the repository copy.
    """
    separators = ('PERIOD', 'FROM', 'COMMA')
    closer_for = {
        'LPAREN': 'RPAREN',
        'LBRACKET': 'RBRACKET',
        'LBRACE': 'RBRACE',
    }

    lines = []
    rules = []
    line = []
    parens = []  # stack of currently open bracket token types
    rule_start = 0
    for token in tokenize(code):
        kind = token[0]
        if kind == 'SEMI':
            # Never emit an empty line: compose() indexes the last token
            # of neighbouring lines and would fail on one.
            if line:
                lines.append(line)
            lines.append([token])
            line = []
            continue
        if not parens and kind in separators:
            if line:
                lines.append(line)
                line = []
            if kind == 'PERIOD':
                rules.append((rule_start, len(lines)))
                rule_start = len(lines)
            continue
        if kind in closer_for:
            parens.append(kind)
        elif parens and kind == closer_for[parens[-1]]:
            # Only a closer matching the innermost opener pops the stack;
            # mismatched closers are kept as ordinary tokens.
            parens.pop()
        line.append(token)
    return lines, rules
+
# pretty-print a list of rules (no support for ; yet)
def compose(lines, rules):
    """Pretty-print rules from the ``(lines, rules)`` form.

    Args:
        lines: list of token lists; each is rendered with ``stringify()``
            with any line breaks in the result replaced by spaces.
        rules: iterable of ``(start, end)`` half-open index ranges into
            *lines*; the first line of a range is treated as the rule head.

    Returns:
        The rules as text, one line of *lines* per output line. The last
        line of a rule ends with ``.``, the head of a multi-line rule ends
        with `` :-``, and other lines end with ``,`` unless the line is
        empty or it, or the line after it, ends in a ``SEMI`` token.
    """
    def ends_with_semi(tokens):
        # Empty lines have no last token and therefore never end in ';'.
        return bool(tokens) and tokens[-1][0] == 'SEMI'

    out = []
    for start, end in rules:
        last = end - 1
        for i in range(start, end):
            line = lines[i]
            if i > start:
                # NOTE(review): body-indent width copied as it appears in
                # the listing this was recovered from -- confirm.
                out.append(' ')
            out.append(stringify(line).replace('\n', ' '))
            if i == last:
                out.append('.\n')
            elif i == start:
                out.append(' :-\n')
            else:
                # Here i < last, so lines[i + 1] belongs to the same rule.
                if (line and not ends_with_semi(line)
                        and not ends_with_semi(lines[i + 1])):
                    out.append(',')
                out.append('\n')
    return ''.join(out)
+
# rename variables in order of appearance
def rename_vars(tokens, names=None):
    """Rename variables in *tokens* to ``A0``, ``A1``, ... in place.

    Variables are numbered in order of first appearance. Each occurrence
    of the anonymous variable ``_`` gets a fresh number of its own. A
    ``PERIOD`` token resets both the name mapping and the counter, so
    numbering restarts after it.

    Args:
        tokens: mutable sequence of ``(type, value)`` tuples; ``VARIABLE``
            entries are replaced in place.
        names: optional mapping from original variable name to number used
            to seed the renaming. It is copied, never modified. Numbering
            of new variables continues from ``len(names)``.

    Returns:
        The mapping from original name to number as it stands after the
        last token (i.e. for the text following the last ``PERIOD``).
    """
    names = dict(names) if names is not None else {}
    next_id = len(names)
    for i, token in enumerate(tokens):
        if token[0] == 'PERIOD':
            names.clear()
            next_id = 0
        elif token == ('VARIABLE', '_'):
            # Anonymous variables never share a name, so they are not
            # recorded in the mapping.
            tokens[i] = ('VARIABLE', 'A' + str(next_id))
            next_id += 1
        elif token[0] == 'VARIABLE':
            cur_name = token[1]
            if cur_name not in names:
                names[cur_name] = next_id
                next_id += 1
            tokens[i] = ('VARIABLE', 'A' + str(names[cur_name]))
    return names