#!/usr/bin/python3
# Provenance: prolog/util.py as added by commit
# 1a99d21e12dad4c01d4c892ed4e8a0506bcf36aa ("Initial commit for pymonkey",
# Timotej Lazar, Wed, 29 Jan 2014 13:23:04 +0100).
"""Token-level helpers: tokenizing, pretty-printing and variable renaming."""

import math
import re

from .lexer import lexer


def tokenize(text):
    """Return the tokens the lexer produces for 'text' as (type, value) pairs.

    Line numbers and absolute positions are deliberately discarded.
    """
    lexer.input(text)
    return [(t.type, t.value) for t in lexer]


# Token types that stringify() surrounds with spaces.
operators = {
    'FROM', 'IMPLIES', 'NOT',
    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
    'POW', 'SEMI',
}

# Closing bracket token type -> the opening type it matches.
_BRACKET_PAIRS = {
    'RPAREN': 'LPAREN',
    'RBRACKET': 'LBRACKET',
    'RBRACE': 'LBRACE',
}
_OPEN_BRACKETS = frozenset(_BRACKET_PAIRS.values())


def stringify(tokens, indent=''):
    """Render a sequence of (type, value) tokens as text.

    The result starts with 'indent'. Operator tokens are surrounded by
    spaces, a FROM token is followed by a line break, a PERIOD token ends
    the line, and a COMMA token is followed by a single space. All other
    tokens are emitted as their value with no spacing.
    """
    parts = [indent]
    for token in tokens:
        kind = token[0]
        is_operator = kind in operators
        if is_operator:
            parts.append(' ')

        if kind == 'FROM':
            # NOTE(review): the patch this module was recovered from had
            # runs of whitespace collapsed, so the width of the indent in
            # this literal could not be confirmed -- verify upstream.
            parts.append(':-\n ' + indent)
        elif kind == 'PERIOD':
            parts.append('.\n' + indent)
        elif kind == 'COMMA':
            parts.append(', ')
        elif is_operator:
            parts.append(token[1] + ' ')
        else:
            parts.append(token[1])

    return ''.join(parts)


def decompose(code):
    """Split 'code' into lines of tokens and a list of rule index ranges.

    A line ends at a FROM, COMMA or PERIOD token that is not nested inside
    brackets; those separator tokens are not stored. A SEMI token always
    ends the current line (even inside brackets, and even if that line is
    empty) and is stored as a line of its own.

    Returns (lines, rules), where 'lines' is a list of token lists and
    'rules' is a list of (start, end) half-open index ranges into 'lines',
    one per PERIOD seen outside brackets.

    NOTE(review): tokens after the last top-level PERIOD are not appended
    to 'lines', so an unterminated trailing clause is dropped.
    """
    lines = []
    rules = []

    line = []
    parens = []      # stack of currently open bracket token types
    rule_start = 0
    for token in tokenize(code):
        kind = token[0]
        if kind == 'SEMI':
            lines.append(line)
            lines.append([token])
            line = []
            continue
        if not parens and kind in ('PERIOD', 'FROM', 'COMMA'):
            if line:
                lines.append(line)
                line = []
            if kind == 'PERIOD':
                rules.append((rule_start, len(lines)))
                rule_start = len(lines)
            continue
        if kind in _OPEN_BRACKETS:
            parens.append(kind)
        elif parens and _BRACKET_PAIRS.get(kind) == parens[-1]:
            # Only a closer matching the innermost opener pops the stack;
            # mismatched closers are kept in the line but otherwise ignored.
            parens.pop()
        line.append(token)
    return lines, rules


def _ends_with_semi(line):
    """Return True if 'line' is non-empty and its last token is a SEMI."""
    return bool(line) and line[-1][0] == 'SEMI'


def compose(lines, rules):
    """Pretty-print the rules described by 'lines' and 'rules'.

    Takes the two values returned by decompose(). Each rule is printed
    with its first line followed by ' :-', every following line on a row
    of its own, and a final '.'. There is no real support for ';' yet:
    a comma is simply omitted next to a line that ends with a SEMI token.
    """
    out = []
    for start, end in rules:
        for i in range(start, end):
            line = lines[i]
            if i > start:
                # NOTE(review): the width of this body indent may have been
                # shortened by whitespace collapsing in the recovered
                # patch -- verify upstream.
                out.append(' ')
            out.append(stringify(line).replace('\n', ' '))
            if i == end - 1:
                out.append('.\n')
            elif i == start:
                out.append(' :-\n')
            else:
                # Here i < end - 1, so lines[i + 1] belongs to this rule.
                # It may be empty (decompose() stores an empty line when a
                # ';' directly follows ','), hence the empty-safe helper
                # rather than indexing lines[i + 1][-1] directly.
                if (line and not _ends_with_semi(line)
                        and not _ends_with_semi(lines[i + 1])):
                    out.append(',')
                out.append('\n')
    return ''.join(out)


def rename_vars(tokens, names=None):
    """Rename variables in 'tokens' to A0, A1, ... in order of appearance.

    'tokens' is modified in place. 'names' optionally maps already known
    variable names to their numbers; it is copied, never modified. New
    numbers start at len(names), which assumes the supplied numbers are
    0..len(names)-1 (not checked here). Each ('VARIABLE', '_') token gets
    a fresh number of its own. A PERIOD token forgets all names and
    restarts numbering at 0.

    Returns the name -> number mapping in effect after the last token.
    """
    # Work on a copy so the caller's mapping is left untouched.
    names = {} if names is None else dict(names)
    next_id = len(names)
    for i, token in enumerate(tokens):
        if token[0] == 'PERIOD':
            names.clear()
            next_id = 0
        elif token == ('VARIABLE', '_'):
            tokens[i] = ('VARIABLE', 'A' + str(next_id))
            next_id += 1
        elif token[0] == 'VARIABLE':
            cur_name = token[1]
            if cur_name not in names:
                names[cur_name] = next_id
                next_id += 1
            tokens[i] = ('VARIABLE', 'A' + str(names[cur_name]))
    return names