From 819ab10281c9bd6c000364c3a243959edd18abf7 Mon Sep 17 00:00:00 2001
From: Timotej Lazar
Date: Thu, 15 Jan 2015 12:10:22 +0100
Subject: Move pymonkey stuff to monkey/

Importing pymonkey into webmonkey, let's see how this works.
---
 prolog/util.py | 156 ---------------------------------------------------------
 1 file changed, 156 deletions(-)
 delete mode 100644 prolog/util.py

diff --git a/prolog/util.py b/prolog/util.py
deleted file mode 100644
index 0ab3c8b..0000000
--- a/prolog/util.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/python3
-
-import itertools
-import math
-import re
-
-from .lexer import lexer
-from util import Token
-
-def tokenize(text):
-    lexer.input(text)
-    return [Token(t.type, t.value, t.lexpos) for t in lexer]
-
-operators = set([
-    'FROM', 'IMPLIES', 'NOT',
-    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
-    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
-    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
-    'POW', 'SEMI'
-])
-def stringify(tokens):
-    def token_str(t):
-        if t.type in ('PERIOD', 'COMMA'):
-            return str(t) + ' '
-        if t.type in operators:
-            return ' ' + str(t) + ' '
-        return str(t)
-    return ''.join(map(token_str, tokens))
-
-# Yield the sequence of rules in [code].
-def split(code):
-    tokens = tokenize(code)
-    start = 0
-    for idx, token in enumerate(tokens):
-        if token.type == 'PERIOD' and idx - start > 1:
-            yield stringify(tokens[start:idx])
-            start = idx + 1
-
-# return a list of lines in 'code', and a list of rule indexes
-def decompose(code):
-    lines = []
-    rules = []
-    tokens = tokenize(code)
-    tokens.append(Token('EOF'))
-
-    line = []
-    parens = []
-    rule_start = 0
-    for t in tokens:
-        if t.type == 'SEMI':
-            if line != []:
-                lines.append(tuple(line))
-                line = []
-            lines.append((t,))
-            continue
-        if not parens:
-            if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
-                if line != []:
-                    lines.append(tuple(line))
-                    line = []
-                if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
-                    rules.append((rule_start, len(lines)))
-                    rule_start = len(lines)
-                continue
-        if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
-            parens.append(t.type)
-        elif parens:
-            if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
-                parens.pop()
-            elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
-                parens.pop()
-            elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
-                parens.pop()
-        line.append(t)
-    return tuple(lines), tuple(rules)
-
-# pretty-print a list of rules
-def compose(lines, rules):
-    code = ''
-    for start, end in rules:
-        for i in range(start, end):
-            line = lines[i]
-            if i > start:
-                code += '    '
-            code += stringify(line)
-            if i == end-1:
-                code += '.\n'
-            elif i == start:
-                code += ' :-\n'
-            else:
-                if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI':
-                    code += ','
-                code += '\n'
-    return code.strip()
-
-# standardize variable names in order of appearance
-def rename_vars(tokens, names={}):
-    # copy names so we don't fuck it up
-    names = {k: v for k, v in names.items()}
-    next_id = len(names)
-    for i in range(len(tokens)):
-        if tokens[i].type == 'PERIOD':
-            names.clear()
-            next_id = 0
-        elif tokens[i] == Token('VARIABLE', '_'):
-            tokens[i] = Token('VARIABLE', 'A' + str(next_id))
-            next_id += 1
-        elif tokens[i].type == 'VARIABLE':
-            cur_name = tokens[i].val
-            if cur_name not in names:
-                names[cur_name] = next_id
-                next_id += 1
-            tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
-    return names
-
-# transformation = before → after; applied on line which is part of rule
-# return mapping from formal vars in before+after to actual vars in rule
-# line and rule should of course not be normalized
-def map_vars(before, after, line, rule):
-    mapping = {}
-    new_index = 0
-    for i in range(len(before)):
-        if line[i].type == 'VARIABLE':
-            formal_name = before[i].val
-            if line[i].val != '_':
-                actual_name = line[i].val
-            else:
-                actual_name = 'New'+str(new_index)
-                new_index += 1
-            mapping[formal_name] = actual_name
-
-    remaining_formal = [t.val for t in after if t.type == 'VARIABLE' and t.val not in mapping.keys()]
-    remaining_actual = [t.val for t in rule if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()]
-
-    while len(remaining_actual) < len(remaining_formal):
-        remaining_actual.append('New'+str(new_index))
-        new_index += 1
-
-    for i, formal_name in enumerate(remaining_formal):
-        mapping[formal_name] = remaining_actual[i]
-
-    return mapping
-
-# Basic sanity check.
-if __name__ == '__main__':
-    print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))
-
-    rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
-    line = tokenize('dup([H|T], [H1|T1]) :-')
-    before = tokenize("dup([A0|A1], [A2|A3])")
-    after = tokenize("dup([A0|A1], [A5, A4|A3])")
-    var_names = rename_vars(before)
-    rename_vars(after, var_names)
-
-    mapping = map_vars(before, after, line, rule)
-    print(mapping)
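
As context for the removal, a rough sketch of how these helpers fit together (hypothetical usage, assuming the pre-move module is still importable as prolog.util with its prolog.lexer and util.Token dependencies on the path; the len/2 example and expected output are illustrative, not taken from the repository):

    # Hypothetical usage of the removed helpers; assumes the old
    # prolog.util module and its lexer are importable.
    from prolog.util import rename_vars, split, stringify, tokenize

    code = 'len([], 0). len([_|T], N) :- len(T, M), N is M + 1.'

    # split() yields one stringified clause per terminating PERIOD token.
    for clause in split(code):
        print(clause)

    # rename_vars() rewrites variables in the token list in place to
    # A0, A1, ... in order of first appearance, so structurally equal
    # clauses end up with identical variable names.
    tokens = tokenize('len([_|T], N)')
    rename_vars(tokens)
    print(stringify(tokens))  # e.g. len([A0|A1], A2)

A sketch like this also shows why rename_vars copies its names argument instead of mutating it: callers such as the sanity check above pass the returned dict into a second call to keep variable numbering consistent across related token lists.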