From 001739a6a93cceeb29f81ea2281ade0bef1a8645 Mon Sep 17 00:00:00 2001
From: Timotej Lazar
Date: Wed, 4 Feb 2015 18:47:07 +0100
Subject: Move monkey.prolog to root module

---
 monkey/prolog/util.py | 152 --------------------------------------------------
 1 file changed, 152 deletions(-)
 delete mode 100644 monkey/prolog/util.py

diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py
deleted file mode 100644
index 8d8b266..0000000
--- a/monkey/prolog/util.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/python3
-
-from .lexer import lexer, operators
-from ..util import Token
-
-# Return a list of tokens in [text].
-def tokenize(text):
-    lexer.input(text)
-    return [Token(t.type, t.value, t.lexpos) for t in lexer]
-
-# Return a one-line string representation of [tokens].
-def stringify(tokens):
-    def token_str(t):
-        if t.type in ('PERIOD', 'COMMA'):
-            return str(t) + ' '
-        if t.type in operators.values():
-            return ' ' + str(t) + ' '
-        return str(t)
-    return ''.join(map(token_str, tokens))
-
-# Yield the sequence of rules in [code].
-def split(code):
-    tokens = tokenize(code)
-    start = 0
-    for idx, token in enumerate(tokens):
-        if token.type == 'PERIOD' and idx - start > 1:
-            yield stringify(tokens[start:idx])
-            start = idx + 1
-
-# Return a list of lines in [code] and a list of rule ranges.
-def decompose(code):
-    lines = []
-    rules = []
-    tokens = tokenize(code)
-    tokens.append(Token('EOF'))
-
-    line = []
-    parens = []
-    rule_start = 0
-    for t in tokens:
-        if t.type == 'SEMI':
-            if line != []:
-                lines.append(tuple(line))
-                line = []
-            lines.append((t,))
-            continue
-        if not parens:
-            if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
-                if line != []:
-                    lines.append(tuple(line))
-                    line = []
-                if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
-                    rules.append((rule_start, len(lines)))
-                    rule_start = len(lines)
-                continue
-        if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
-            parens.append(t.type)
-        elif parens:
-            if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
-                parens.pop()
-            elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
-                parens.pop()
-            elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
-                parens.pop()
-        line.append(t)
-    return lines, rules
-
-# Format a list of [lines] according to [rules] (as returned by decompose).
-def compose(lines, rules):
-    code = ''
-    for start, end in rules:
-        for i in range(start, end):
-            line = lines[i]
-            if i > start:
-                code += ' '
-            code += stringify(line)
-            if i == end-1:
-                code += '.\n'
-            elif i == start:
-                code += ' :-\n'
-            else:
-                if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI':
-                    code += ','
-                code += '\n'
-    return code.strip()
-
-# Rename variables in [tokens] to A0, A1, A2,… in order of appearance.
-def rename_vars(tokens, names=None):
-    if names is None:
-        names = {}
-    next_id = len(names)
-
-    # Return a new list.
-    tokens = list(tokens)
-    for i in range(len(tokens)):
-        if tokens[i].type == 'PERIOD':
-            names.clear()
-            next_id = 0
-        elif tokens[i] == Token('VARIABLE', '_'):
-            tokens[i] = Token('VARIABLE', 'A{}'.format(next_id))
-            next_id += 1
-        elif tokens[i].type == 'VARIABLE':
-            cur_name = tokens[i].val
-            if cur_name not in names:
-                names[cur_name] = 'A{}'.format(next_id)
-                next_id += 1
-            tokens[i] = Token('VARIABLE', names[cur_name])
-    return tokens
-
-# Given a transformation [before] → [after] that was applied to [line],
-# which is part of [rule], return a mapping from formal variables in
-# before+after to actual variables in rule. Neither input is normalized.
-def map_vars(before, after, line, rule):
-    mapping = {}
-    new_index = 0
-    for i in range(len(before)):
-        if line[i].type == 'VARIABLE':
-            formal_name = before[i].val
-            if line[i].val != '_':
-                actual_name = line[i].val
-            else:
-                actual_name = 'New'+str(new_index)
-                new_index += 1
-            mapping[formal_name] = actual_name
-
-    remaining_formal = [t.val for t in after if t.type == 'VARIABLE' and t.val not in mapping.keys()]
-    remaining_actual = [t.val for t in rule if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()]
-
-    while len(remaining_actual) < len(remaining_formal):
-        remaining_actual.append('New'+str(new_index))
-        new_index += 1
-
-    for i, formal_name in enumerate(remaining_formal):
-        mapping[formal_name] = remaining_actual[i]
-
-    return mapping
-
-# Basic sanity check.
-if __name__ == '__main__':
-    code = 'dup([H|T], [H1|T1]) :- dup(T1, T2). '
-    lines, rules = decompose(code)
-    print(compose(lines, rules))
-
-    var_names = {}
-    before = rename_vars(tokenize("dup([A0|A1], [A2|A3])"), var_names)
-    after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names)
-
-    line = lines[0]
-    rule = tokenize(code)
-
-    mapping = map_vars(before, after, line, rule)
-    print(mapping)
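
The helpers removed above form a small pipeline: tokenize() turns source text into Token objects, rename_vars() normalizes variable names, and stringify() and split() turn tokens back into source strings. A minimal usage sketch follows; the import path prolog.util is an assumption based on the commit subject (the file moves to the root module), and the exact output spacing depends on which token types the lexer marks as operators.

    # Usage sketch only. Assumes the module is importable as
    # prolog.util after this move and that str(token) yields the
    # token's value, which stringify() relies on.
    from prolog.util import tokenize, stringify, split, rename_vars

    code = 'len([], 0). len([_|T], N) :- len(T, M), N is M+1.'

    # split() yields one reassembled source string per rule.
    for rule in split(code):
        print(rule)

    # rename_vars() maps variables (including each anonymous '_') to
    # A0, A1, ... in order of first appearance, so structurally equal
    # rules get identical normalized forms.
    print(stringify(rename_vars(tokenize('len([_|T], N)'))))
    # prints, modulo operator spacing: len([A0|A1], A2)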
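
decompose() and compose() are the editing counterpart to that pipeline: decompose() breaks a program into lines, one tuple of tokens per goal, plus rules, a list of (start, end) index ranges into lines, and compose() renders such a structure back into source. A sketch under the same import assumption:

    from prolog.util import decompose, compose

    lines, rules = decompose(
        'sum([], 0). sum([H|T], S) :- sum(T, S1), S is S1+H.')
    # lines holds four token tuples, one per goal:
    #   sum([], 0) / sum([H|T], S) / sum(T, S1) / S is S1+H
    # rules == [(0, 1), (1, 4)]: the fact spans lines[0:1] and the
    # rule spans lines[1:4].
    print(compose(lines, rules))
    # Re-renders the program one goal per line, reinserting ':-'
    # after each rule head and ',' between body goals.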
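
map_vars() is the least self-documenting helper, and the sanity check prints its result without saying what to expect. Tracing the code above: aligning before with line position by position binds A0→H, A1→T, A2→H1 and A3→T1; formal variables that occur only in after are then paired with still-unused variables of rule, and fresh NewN names are invented once rule runs out. For the sanity check this yields the following mapping, derived by hand from the code rather than captured from a run (dict ordering may differ):

    mapping = map_vars(before, after, line, rule)
    # mapping == {'A0': 'H', 'A1': 'T', 'A2': 'H1', 'A3': 'T1',
    #             'A4': 'T2', 'A5': 'New0'}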