From 6a104bf8e2baea162d7f9f1d439dd8f671ddd413 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Wed, 4 Feb 2015 18:17:03 +0100 Subject: Clean up monkey.prolog.util --- monkey/prolog/util.py | 56 +++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'monkey/prolog/util.py') diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py index 46f6c5c..8d8b266 100644 --- a/monkey/prolog/util.py +++ b/monkey/prolog/util.py @@ -1,24 +1,19 @@ #!/usr/bin/python3 -from .lexer import lexer +from .lexer import lexer, operators from ..util import Token +# Return a list of tokens in [text]. def tokenize(text): lexer.input(text) return [Token(t.type, t.value, t.lexpos) for t in lexer] -operators = set([ - 'FROM', 'IMPLIES', 'NOT', - 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', - 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', - 'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD', - 'POW', 'SEMI' -]) +# Return a one-line string representation of [tokens]. def stringify(tokens): def token_str(t): if t.type in ('PERIOD', 'COMMA'): return str(t) + ' ' - if t.type in operators: + if t.type in operators.values(): return ' ' + str(t) + ' ' return str(t) return ''.join(map(token_str, tokens)) @@ -32,7 +27,7 @@ def split(code): yield stringify(tokens[start:idx]) start = idx + 1 -# return a list of lines in 'code', and a list of rule indexes +# Return a list of lines in [code] and a list of rule ranges. def decompose(code): lines = [] rules = [] @@ -68,9 +63,9 @@ def decompose(code): elif t.type == 'RBRACE' and parens[-1] == 'LBRACE': parens.pop() line.append(t) - return tuple(lines), tuple(rules) + return lines, rules -# pretty-print a list of rules +# Format a list of [lines] according to [rules] (as returned by decompose). def compose(lines, rules): code = '' for start, end in rules: @@ -89,25 +84,28 @@ def compose(lines, rules): code += '\n' return code.strip() -# standardize variable names in order of appearance -def rename_vars(tokens, names={}): - # copy names so we don't fuck it up - names = {k: v for k, v in names.items()} +# Rename variables in [tokens] to A0, A1, A2,… in order of appearance. +def rename_vars(tokens, names=None): + if names is None: + names = {} next_id = len(names) + + # Return a new list. + tokens = list(tokens) for i in range(len(tokens)): if tokens[i].type == 'PERIOD': names.clear() next_id = 0 elif tokens[i] == Token('VARIABLE', '_'): - tokens[i] = Token('VARIABLE', 'A' + str(next_id)) + tokens[i] = Token('VARIABLE', 'A{}'.format(next_id)) next_id += 1 elif tokens[i].type == 'VARIABLE': cur_name = tokens[i].val if cur_name not in names: - names[cur_name] = next_id + names[cur_name] = 'A{}'.format(next_id) next_id += 1 - tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name])) - return names + tokens[i] = Token('VARIABLE', names[cur_name]) + return tokens # transformation = before → after; applied on line which is part of rule # return mapping from formal vars in before+after to actual vars in rule @@ -139,14 +137,16 @@ def map_vars(before, after, line, rule): # Basic sanity check. if __name__ == '__main__': - print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). '))) - - rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ') - line = tokenize('dup([H|T], [H1|T1]) :-') - before = tokenize("dup([A0|A1], [A2|A3])") - after = tokenize("dup([A0|A1], [A5, A4|A3])") - var_names = rename_vars(before) - rename_vars(after, var_names) + code = 'dup([H|T], [H1|T1]) :- dup(T1, T2). ' + lines, rules = decompose(code) + print(compose(lines, rules)) + + var_names = {} + before = rename_vars(tokenize("dup([A0|A1], [A2|A3])"), var_names) + after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names) + + line = lines[0] + rule = tokenize(code) mapping = map_vars(before, after, line, rule) print(mapping) -- cgit v1.2.1