#!/usr/bin/python3

import itertools
import math
import re

from .lexer import lexer
from util import Token

def tokenize(text):
    lexer.input(text)
    return [Token(t.type, t.value, t.lexpos) for t in lexer]

operators = set([
    'FROM', 'IMPLIES', 'NOT',
    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS',
    'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
    'POW', 'SEMI'
])

def stringify(tokens):
    def token_str(t):
        if t.type in ('PERIOD', 'COMMA'):
            return str(t) + ' '
        if t.type in operators:
            return ' ' + str(t) + ' '
        return str(t)
    return ''.join(map(token_str, tokens))

# return a list of lines in 'code', and a list of rule indexes
def decompose(code):
    lines = []
    rules = []
    tokens = tokenize(code)
    tokens.append(Token('EOF'))

    line = []
    parens = []
    rule_start = 0
    for t in tokens:
        if t.type == 'SEMI':
            if line != []:
                lines.append(tuple(line))
                line = []
            lines.append((t,))
            continue
        if not parens:
            if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
                if line != []:
                    lines.append(tuple(line))
                    line = []
                if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
                    rules.append((rule_start, len(lines)))
                    rule_start = len(lines)
                continue
        if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
            parens.append(t.type)
        elif parens:
            if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
                parens.pop()
            elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
                parens.pop()
            elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
                parens.pop()
        line.append(t)
    return tuple(lines), tuple(rules)

# pretty-print a list of rules
def compose(lines, rules):
    code = ''
    for start, end in rules:
        for i in range(start, end):
            line = lines[i]
            if i > start:
                code += ' '
            code += stringify(line)
            if i == end-1:
                code += '.\n'
            elif i == start:
                code += ' :-\n'
            else:
                if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI':
                    code += ','
                code += '\n'
    return code.strip()

# standardize variable names in order of appearance
def rename_vars(tokens, names={}):
    # copy names so we don't modify the caller's dict
    names = {k: v for k, v in names.items()}
    next_id = len(names)
    for i in range(len(tokens)):
        if tokens[i].type == 'PERIOD':
            names.clear()
            next_id = 0
        elif tokens[i] == Token('VARIABLE', '_'):
            tokens[i] = Token('VARIABLE', 'A' + str(next_id))
            next_id += 1
        elif tokens[i].type == 'VARIABLE':
            cur_name = tokens[i].val
            if cur_name not in names:
                names[cur_name] = next_id
                next_id += 1
            tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
    return names

# transformation = before → after; applied on line which is part of rule
# return mapping from formal vars in before+after to actual vars in rule
# line and rule should of course not be normalized
def map_vars(before, after, line, rule):
    mapping = {}
    new_index = 0
    for i in range(len(before)):
        if line[i].type == 'VARIABLE':
            formal_name = before[i].val
            if line[i].val != '_':
                actual_name = line[i].val
            else:
                actual_name = 'New' + str(new_index)
                new_index += 1
            mapping[formal_name] = actual_name

    remaining_formal = [t.val for t in after
                        if t.type == 'VARIABLE' and t.val not in mapping.keys()]
    remaining_actual = [t.val for t in rule
                        if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()]
    while len(remaining_actual) < len(remaining_formal):
        remaining_actual.append('New' + str(new_index))
        new_index += 1
    for i, formal_name in enumerate(remaining_formal):
        mapping[formal_name] = remaining_actual[i]
    return mapping

# Basic sanity check.
if __name__ == '__main__':
    print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))

    rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
    line = tokenize('dup([H|T], [H1|T1]) :-')
    before = tokenize("dup([A0|A1], [A2|A3])")
    after = tokenize("dup([A0|A1], [A5, A4|A3])")
    var_names = rename_vars(before)
    rename_vars(after, var_names)
    mapping = map_vars(before, after, line, rule)
    print(mapping)
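    # Additional illustration (not part of the original sanity check):
    # rename_vars should normalize variables to A0, A1, ... in order of
    # appearance, so the goal below is expected to print roughly as
    # rev([A0|A1], A2, A3) -- assuming util.Token stringifies to its value,
    # which stringify/compose above already rely on.
    goal = tokenize('rev([X|Xs], Acc, Ys)')
    rename_vars(goal)
    print(stringify(goal))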