Diffstat (limited to 'monkey/prolog/util.py')
-rw-r--r--  monkey/prolog/util.py  156
1 file changed, 156 insertions(+), 0 deletions(-)
diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py
new file mode 100644
index 0000000..0ab3c8b
--- /dev/null
+++ b/monkey/prolog/util.py
@@ -0,0 +1,156 @@
+#!/usr/bin/python3
+
+from .lexer import lexer
+from util import Token
+
+def tokenize(text):
+    lexer.input(text)
+    return [Token(t.type, t.value, t.lexpos) for t in lexer]
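+
+# Usage sketch; the exact token type names ('VARIABLE', 'EQ', ...) and the
+# Token fields are assumptions about .lexer and util.Token:
+#   tokenize('X = Y.')
+#   # => [Token('VARIABLE', 'X', 0), Token('EQ', '=', 2),
+#   #     Token('VARIABLE', 'Y', 4), Token('PERIOD', '.', 5)]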
+
+operators = {
+    'FROM', 'IMPLIES', 'NOT',
+    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
+    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
+    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
+    'POW', 'SEMI',
+}
+# Format a list of tokens as a string, spacing operators and separators.
+def stringify(tokens):
+    def token_str(t):
+        if t.type in ('PERIOD', 'COMMA'):
+            return str(t) + ' '
+        if t.type in operators:
+            return ' ' + str(t) + ' '
+        return str(t)
+    return ''.join(map(token_str, tokens))
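+
+# For example, assuming the lexer maps 'is' to IS and '+' to PLUS:
+#   stringify(tokenize('X is Y+1'))  # => 'X is Y + 1'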
+
+# Yield the sequence of rules in [code].
+def split(code):
+    tokens = tokenize(code)
+    start = 0
+    for idx, token in enumerate(tokens):
+        if token.type == 'PERIOD' and idx - start > 1:
+            yield stringify(tokens[start:idx])
+            start = idx + 1
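+
+# For example (the yielded strings do not include the final periods):
+#   list(split('pred(X) :- a(X). pred(X) :- b(X).'))
+#   # => ['pred(X) :- a(X)', 'pred(X) :- b(X)']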
+
+# Return a list of lines in [code] and a list of rules. Each line is a tuple
+# of tokens; each rule is a (start, end) pair of indices into the lines.
+def decompose(code):
+    lines = []
+    rules = []
+    tokens = tokenize(code)
+    tokens.append(Token('EOF'))
+
+    line = []
+    parens = []
+    rule_start = 0
+    for t in tokens:
+        # A semicolon (disjunction) gets a line of its own.
+        if t.type == 'SEMI':
+            if line != []:
+                lines.append(tuple(line))
+                line = []
+            lines.append((t,))
+            continue
+        # Outside all brackets, a period, neck or comma ends the line; a
+        # period or EOF also ends the current rule.
+        if not parens and t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
+            if line != []:
+                lines.append(tuple(line))
+                line = []
+            if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
+                rules.append((rule_start, len(lines)))
+                rule_start = len(lines)
+            continue
+        # Track bracket nesting at any depth, so that separators inside
+        # terms such as b(c(d), e) do not end the line.
+        if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
+            parens.append(t.type)
+        elif parens:
+            if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
+                parens.pop()
+            elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
+                parens.pop()
+            elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
+                parens.pop()
+        line.append(t)
+    return tuple(lines), tuple(rules)
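+
+# For example, decompose('a :- b, c.') returns one token tuple per goal and
+# one (start, end) range per rule, roughly:
+#   lines == ((a,), (b,), (c,))   # tuples of Token objects
+#   rules == ((0, 3),)            # the single rule spans lines[0:3]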
+
+# Pretty-print a list of rules.
+def compose(lines, rules):
+    code = ''
+    for start, end in rules:
+        for i in range(start, end):
+            line = lines[i]
+            if i > start:
+                code += ' '
+            code += stringify(line)
+            if i == end-1:
+                code += '.\n'
+            elif i == start:
+                code += ' :-\n'
+            else:
+                if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI':
+                    code += ','
+                code += '\n'
+    return code.strip()
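+
+# For example, compose(*decompose('a :- b, c.')) re-indents the rule with one
+# goal per line, roughly:
+#   a :-
+#    b,
+#    c.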
+
+# Standardize variable names in order of appearance.
+def rename_vars(tokens, names=None):
+    # Copy [names] so the caller's dict is left untouched.
+    names = dict(names) if names is not None else {}
+    next_id = len(names)
+    for i in range(len(tokens)):
+        if tokens[i].type == 'PERIOD':
+            # Variable scope does not extend across rules.
+            names.clear()
+            next_id = 0
+        elif tokens[i] == Token('VARIABLE', '_'):
+            # Each anonymous variable gets a fresh name.
+            tokens[i] = Token('VARIABLE', 'A' + str(next_id))
+            next_id += 1
+        elif tokens[i].type == 'VARIABLE':
+            cur_name = tokens[i].val
+            if cur_name not in names:
+                names[cur_name] = next_id
+                next_id += 1
+            tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
+    return names
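+
+# For example, if ts = tokenize('f(X, _, X)'), then rename_vars(ts) rewrites
+# the variables in place to A0, A1, A0 and returns {'X': 0}; the anonymous
+# variable consumed id 1 without being recorded.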
+
+# A transformation turns [before] into [after]; it is applied to [line],
+# which is part of [rule]. Return a mapping from the formal variables in
+# before+after to the actual variables in the rule. Neither [line] nor
+# [rule] should be normalized beforehand.
+def map_vars(before, after, line, rule):
+    mapping = {}
+    new_index = 0
+    # [before] matched [line], so the two are token-aligned; guard with
+    # min() in case they are not.
+    for i in range(min(len(before), len(line))):
+        if line[i].type == 'VARIABLE':
+            formal_name = before[i].val
+            if line[i].val != '_':
+                actual_name = line[i].val
+            else:
+                actual_name = 'New' + str(new_index)
+                new_index += 1
+            mapping[formal_name] = actual_name
+
+    # Formal variables that appear only in [after] are paired with the
+    # remaining actual variables of the rule, or with fresh names if the
+    # rule runs out of variables.
+    remaining_formal = [t.val for t in after
+                        if t.type == 'VARIABLE' and t.val not in mapping]
+    remaining_actual = [t.val for t in rule
+                        if t.type == 'VARIABLE' and t.val != '_'
+                        and t.val not in mapping.values()]
+
+    while len(remaining_actual) < len(remaining_formal):
+        remaining_actual.append('New' + str(new_index))
+        new_index += 1
+
+    for i, formal_name in enumerate(remaining_formal):
+        mapping[formal_name] = remaining_actual[i]
+
+    return mapping
+
+# Basic sanity check.
+if __name__ == '__main__':
+    print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))
+
+    rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
+    line = tokenize('dup([H|T], [H1|T1]) :-')
+    before = tokenize("dup([A0|A1], [A2|A3])")
+    after = tokenize("dup([A0|A1], [A5, A4|A3])")
+    var_names = rename_vars(before)
+    rename_vars(after, var_names)
+
+    mapping = map_vars(before, after, line, rule)
+    print(mapping)
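+    # With the assumed tokenization, this prints:
+    # {'A0': 'H', 'A1': 'T', 'A2': 'H1', 'A3': 'T1', 'A4': 'T2', 'A5': 'New0'}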