From 819ab10281c9bd6c000364c3a243959edd18abf7 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Thu, 15 Jan 2015 12:10:22 +0100 Subject: Move pymonkey stuff to monkey/ Importing pymonkey into webmonkey, let's see how this works. --- monkey/prolog/engine.py | 135 +++++++++++++++++++++++++++++++++++++++++ monkey/prolog/lexer.py | 90 ++++++++++++++++++++++++++++ monkey/prolog/util.py | 156 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 381 insertions(+) create mode 100644 monkey/prolog/engine.py create mode 100644 monkey/prolog/lexer.py create mode 100644 monkey/prolog/util.py (limited to 'monkey/prolog') diff --git a/monkey/prolog/engine.py b/monkey/prolog/engine.py new file mode 100644 index 0000000..dff577c --- /dev/null +++ b/monkey/prolog/engine.py @@ -0,0 +1,135 @@ +#!/usr/bin/python3 + +import http.client +import json +import re +import urllib + +address, port = 'localhost', 3030 + +class PrologEngine(object): + def __init__(self, address=address, port=port, code='', destroy=False, id=None): + self.conn = http.client.HTTPConnection(address, port, timeout=10) + + # If existing engine ID is given, use it. + if id: + self.id = id + return + + # Otherwise, create a new engine. + hdrs = {'Content-Type': 'application/json;charset=utf-8'} + opts = json.dumps({'destroy': destroy, 'src_text': code, 'format': 'json-s'}) + reply, outputs = self.request('POST', '/pengine/create', body=opts, headers=hdrs) + + failed = (reply['event'] != 'create') + warnings = [] + errors = [] + for output in outputs: + print(output) + message = PrologEngine.parse_prolog_output(output) + if output['message'] == 'warning': + warnings.append(message) + elif output['message'] == 'error': + failed = True + errors.append(message) + + if failed: + raise Exception('\n'.join(errors)) + + self.id = reply['id'] + + def send(self, event): + params = urllib.parse.urlencode({ + 'id': self.id, + 'event': event, + 'format': 'json-s'}) + reply, outputs = self.request('GET', '/pengine/send?' + params) + return reply + + def ask(self, query): + event = 'ask(({}),[])'.format(query) + reply = self.send(event) + return reply + + def next(self, n=1): + event = 'next({})'.format(n) + reply = self.send(event) + return reply + + def stop(self): + return self.send('stop') + + def destroy(self): + reply = self.send('destroy') + self.id = None + self.conn.close() + self.conn = None + + # Return the main reply and possible output replies. + def request(self, method, path, body=None, headers={}): + self.conn.request(method, path, body, headers) + outputs = [] + while True: + response = self.conn.getresponse() + if response.status != http.client.OK: + raise Exception('server returned {}'.format(response.status)) + reply = json.loads(response.read().decode('utf-8')) + self.id = reply['id'] + if reply['event'] == 'output': + outputs.append(reply) + params = urllib.parse.urlencode({ + 'id': self.id, + 'format': 'json-s'}) + self.conn.request('GET', '/pengine/pull_response?' + params) + else: + return reply, outputs + + # Check if output is an error message and return a prettified version of it. + def parse_prolog_output(output): + match = re.match(r'.*
(.*)
.*', + output['data'], flags=re.DOTALL) + data = match.group(1).strip() + message = '' + if output['message'] == 'error': + if 'location' in output: + loc = output['location'] + message += 'near line ' + str(loc['line']) + if 'ch' in loc: + message += ', character ' + str(loc['ch']) + message += ': ' + + if output.get('code') == 'syntax_error': + match = re.match(r'^.*Syntax error: (.*)$', data, flags=re.DOTALL) + message += match.group(1) + elif output.get('code') == 'permission_error': + match = re.match(r'^.*(No permission [^\n]*)', data, flags=re.DOTALL) + message += match.group(1) + elif output.get('code') == 'type_error': + match = re.match(r'^.*(Type error: [^\n]*)', data, flags=re.DOTALL) + message += match.group(1) + else: + message += data + + # Replace anonymous variable names with _. + message = re.sub(r'_G[0-9]*', '_', message) + return message + +def test(name, code): + engine = PrologEngine(code=code) + reply = engine.ask("run_tests({}, '{}', Results)".format(name, engine.id)) + engine.destroy() + + if reply['event'] != 'success': + raise Exception('testing procedure failed') + + results = re.findall(r'(?:success|failure)\([^)]*\)', reply['data'][0]['Results']) + n_total = len(results) + n_passed = len([r for r in results if r.startswith('success')]) + return (n_passed, n_total) + +# Basic sanity check. +if __name__ == '__main__': + engine = PrologEngine(code='dup([],[]). dup([H|T],[H,H|TT]) :- dup(T,TT).') + print('engine id is ' + engine.id) + print(engine.ask("run_tests({},'{}',Result)".format('dup/2', engine.id))) + engine.destroy() diff --git a/monkey/prolog/lexer.py b/monkey/prolog/lexer.py new file mode 100644 index 0000000..971e8a6 --- /dev/null +++ b/monkey/prolog/lexer.py @@ -0,0 +1,90 @@ +#!/usr/bin/python3 + +import ply.lex as lex + +# LEXER + +#states = ( +# ('comment', 'exclusive'), +#) + +# tokens; treat operators as names if followed by ( +operators = { + r':-': 'FROM', + r'->': 'IMPLIES', + r'\+': 'NOT', + r'not': 'NOT', + r'=': 'EQU', + r'\=': 'NEQU', + r'==': 'EQ', + r'\==': 'NEQ', + r'=..': 'UNIV', + r'is': 'IS', + r'=:=': 'EQA', + r'=\=': 'NEQA', + r'<': 'LT', + r'=<': 'LE', + r'>': 'GT', + r'>=': 'GE', + r'@<': 'LTL', + r'@=<': 'LEL', + r'@>': 'GTL', + r'@>=': 'GEL', + r'+': 'PLUS', + r'-': 'MINUS', + r'*': 'STAR', + r'/': 'DIV', + r'//': 'IDIV', + r'mod': 'MOD', + r'**': 'POW', + r'.': 'PERIOD', + r',': 'COMMA', + r';': 'SEMI' +} +tokens = list(operators.values()) + [ + 'UINTEGER', 'UREAL', + 'NAME', 'VARIABLE', 'STRING', + 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE', + 'INVALID' +] + +# punctuation +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_PIPE = r'\|' +t_LBRACE = r'{' +t_RBRACE = r'}' + +t_UINTEGER = r'[0-9]+' +t_UREAL = r'[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?|inf|nan' +t_VARIABLE = r'(_|[A-Z])[a-zA-Z0-9_]*' +t_STRING = r'"(""|\\.|[^\"])*"' + +# no support for nested comments yet +def t_comment(t): + r'(/\*(.|\n)*?\*/)|(%.*)' + pass + +def t_NAME(t): + r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|," + if t.lexer.lexpos >= len(t.lexer.lexdata) or t.lexer.lexdata[t.lexer.lexpos] != '(': + t.type = operators.get(t.value, 'NAME') + return t + +t_ignore = ' \t' + +def t_newline(t): + r'\n+' + t.lexer.lineno += len(t.value) + +def t_error(t): + # TODO send this to stderr + #print("Illegal character '" + t.value[0] + "'") + t.type = 'INVALID' + t.value = t.value[0] + t.lexer.skip(1) + return t + +lexer = lex.lex() diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py new file mode 100644 index 0000000..0ab3c8b --- /dev/null +++ b/monkey/prolog/util.py @@ -0,0 +1,156 @@ +#!/usr/bin/python3 + +import itertools +import math +import re + +from .lexer import lexer +from util import Token + +def tokenize(text): + lexer.input(text) + return [Token(t.type, t.value, t.lexpos) for t in lexer] + +operators = set([ + 'FROM', 'IMPLIES', 'NOT', + 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', + 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', + 'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD', + 'POW', 'SEMI' +]) +def stringify(tokens): + def token_str(t): + if t.type in ('PERIOD', 'COMMA'): + return str(t) + ' ' + if t.type in operators: + return ' ' + str(t) + ' ' + return str(t) + return ''.join(map(token_str, tokens)) + +# Yield the sequence of rules in [code]. +def split(code): + tokens = tokenize(code) + start = 0 + for idx, token in enumerate(tokens): + if token.type == 'PERIOD' and idx - start > 1: + yield stringify(tokens[start:idx]) + start = idx + 1 + +# return a list of lines in 'code', and a list of rule indexes +def decompose(code): + lines = [] + rules = [] + tokens = tokenize(code) + tokens.append(Token('EOF')) + + line = [] + parens = [] + rule_start = 0 + for t in tokens: + if t.type == 'SEMI': + if line != []: + lines.append(tuple(line)) + line = [] + lines.append((t,)) + continue + if not parens: + if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'): + if line != []: + lines.append(tuple(line)) + line = [] + if t.type in ('PERIOD', 'EOF') and rule_start < len(lines): + rules.append((rule_start, len(lines))) + rule_start = len(lines) + continue + if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'): + parens.append(t.type) + elif parens: + if t.type == 'RPAREN' and parens[-1] == 'LPAREN': + parens.pop() + elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET': + parens.pop() + elif t.type == 'RBRACE' and parens[-1] == 'LBRACE': + parens.pop() + line.append(t) + return tuple(lines), tuple(rules) + +# pretty-print a list of rules +def compose(lines, rules): + code = '' + for start, end in rules: + for i in range(start, end): + line = lines[i] + if i > start: + code += ' ' + code += stringify(line) + if i == end-1: + code += '.\n' + elif i == start: + code += ' :-\n' + else: + if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI': + code += ',' + code += '\n' + return code.strip() + +# standardize variable names in order of appearance +def rename_vars(tokens, names={}): + # copy names so we don't fuck it up + names = {k: v for k, v in names.items()} + next_id = len(names) + for i in range(len(tokens)): + if tokens[i].type == 'PERIOD': + names.clear() + next_id = 0 + elif tokens[i] == Token('VARIABLE', '_'): + tokens[i] = Token('VARIABLE', 'A' + str(next_id)) + next_id += 1 + elif tokens[i].type == 'VARIABLE': + cur_name = tokens[i].val + if cur_name not in names: + names[cur_name] = next_id + next_id += 1 + tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name])) + return names + +# transformation = before → after; applied on line which is part of rule +# return mapping from formal vars in before+after to actual vars in rule +# line and rule should of course not be normalized +def map_vars(before, after, line, rule): + mapping = {} + new_index = 0 + for i in range(len(before)): + if line[i].type == 'VARIABLE': + formal_name = before[i].val + if line[i].val != '_': + actual_name = line[i].val + else: + actual_name = 'New'+str(new_index) + new_index += 1 + mapping[formal_name] = actual_name + + remaining_formal = [t.val for t in after if t.type == 'VARIABLE' and t.val not in mapping.keys()] + remaining_actual = [t.val for t in rule if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()] + + while len(remaining_actual) < len(remaining_formal): + remaining_actual.append('New'+str(new_index)) + new_index += 1 + + for i, formal_name in enumerate(remaining_formal): + mapping[formal_name] = remaining_actual[i] + + return mapping + +# Basic sanity check. +if __name__ == '__main__': + print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). '))) + + rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ') + line = tokenize('dup([H|T], [H1|T1]) :-') + before = tokenize("dup([A0|A1], [A2|A3])") + after = tokenize("dup([A0|A1], [A5, A4|A3])") + var_names = rename_vars(before) + rename_vars(after, var_names) + + mapping = map_vars(before, after, line, rule) + print(mapping) -- cgit v1.2.1