summaryrefslogtreecommitdiff
path: root/monkey/prolog
diff options
context:
space:
mode:
Diffstat (limited to 'monkey/prolog')
-rw-r--r--monkey/prolog/engine.py135
-rw-r--r--monkey/prolog/lexer.py90
-rw-r--r--monkey/prolog/util.py156
3 files changed, 381 insertions, 0 deletions
diff --git a/monkey/prolog/engine.py b/monkey/prolog/engine.py
new file mode 100644
index 0000000..dff577c
--- /dev/null
+++ b/monkey/prolog/engine.py
@@ -0,0 +1,135 @@
+#!/usr/bin/python3
+
import http.client
import json
import re
import urllib
import urllib.parse
+
+address, port = 'localhost', 3030
+
+class PrologEngine(object):
+ def __init__(self, address=address, port=port, code='', destroy=False, id=None):
+ self.conn = http.client.HTTPConnection(address, port, timeout=10)
+
+ # If existing engine ID is given, use it.
+ if id:
+ self.id = id
+ return
+
+ # Otherwise, create a new engine.
+ hdrs = {'Content-Type': 'application/json;charset=utf-8'}
+ opts = json.dumps({'destroy': destroy, 'src_text': code, 'format': 'json-s'})
+ reply, outputs = self.request('POST', '/pengine/create', body=opts, headers=hdrs)
+
+ failed = (reply['event'] != 'create')
+ warnings = []
+ errors = []
+ for output in outputs:
+ print(output)
+ message = PrologEngine.parse_prolog_output(output)
+ if output['message'] == 'warning':
+ warnings.append(message)
+ elif output['message'] == 'error':
+ failed = True
+ errors.append(message)
+
+ if failed:
+ raise Exception('\n'.join(errors))
+
+ self.id = reply['id']
+
+ def send(self, event):
+ params = urllib.parse.urlencode({
+ 'id': self.id,
+ 'event': event,
+ 'format': 'json-s'})
+ reply, outputs = self.request('GET', '/pengine/send?' + params)
+ return reply
+
+ def ask(self, query):
+ event = 'ask(({}),[])'.format(query)
+ reply = self.send(event)
+ return reply
+
+ def next(self, n=1):
+ event = 'next({})'.format(n)
+ reply = self.send(event)
+ return reply
+
+ def stop(self):
+ return self.send('stop')
+
+ def destroy(self):
+ reply = self.send('destroy')
+ self.id = None
+ self.conn.close()
+ self.conn = None
+
+ # Return the main reply and possible output replies.
+ def request(self, method, path, body=None, headers={}):
+ self.conn.request(method, path, body, headers)
+ outputs = []
+ while True:
+ response = self.conn.getresponse()
+ if response.status != http.client.OK:
+ raise Exception('server returned {}'.format(response.status))
+ reply = json.loads(response.read().decode('utf-8'))
+ self.id = reply['id']
+ if reply['event'] == 'output':
+ outputs.append(reply)
+ params = urllib.parse.urlencode({
+ 'id': self.id,
+ 'format': 'json-s'})
+ self.conn.request('GET', '/pengine/pull_response?' + params)
+ else:
+ return reply, outputs
+
+ # Check if output is an error message and return a prettified version of it.
+ def parse_prolog_output(output):
+ match = re.match(r'.*<pre class="[^"]*">(.*)</pre>.*',
+ output['data'], flags=re.DOTALL)
+ data = match.group(1).strip()
+ message = ''
+ if output['message'] == 'error':
+ if 'location' in output:
+ loc = output['location']
+ message += 'near line ' + str(loc['line'])
+ if 'ch' in loc:
+ message += ', character ' + str(loc['ch'])
+ message += ': '
+
+ if output.get('code') == 'syntax_error':
+ match = re.match(r'^.*Syntax error: (.*)$', data, flags=re.DOTALL)
+ message += match.group(1)
+ elif output.get('code') == 'permission_error':
+ match = re.match(r'^.*(No permission [^\n]*)', data, flags=re.DOTALL)
+ message += match.group(1)
+ elif output.get('code') == 'type_error':
+ match = re.match(r'^.*(Type error: [^\n]*)', data, flags=re.DOTALL)
+ message += match.group(1)
+ else:
+ message += data
+
+ # Replace anonymous variable names with _.
+ message = re.sub(r'_G[0-9]*', '_', message)
+ return message
+
+def test(name, code):
+ engine = PrologEngine(code=code)
+ reply = engine.ask("run_tests({}, '{}', Results)".format(name, engine.id))
+ engine.destroy()
+
+ if reply['event'] != 'success':
+ raise Exception('testing procedure failed')
+
+ results = re.findall(r'(?:success|failure)\([^)]*\)', reply['data'][0]['Results'])
+ n_total = len(results)
+ n_passed = len([r for r in results if r.startswith('success')])
+ return (n_passed, n_total)
+
+# Basic sanity check.
+if __name__ == '__main__':
+ engine = PrologEngine(code='dup([],[]). dup([H|T],[H,H|TT]) :- dup(T,TT).')
+ print('engine id is ' + engine.id)
+ print(engine.ask("run_tests({},'{}',Result)".format('dup/2', engine.id)))
+ engine.destroy()
diff --git a/monkey/prolog/lexer.py b/monkey/prolog/lexer.py
new file mode 100644
index 0000000..971e8a6
--- /dev/null
+++ b/monkey/prolog/lexer.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python3
+
+import ply.lex as lex
+
+# LEXER
+
+#states = (
+# ('comment', 'exclusive'),
+#)
+
+# tokens; treat operators as names if followed by (
+operators = {
+ r':-': 'FROM',
+ r'->': 'IMPLIES',
+ r'\+': 'NOT',
+ r'not': 'NOT',
+ r'=': 'EQU',
+ r'\=': 'NEQU',
+ r'==': 'EQ',
+ r'\==': 'NEQ',
+ r'=..': 'UNIV',
+ r'is': 'IS',
+ r'=:=': 'EQA',
+ r'=\=': 'NEQA',
+ r'<': 'LT',
+ r'=<': 'LE',
+ r'>': 'GT',
+ r'>=': 'GE',
+ r'@<': 'LTL',
+ r'@=<': 'LEL',
+ r'@>': 'GTL',
+ r'@>=': 'GEL',
+ r'+': 'PLUS',
+ r'-': 'MINUS',
+ r'*': 'STAR',
+ r'/': 'DIV',
+ r'//': 'IDIV',
+ r'mod': 'MOD',
+ r'**': 'POW',
+ r'.': 'PERIOD',
+ r',': 'COMMA',
+ r';': 'SEMI'
+}
+tokens = list(operators.values()) + [
+ 'UINTEGER', 'UREAL',
+ 'NAME', 'VARIABLE', 'STRING',
+ 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE',
+ 'INVALID'
+]
+
+# punctuation
+t_LBRACKET = r'\['
+t_RBRACKET = r'\]'
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_PIPE = r'\|'
+t_LBRACE = r'{'
+t_RBRACE = r'}'
+
+t_UINTEGER = r'[0-9]+'
+t_UREAL = r'[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?|inf|nan'
+t_VARIABLE = r'(_|[A-Z])[a-zA-Z0-9_]*'
+t_STRING = r'"(""|\\.|[^\"])*"'
+
+# no support for nested comments yet
+def t_comment(t):
+ r'(/\*(.|\n)*?\*/)|(%.*)'
+ pass
+
+def t_NAME(t):
+ r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|,"
+ if t.lexer.lexpos >= len(t.lexer.lexdata) or t.lexer.lexdata[t.lexer.lexpos] != '(':
+ t.type = operators.get(t.value, 'NAME')
+ return t
+
+t_ignore = ' \t'
+
+def t_newline(t):
+ r'\n+'
+ t.lexer.lineno += len(t.value)
+
+def t_error(t):
+ # TODO send this to stderr
+ #print("Illegal character '" + t.value[0] + "'")
+ t.type = 'INVALID'
+ t.value = t.value[0]
+ t.lexer.skip(1)
+ return t
+
+lexer = lex.lex()
diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py
new file mode 100644
index 0000000..0ab3c8b
--- /dev/null
+++ b/monkey/prolog/util.py
@@ -0,0 +1,156 @@
+#!/usr/bin/python3
+
+import itertools
+import math
+import re
+
+from .lexer import lexer
+from util import Token
+
+def tokenize(text):
+ lexer.input(text)
+ return [Token(t.type, t.value, t.lexpos) for t in lexer]
+
+operators = set([
+ 'FROM', 'IMPLIES', 'NOT',
+ 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
+ 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
+ 'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
+ 'POW', 'SEMI'
+])
+def stringify(tokens):
+ def token_str(t):
+ if t.type in ('PERIOD', 'COMMA'):
+ return str(t) + ' '
+ if t.type in operators:
+ return ' ' + str(t) + ' '
+ return str(t)
+ return ''.join(map(token_str, tokens))
+
+# Yield the sequence of rules in [code].
+def split(code):
+ tokens = tokenize(code)
+ start = 0
+ for idx, token in enumerate(tokens):
+ if token.type == 'PERIOD' and idx - start > 1:
+ yield stringify(tokens[start:idx])
+ start = idx + 1
+
+# return a list of lines in 'code', and a list of rule indexes
+def decompose(code):
+ lines = []
+ rules = []
+ tokens = tokenize(code)
+ tokens.append(Token('EOF'))
+
+ line = []
+ parens = []
+ rule_start = 0
+ for t in tokens:
+ if t.type == 'SEMI':
+ if line != []:
+ lines.append(tuple(line))
+ line = []
+ lines.append((t,))
+ continue
+ if not parens:
+ if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
+ if line != []:
+ lines.append(tuple(line))
+ line = []
+ if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
+ rules.append((rule_start, len(lines)))
+ rule_start = len(lines)
+ continue
+ if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
+ parens.append(t.type)
+ elif parens:
+ if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
+ parens.pop()
+ elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
+ parens.pop()
+ elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
+ parens.pop()
+ line.append(t)
+ return tuple(lines), tuple(rules)
+
+# pretty-print a list of rules
+def compose(lines, rules):
+ code = ''
+ for start, end in rules:
+ for i in range(start, end):
+ line = lines[i]
+ if i > start:
+ code += ' '
+ code += stringify(line)
+ if i == end-1:
+ code += '.\n'
+ elif i == start:
+ code += ' :-\n'
+ else:
+ if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI':
+ code += ','
+ code += '\n'
+ return code.strip()
+
+# standardize variable names in order of appearance
+def rename_vars(tokens, names={}):
+ # copy names so we don't fuck it up
+ names = {k: v for k, v in names.items()}
+ next_id = len(names)
+ for i in range(len(tokens)):
+ if tokens[i].type == 'PERIOD':
+ names.clear()
+ next_id = 0
+ elif tokens[i] == Token('VARIABLE', '_'):
+ tokens[i] = Token('VARIABLE', 'A' + str(next_id))
+ next_id += 1
+ elif tokens[i].type == 'VARIABLE':
+ cur_name = tokens[i].val
+ if cur_name not in names:
+ names[cur_name] = next_id
+ next_id += 1
+ tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
+ return names
+
+# transformation = before → after; applied on line which is part of rule
+# return mapping from formal vars in before+after to actual vars in rule
+# line and rule should of course not be normalized
+def map_vars(before, after, line, rule):
+ mapping = {}
+ new_index = 0
+ for i in range(len(before)):
+ if line[i].type == 'VARIABLE':
+ formal_name = before[i].val
+ if line[i].val != '_':
+ actual_name = line[i].val
+ else:
+ actual_name = 'New'+str(new_index)
+ new_index += 1
+ mapping[formal_name] = actual_name
+
+ remaining_formal = [t.val for t in after if t.type == 'VARIABLE' and t.val not in mapping.keys()]
+ remaining_actual = [t.val for t in rule if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()]
+
+ while len(remaining_actual) < len(remaining_formal):
+ remaining_actual.append('New'+str(new_index))
+ new_index += 1
+
+ for i, formal_name in enumerate(remaining_formal):
+ mapping[formal_name] = remaining_actual[i]
+
+ return mapping
+
+# Basic sanity check.
+if __name__ == '__main__':
+ print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))
+
+ rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
+ line = tokenize('dup([H|T], [H1|T1]) :-')
+ before = tokenize("dup([A0|A1], [A2|A3])")
+ after = tokenize("dup([A0|A1], [A5, A4|A3])")
+ var_names = rename_vars(before)
+ rename_vars(after, var_names)
+
+ mapping = map_vars(before, after, line, rule)
+ print(mapping)