summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimotej Lazar <timotej.lazar@araneo.org>2015-03-27 14:22:49 +0100
committerAleš Smodiš <aless@guru.si>2015-08-11 14:26:03 +0200
commit4cffac21da85476e3656b79a8da3a019429740f1 (patch)
tree37ddf60afcb0c9336c2312609d6b90ad86bc9bb2
parentc9a2a1cd045a528e773b3e7705665bec181e84f6 (diff)
Add a parser for (simplified) Prolog
-rw-r--r--prolog/parser.py192
-rw-r--r--prolog/util.py10
2 files changed, 200 insertions, 2 deletions
diff --git a/prolog/parser.py b/prolog/parser.py
new file mode 100644
index 0000000..ec42b5b
--- /dev/null
+++ b/prolog/parser.py
@@ -0,0 +1,192 @@
+#!/usr/bin/python3
+
+import ply.yacc as yacc
+from .lexer import operators, tokens
+from .util import Token
+
+# PARSER
+precedence = (
+ ('nonassoc', 'FROM'),
+ ('right', 'IMPLIES'),
+ ('right', 'NOT'),
+ ('nonassoc', 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL'),
+ ('left', 'PLUS', 'MINUS'),
+ ('left', 'STAR', 'DIV', 'IDIV', 'MOD'),
+ ('nonassoc', 'POW'),
+ ('right', 'UMINUS', 'UPLUS'),
+ ('nonassoc', 'UINTEGER', 'UREAL'),
+ ('nonassoc', 'NAME', 'VARIABLE', 'STRING'),
+ ('nonassoc', 'PERIOD'),
+ ('nonassoc', 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'COMMA', 'SEMI', 'PIPE', 'LBRACE', 'RBRACE')
+)
+
+class Node:
+ def __init__(self, type, children=None, data=None):
+ self.type = type
+ self.data = data
+ self.children = children if children else []
+
+ def __str__(self):
+ val = self.type
+ if self.children:
+ val += ' ' + ' '.join([str(c) for c in self.children])
+ val = '({})'.format(val)
+ return val
+
+def make_token(p, n):
+ lextoken = p.slice[n]
+ return Token(lextoken.type, lextoken.value, lextoken.lexpos)
+
+def p_text_empty(p):
+ 'text : '
+ p[0] = Node('text', [])
+def p_text_clause(p):
+ 'text : text clause'
+ p[0] = p[1]
+ p[0].children.append(p[2])
+
+def p_clause_head(p):
+ 'clause : head PERIOD'
+ p[0] = Node('clause', [p[1], make_token(p, 2)])
+def p_clause_rule(p):
+ 'clause : head FROM or PERIOD'
+ p[0] = Node('clause', [p[1], make_token(p, 2), p[3], make_token(p, 4)])
+def p_clause_error(p):
+ 'clause : error PERIOD'
+ p[0] = Node('error')
+
+def p_head(p):
+ 'head : term'
+ p[0] = Node('head', [p[1]])
+
+def p_or_single(p):
+ 'or : if'
+ p[0] = p[1]
+def p_or_if(p):
+ 'or : or SEMI if'
+ if p[1].type == 'or':
+ p[0] = p[1]
+ else:
+ p[0] = Node('or', [p[1]])
+ p[0].children.append(make_token(p, 2))
+ p[0].children.append(p[3])
+
+def p_if_single(p):
+ 'if : and'
+ p[0] = p[1]
+def p_if_and(p):
+ 'if : and IMPLIES if'
+ p[0] = Node('if', [p[1], make_token(p, 2), p[3]])
+
+def p_and_single(p):
+ 'and : term'
+ p[0] = p[1]
+def p_and_term(p):
+ 'and : and COMMA term'
+ if p[1].type == 'and':
+ p[0] = p[1]
+ else:
+ p[0] = Node('and', [p[1]])
+ p[0].children.append(make_token(p, 2))
+ p[0].children.append(p[3])
+
+def p_term_functor(p):
+ 'term : functor LPAREN args RPAREN'
+ # No whitespace allowed between functor and LPAREN.
+ t2 = make_token(p, 2)
+ if p[1].children[0].pos + len(p[1].children[0].val) < t2.pos:
+ raise SyntaxError('whitespace before ' + str(t2))
+ p[0] = Node('term', [p[1], t2, p[3], make_token(p, 4)])
+def p_term_or(p):
+ 'term : LPAREN or RPAREN'
+ p[0] = Node('term', [make_token(p, 1), p[2], make_token(p, 3)])
+def p_term_binary(p):
+ '''term : term PLUS term
+ | term MINUS term
+ | term STAR term
+ | term POW term
+ | term DIV term
+ | term IDIV term
+ | term MOD term
+
+ | term EQU term
+ | term NEQU term
+ | term EQ term
+ | term NEQ term
+ | term UNIV term
+ | term IS term
+ | term EQA term
+ | term NEQA term
+ | term LT term
+ | term LE term
+ | term GT term
+ | term GE term
+ | term LTL term
+ | term LEL term
+ | term GTL term
+ | term GEL term'''
+ p[0] = Node('term', [p[1], make_token(p, 2), p[3]])
+def p_term_unary(p):
+ '''term : NOT term
+ | MINUS term %prec UMINUS
+ | PLUS term %prec UPLUS'''
+ p[0] = Node('term', [make_token(p, 1), p[2]])
+def p_term_list(p):
+ 'term : list'
+ p[0] = Node('term', [p[1]])
+
+def p_term_simple(p):
+ '''term : STRING
+ | NAME
+ | UINTEGER
+ | UREAL
+ | VARIABLE'''
+ p[0] = Node('term', [make_token(p, 1)])
+
+def p_args_empty(p):
+ 'args : '
+ p[0] = Node('args', [])
+def p_args_single(p):
+ 'args : term'
+ p[0] = Node('args', [p[1]])
+def p_args_term(p):
+ 'args : args COMMA term'
+ p[0] = p[1]
+ p[0].children.append(make_token(p, 2))
+ p[0].children.append(p[3])
+
+def p_list_empty(p):
+ 'list : LBRACKET RBRACKET'
+ p[0] = Node('list', [make_token(p, 1), make_token(p, 2)])
+def p_list_term(p):
+ 'list : LBRACKET listexpr RBRACKET'
+ p[0] = Node('list', [make_token(p, 1), p[2], make_token(p, 3)])
+def p_listexpr_single(p):
+ 'listexpr : term'
+ p[0] = Node('listexpr', [p[1]])
+def p_listexpr_term(p):
+ 'listexpr : term COMMA listexpr'
+ p[0] = Node('listexpr', [p[1], make_token(p, 2), p[3]])
+def p_listexpr_pipe(p):
+ 'listexpr : term PIPE term'
+ p[0] = Node('listexpr', [p[1], make_token(p, 2), p[3]])
+
+def p_functor(p):
+ 'functor : NAME'
+ p[0] = Node('functor', [make_token(p, 1)])
+
+def p_error(p):
+ print('Syntax error in input: ' + str(p))
+
+parser = yacc.yacc()
+
+if __name__ == '__main__':
+ while True:
+ try:
+ s = input('> ')
+ except EOFError:
+ break
+ if not s:
+ continue
+ ast = parser.parse(s)
+ print(ast)
diff --git a/prolog/util.py b/prolog/util.py
index 83a3ade..3130ea5 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -2,8 +2,6 @@
from collections import namedtuple
-from .lexer import lexer, operators
-
# Stores a token's type and value, and optionally the position of the first
# character in the lexed stream.
class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop'])):
@@ -39,6 +37,9 @@ class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop']))
self.part if part is None else part,
self.stop if stop is None else stop)
+from .lexer import lexer, operators
+from .parser import parser
+
# Return a list of tokens in [text].
def tokenize(text):
lexer.input(text)
@@ -142,6 +143,11 @@ def compose(tokens):
prev = t
return code.strip()
+# Parse [text] into an AST.
+def parse(text):
+ tree = parser.parse(text)
+ return tree
+
# Rename variables in [tokens] to A0, A1, A2,… in order of appearance.
def rename_vars(tokens, names=None):
if names is None: