From 4cffac21da85476e3656b79a8da3a019429740f1 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Fri, 27 Mar 2015 14:22:49 +0100 Subject: Add a parser for (simplified) Prolog --- prolog/parser.py | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ prolog/util.py | 10 ++- 2 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 prolog/parser.py diff --git a/prolog/parser.py b/prolog/parser.py new file mode 100644 index 0000000..ec42b5b --- /dev/null +++ b/prolog/parser.py @@ -0,0 +1,192 @@ +#!/usr/bin/python3 + +import ply.yacc as yacc +from .lexer import operators, tokens +from .util import Token + +# PARSER +precedence = ( + ('nonassoc', 'FROM'), + ('right', 'IMPLIES'), + ('right', 'NOT'), + ('nonassoc', 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL'), + ('left', 'PLUS', 'MINUS'), + ('left', 'STAR', 'DIV', 'IDIV', 'MOD'), + ('nonassoc', 'POW'), + ('right', 'UMINUS', 'UPLUS'), + ('nonassoc', 'UINTEGER', 'UREAL'), + ('nonassoc', 'NAME', 'VARIABLE', 'STRING'), + ('nonassoc', 'PERIOD'), + ('nonassoc', 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'COMMA', 'SEMI', 'PIPE', 'LBRACE', 'RBRACE') +) + +class Node: + def __init__(self, type, children=None, data=None): + self.type = type + self.data = data + self.children = children if children else [] + + def __str__(self): + val = self.type + if self.children: + val += ' ' + ' '.join([str(c) for c in self.children]) + val = '({})'.format(val) + return val + +def make_token(p, n): + lextoken = p.slice[n] + return Token(lextoken.type, lextoken.value, lextoken.lexpos) + +def p_text_empty(p): + 'text : ' + p[0] = Node('text', []) +def p_text_clause(p): + 'text : text clause' + p[0] = p[1] + p[0].children.append(p[2]) + +def p_clause_head(p): + 'clause : head PERIOD' + p[0] = Node('clause', [p[1], make_token(p, 2)]) +def p_clause_rule(p): + 'clause : head FROM or PERIOD' + p[0] = Node('clause', [p[1], make_token(p, 2), p[3], make_token(p, 4)]) +def p_clause_error(p): + 'clause : error PERIOD' + p[0] = Node('error') + +def p_head(p): + 'head : term' + p[0] = Node('head', [p[1]]) + +def p_or_single(p): + 'or : if' + p[0] = p[1] +def p_or_if(p): + 'or : or SEMI if' + if p[1].type == 'or': + p[0] = p[1] + else: + p[0] = Node('or', [p[1]]) + p[0].children.append(make_token(p, 2)) + p[0].children.append(p[3]) + +def p_if_single(p): + 'if : and' + p[0] = p[1] +def p_if_and(p): + 'if : and IMPLIES if' + p[0] = Node('if', [p[1], make_token(p, 2), p[3]]) + +def p_and_single(p): + 'and : term' + p[0] = p[1] +def p_and_term(p): + 'and : and COMMA term' + if p[1].type == 'and': + p[0] = p[1] + else: + p[0] = Node('and', [p[1]]) + p[0].children.append(make_token(p, 2)) + p[0].children.append(p[3]) + +def p_term_functor(p): + 'term : functor LPAREN args RPAREN' + # No whitespace allowed between functor and LPAREN. + t2 = make_token(p, 2) + if p[1].children[0].pos + len(p[1].children[0].val) < t2.pos: + raise SyntaxError('whitespace before ' + str(t2)) + p[0] = Node('term', [p[1], t2, p[3], make_token(p, 4)]) +def p_term_or(p): + 'term : LPAREN or RPAREN' + p[0] = Node('term', [make_token(p, 1), p[2], make_token(p, 3)]) +def p_term_binary(p): + '''term : term PLUS term + | term MINUS term + | term STAR term + | term POW term + | term DIV term + | term IDIV term + | term MOD term + + | term EQU term + | term NEQU term + | term EQ term + | term NEQ term + | term UNIV term + | term IS term + | term EQA term + | term NEQA term + | term LT term + | term LE term + | term GT term + | term GE term + | term LTL term + | term LEL term + | term GTL term + | term GEL term''' + p[0] = Node('term', [p[1], make_token(p, 2), p[3]]) +def p_term_unary(p): + '''term : NOT term + | MINUS term %prec UMINUS + | PLUS term %prec UPLUS''' + p[0] = Node('term', [make_token(p, 1), p[2]]) +def p_term_list(p): + 'term : list' + p[0] = Node('term', [p[1]]) + +def p_term_simple(p): + '''term : STRING + | NAME + | UINTEGER + | UREAL + | VARIABLE''' + p[0] = Node('term', [make_token(p, 1)]) + +def p_args_empty(p): + 'args : ' + p[0] = Node('args', []) +def p_args_single(p): + 'args : term' + p[0] = Node('args', [p[1]]) +def p_args_term(p): + 'args : args COMMA term' + p[0] = p[1] + p[0].children.append(make_token(p, 2)) + p[0].children.append(p[3]) + +def p_list_empty(p): + 'list : LBRACKET RBRACKET' + p[0] = Node('list', [make_token(p, 1), make_token(p, 2)]) +def p_list_term(p): + 'list : LBRACKET listexpr RBRACKET' + p[0] = Node('list', [make_token(p, 1), p[2], make_token(p, 3)]) +def p_listexpr_single(p): + 'listexpr : term' + p[0] = Node('listexpr', [p[1]]) +def p_listexpr_term(p): + 'listexpr : term COMMA listexpr' + p[0] = Node('listexpr', [p[1], make_token(p, 2), p[3]]) +def p_listexpr_pipe(p): + 'listexpr : term PIPE term' + p[0] = Node('listexpr', [p[1], make_token(p, 2), p[3]]) + +def p_functor(p): + 'functor : NAME' + p[0] = Node('functor', [make_token(p, 1)]) + +def p_error(p): + print('Syntax error in input: ' + str(p)) + +parser = yacc.yacc() + +if __name__ == '__main__': + while True: + try: + s = input('> ') + except EOFError: + break + if not s: + continue + ast = parser.parse(s) + print(ast) diff --git a/prolog/util.py b/prolog/util.py index 83a3ade..3130ea5 100644 --- a/prolog/util.py +++ b/prolog/util.py @@ -2,8 +2,6 @@ from collections import namedtuple -from .lexer import lexer, operators - # Stores a token's type and value, and optionally the position of the first # character in the lexed stream. class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop'])): @@ -39,6 +37,9 @@ class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop'])) self.part if part is None else part, self.stop if stop is None else stop) +from .lexer import lexer, operators +from .parser import parser + # Return a list of tokens in [text]. def tokenize(text): lexer.input(text) @@ -142,6 +143,11 @@ def compose(tokens): prev = t return code.strip() +# Parse [text] into an AST. +def parse(text): + tree = parser.parse(text) + return tree + # Rename variables in [tokens] to A0, A1, A2,… in order of appearance. def rename_vars(tokens, names=None): if names is None: -- cgit v1.2.1