From 1f8fa5268bdcccac22b86b4b1d8e95b3397c14a7 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Thu, 24 Sep 2015 18:07:14 +0200 Subject: Use the NLTK ParentedTree class for parser output --- prolog/parser.py | 82 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/prolog/parser.py b/prolog/parser.py index 6166887..5b5c325 100644 --- a/prolog/parser.py +++ b/prolog/parser.py @@ -14,10 +14,10 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +from nltk import ParentedTree as Tree import ply.yacc as yacc from .lexer import operators, tokens from .util import Token -from monkey.graph import Node # PARSER precedence = ( @@ -37,71 +37,77 @@ precedence = ( def make_token(p, n): lextoken = p.slice[n] - return Node(data=Token(lextoken.type, lextoken.value, lextoken.lexpos)) + return Token(lextoken.type, lextoken.value, lextoken.lexpos) def p_text_empty(p): 'text : ' - p[0] = Node('text', []) + p[0] = Tree('text', []) def p_text_clause(p): 'text : text clause' p[0] = p[1] - p[0].eout.append(p[2]) + p[0].append(p[2]) def p_clause_head(p): 'clause : head PERIOD' - p[0] = Node('clause', [p[1], make_token(p, 2)]) + p[0] = Tree('clause', [p[1], make_token(p, 2)]) def p_clause_rule(p): 'clause : head FROM or PERIOD' - p[0] = Node('clause', [p[1], make_token(p, 2), p[3], make_token(p, 4)]) -def p_clause_error(p): - 'clause : error PERIOD' - p[0] = Node('clause', [Node('error'), make_token(p, 2)]) + p[0] = Tree('clause', [p[1], make_token(p, 2), p[3], make_token(p, 4)]) def p_head(p): 'head : term' - p[0] = Node('head', [p[1]]) + p[0] = Tree('head', [p[1]]) def p_or_single(p): 'or : if' p[0] = p[1] def p_or_if(p): 'or : or SEMI if' - if p[1].data == 'or': + if p[1].label() == 'or': p[0] = p[1] else: - p[0] = Node('or', [p[1]]) - p[0].eout.append(make_token(p, 2)) - p[0].eout.append(p[3]) + p[0] = Tree('or', [p[1]]) + p[0].append(make_token(p, 2)) 
+ p[0].append(p[3]) def p_if_single(p): 'if : and' p[0] = p[1] def p_if_and(p): 'if : and IMPLIES if' - p[0] = Node('if', [p[1], make_token(p, 2), p[3]]) + p[0] = Tree('if', [p[1], make_token(p, 2), p[3]]) def p_and_single(p): 'and : term' p[0] = p[1] def p_and_term(p): 'and : and COMMA term' - if p[1].data == 'and': + if p[1].label() == 'and': p[0] = p[1] else: - p[0] = Node('and', [p[1]]) - p[0].eout.append(make_token(p, 2)) - p[0].eout.append(p[3]) + p[0] = Tree('and', [p[1]]) + p[0].append(make_token(p, 2)) + p[0].append(p[3]) +# Special case for zero-arity predicates supported by SWI-Prolog. +def p_term_functor_zero(p): + 'term : functor LPAREN RPAREN' + # No whitespace allowed between functor and LPAREN. + t2 = make_token(p, 2) + if p[1][0].pos + len(p[1][0].val) < t2.pos: + raise SyntaxError('whitespace before ' + str(t2)) + p[0] = Tree('term', [p[1], t2, make_token(p, 3)]) def p_term_functor(p): 'term : functor LPAREN args RPAREN' # No whitespace allowed between functor and LPAREN. 
t2 = make_token(p, 2) - if p[1].eout[0].data.pos + len(p[1].eout[0].data.val) < t2.data.pos: + if p[1][0].pos + len(p[1][0].val) < t2.pos: raise SyntaxError('whitespace before ' + str(t2)) - p[0] = Node('term', [p[1], t2, p[3], make_token(p, 4)]) + p[0] = Tree('term', [p[1], t2, p[3], make_token(p, 4)]) + def p_term_or(p): 'term : LPAREN or RPAREN' - p[0] = Node('term', [make_token(p, 1), p[2], make_token(p, 3)]) + p[0] = Tree('term', [make_token(p, 1), p[2], make_token(p, 3)]) def p_term_binary(p): '''term : term PLUS term | term MINUS term @@ -127,15 +133,15 @@ def p_term_binary(p): | term LEL term | term GTL term | term GEL term''' - p[0] = Node('term', [p[1], make_token(p, 2), p[3]]) + p[0] = Tree('term', [p[1], make_token(p, 2), p[3]]) def p_term_unary(p): '''term : NOT term | MINUS term %prec UMINUS | PLUS term %prec UPLUS''' - p[0] = Node('term', [make_token(p, 1), p[2]]) + p[0] = Tree('term', [make_token(p, 1), p[2]]) def p_term_list(p): 'term : list' - p[0] = Node('term', [p[1]]) + p[0] = Tree('term', [p[1]]) def p_term_simple(p): '''term : STRING @@ -143,36 +149,36 @@ def p_term_simple(p): | UINTEGER | UREAL | VARIABLE''' - p[0] = Node('term', [make_token(p, 1)]) + p[0] = Tree('term', [make_token(p, 1)]) -def p_args_empty(p): - 'args : ' - p[0] = Node('args', []) def p_args_single(p): 'args : term' - p[0] = Node('args', [p[1]]) + p[0] = Tree('args', [p[1]]) def p_args_term(p): 'args : args COMMA term' p[0] = p[1] - p[0].eout.append(make_token(p, 2)) - p[0].eout.append(p[3]) + p[0].append(make_token(p, 2)) + p[0].append(p[3]) +def p_list_empty(p): + 'list : LBRACKET RBRACKET' + p[0] = Tree('list', [make_token(p, 1), make_token(p, 2)]) def p_list(p): 'list : LBRACKET args RBRACKET' - p[0] = Node('list', [make_token(p, 1)] + p[2].eout + [make_token(p, 3)]) + p[0] = Tree('list', [make_token(p, 1)] + p[2] + [make_token(p, 3)]) def p_list_tail(p): 'list : LBRACKET args PIPE term RBRACKET' - p[0] = Node('list', [make_token(p, 1)] + p[2].eout + [make_token(p, 3), 
p[4], make_token(p, 5)]) + p[0] = Tree('list', [make_token(p, 1)] + p[2] + [make_token(p, 3), p[4], make_token(p, 5)]) def p_functor(p): 'functor : NAME' - p[0] = Node('functor', [make_token(p, 1)]) + p[0] = Tree('functor', [make_token(p, 1)]) def p_error(t): if t is None: - print('unexpected end of file') + raise SyntaxError('unexpected end of file') else: - print('{}: unexpected {}'.format(t.lexpos, t.value)) + raise SyntaxError('{}: unexpected {}'.format(t.lexpos, t.value)) parser = yacc.yacc() @@ -185,4 +191,4 @@ if __name__ == '__main__': if not s: continue ast = parser.parse(s) - print(ast) + print(repr(ast)) -- cgit v1.2.1