# CodeQ: an online programming tutor. # Copyright (C) 2015-2017 UL FRI # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) any # later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . from nltk import ParentedTree import ply.yacc as yacc from .lexer import tokens from .util import Token # PARSER precedence = ( ('nonassoc', 'FROM', 'FROMDCG'), ('right', 'PIPE'), ('right', 'IMPLIES'), ('right', 'NOT'), ('nonassoc', 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', 'IN', 'INS', 'THROUGH', 'EQFD', 'NEQFD', 'LTFD', 'LEFD', 'GTFD', 'GEFD'), ('left', 'PLUS', 'MINUS'), ('left', 'STAR', 'DIV', 'IDIV', 'MOD'), ('nonassoc', 'POW'), ('nonassoc', 'UINTEGER', 'UREAL'), ('nonassoc', 'NAME', 'VARIABLE', 'STRING'), ('nonassoc', 'PERIOD'), ('nonassoc', 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'COMMA', 'SEMI', 'LBRACE', 'RBRACE') ) def token_start(p, n): return p.slice[n].lexpos def token_end(p, n): return p.slice[n].lexpos + len(p.slice[n].value) def make_token(p, n): lextoken = p.slice[n] return Token(lextoken.type, lextoken.value, lextoken.lexpos) def make_tree(label, children, start=0, end=0): tree = ParentedTree(label, children) tree.start = start tree.end = end return tree # return a new tree labeled name with a single child p[n] def wrap(name, p, n=1): if isinstance(p[n], ParentedTree): return make_tree(name, [p[n]], p[n].start, p[n].end) else: return make_tree(name, [make_token(p, n)], token_start(p, n), token_end(p, n)) def p_text_empty(p): 'text : ' p[0] = make_tree('text', []) def p_text_clause(p): 'text : text clause' p[0] = p[1] p[0].append(p[2]) p[0].end = p[2].end def p_clause_fact(p): 'clause : or PERIOD' p[0] = make_tree('clause', [wrap('head', p, 1)], p[1].start, token_end(p, 2)) def p_clause_rule(p): '''clause : or FROM or PERIOD | or FROMDCG or PERIOD''' p[0] = make_tree('clause', [wrap('head', p, 1), p[3]], p[1].start, token_end(p, 4)) def p_clause_directive(p): '''clause : FROM or PERIOD''' p[0] = make_tree('directive', [p[2]], token_start(p, 1), token_end(p, 3)) def p_or_single(p): 'or : if' p[0] = p[1] def p_or_if(p): 'or : or SEMI if' p[0] = make_tree('or', [p[1], p[3]], p[1].start, p[3].end) def p_if_single(p): 'if : and' p[0] = p[1] def p_if_and(p): 'if : and IMPLIES if' p[0] = make_tree('if', [p[1], p[3]], p[1].start, p[3].end) def p_and_single(p): 'and : term' p[0] = p[1] def p_and_term(p): 'and : term COMMA and' p[0] = make_tree('and', [p[1], p[3]], p[1].start, p[3].end) # simple terms def p_term_variable(p): 'term : VARIABLE' p[0] = wrap('variable', p) def p_term_simple(p): '''term : NAME | STRING | UINTEGER | UREAL''' p[0] = wrap('literal', p) # compound terms def p_term_functor_zero(p): 'term : functor LPAREN RPAREN' # special case for zero-arity predicates supported by SWI-Prolog p[0] = make_tree('compound', [p[1]], p[1].start, token_end(p, 3)) def p_term_functor(p): 'term : functor LPAREN args RPAREN' p[0] = make_tree('compound', [p[1], p[3]], p[1].start, token_end(p, 4)) # compound term functor def p_functor(p): 'functor : NAME' p[0] = wrap('functor', p) # compound term arguments def p_args_single(p): 'args : term' p[0] = wrap('args', p) def p_args_multiple(p): 'args : term COMMA args' p[0] = make_tree('args', [p[1], p[3]], p[1].start, p[3].end) # covers parenthesized terms, e.g. “(a, b ; c)” def p_term_or(p): 'term : LPAREN or RPAREN' p[0] = p[2] p[0].start = token_start(p, 1) p[0].end = token_end(p, 3) # covers terms in braces used for CLP(R) and DCGs, e.g. “{ X = 1 ; X = 2}” def p_term_brace(p): 'term : LBRACE or RBRACE' p[0] = make_tree('braced', [p[2]], token_start(p, 1), token_end(p, 3)) # binary expressions, e.g. “1 + 2” def p_term_operator_infix(p): '''term : term PLUS term | term MINUS term | term STAR term | term POW term | term DIV term | term IDIV term | term MOD term | term EQU term | term NEQU term | term EQ term | term NEQ term | term UNIV term | term IS term | term EQA term | term NEQA term | term LT term | term LE term | term GT term | term GE term | term LTL term | term LEL term | term GTL term | term GEL term | term PIPE term | term THROUGH term | term IN term | term INS term | term EQFD term | term NEQFD term | term LTFD term | term LEFD term | term GTFD term | term GEFD term''' p[0] = make_tree('binop', [p[1], make_token(p, 2), p[3]], p[1].start, p[3].end) # binary expressions in functional notation, e.g. “+(1,2)” def p_term_operator_prefix(p): '''term : PLUS LPAREN term COMMA term RPAREN | MINUS LPAREN term COMMA term RPAREN | STAR LPAREN term COMMA term RPAREN | POW LPAREN term COMMA term RPAREN | DIV LPAREN term COMMA term RPAREN | IDIV LPAREN term COMMA term RPAREN | MOD LPAREN term COMMA term RPAREN | EQU LPAREN term COMMA term RPAREN | NEQU LPAREN term COMMA term RPAREN | EQ LPAREN term COMMA term RPAREN | NEQ LPAREN term COMMA term RPAREN | UNIV LPAREN term COMMA term RPAREN | IS LPAREN term COMMA term RPAREN | EQA LPAREN term COMMA term RPAREN | NEQA LPAREN term COMMA term RPAREN | LT LPAREN term COMMA term RPAREN | LE LPAREN term COMMA term RPAREN | GT LPAREN term COMMA term RPAREN | GE LPAREN term COMMA term RPAREN | LTL LPAREN term COMMA term RPAREN | LEL LPAREN term COMMA term RPAREN | GTL LPAREN term COMMA term RPAREN | GEL LPAREN term COMMA term RPAREN | PIPE LPAREN term COMMA term RPAREN | THROUGH LPAREN term COMMA term RPAREN | IN LPAREN term COMMA term RPAREN | INS LPAREN term COMMA term RPAREN | EQFD LPAREN term COMMA term RPAREN | NEQFD LPAREN term COMMA term RPAREN | LTFD LPAREN term COMMA term RPAREN | LEFD LPAREN term COMMA term RPAREN | GTFD LPAREN term COMMA term RPAREN | GEFD LPAREN term COMMA term RPAREN''' start, end = token_start(p, 1), token_end(p, 6) p[0] = make_tree('binop', [p[3], make_token(p, 1), p[5]], start, end) # unary operators def p_term_operator_unary(p): '''term : NOT term | MINUS term | PLUS term''' # shift/reduce conflict for MINUS and PLUS with p_term_operator_prefix above: # ply prefers shifting and will resolve +(2,2) to the binary expression “2+2” # instead of the unary “+ (2,2)” (this behavior is what we want) p[0] = make_tree('unop', [make_token(p, 1), p[2]], token_start(p, 1), p[2].end) # lists def p_term_list(p): 'term : list' p[0] = p[1] def p_list_empty(p): 'list : LBRACKET RBRACKET' start, end = token_start(p, 1), token_end(p, 2) p[0] = make_tree('literal', [Token(type='NIL', val='[]', pos=start)], start, end) def p_list(p): 'list : LBRACKET elems RBRACKET' p[0] = p[2] p[0].start = token_start(p, 1) p[0].end = token_end(p, 3) # list elements def p_elems_single(p): 'elems : term' if p[1].label() == 'binop' and p[1][1].type == 'PIPE': # p[1] has three children: left, "|", right left = p[1].pop(0) right = p[1].pop(1) p[0] = make_tree('list', [left, right], p[1].start, right.end) else: # p[1] has one child: term start, end = p[1].start, p[1].end nil = make_tree('literal', [Token(type='NIL', val='[]', pos=start)], start, end) p[0] = make_tree('list', [p[1], nil], start, end) def p_elems_multiple(p): 'elems : term COMMA elems' p[0] = make_tree('list', [p[1], p[3]], p[1].start, p[3].end) def p_error(t): if t is not None: raise SyntaxError('at position {}: unexpected ‘{}’'.format(t.lexpos, t.value)) else: raise SyntaxError('unexpected EOF') parser = yacc.yacc(debug=True) if __name__ == '__main__': from .util import stringify while True: try: s = input('> ') except EOFError: break if not s: continue ast = parser.parse(s) def pp(node): if isinstance(node, ParentedTree): return '(' + node.label() + ' ' + ' '.join([pp(child) for child in node]) + ')' return '"' + str(node) + '"' print(pp(ast)) print(stringify(ast))