From 488c40522f831d7ef84efdd07f895479b79391c1 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Fri, 24 Feb 2017 12:13:51 +0100 Subject: prolog.parser: implement most remaining operators Also fix some precedence issues. --- prolog/lexer.py | 88 ++++++++++++++++++++++++++++++---------------------- prolog/parser.py | 94 +++++++++++++++++++++++++++++++++----------------------- 2 files changed, 108 insertions(+), 74 deletions(-) diff --git a/prolog/lexer.py b/prolog/lexer.py index 4e6c746..32a25fc 100644 --- a/prolog/lexer.py +++ b/prolog/lexer.py @@ -19,52 +19,65 @@ import ply.lex as lex operators = { - r':-': 'FROM', - r'-->': 'FROMDCG', - r'->': 'IMPLIES', - r'\+': 'NOT', - r'=': 'EQU', - r'\=': 'NEQU', - r'==': 'EQ', + r':-': 'FROM', + r'-->': 'FROMDCG', + r',': 'COMMA', + r';': 'SEMI', + r'->': 'IMPLIES', + r'*->': 'SOFTCUT', + r'\+': 'NOT', + r'=': 'EQU', + r'\=': 'NEQU', + r'=@=': 'EQV', + r'\=@=': 'NEQV', + r'==': 'EQ', r'\==': 'NEQ', r'=..': 'UNIV', - r'is': 'IS', + r'is': 'IS', r'=:=': 'EQA', r'=\=': 'NEQA', - r'<': 'LT', - r'=<': 'LE', - r'>': 'GT', - r'>=': 'GE', - r'@<': 'LTL', + r'<': 'LT', + r'=<': 'LE', + r'>': 'GT', + r'>=': 'GE', + r'@<': 'LTL', r'@=<': 'LEL', - r'@>': 'GTL', + r'@>': 'GTL', r'@>=': 'GEL', - r'#=': 'EQFD', - r'#\=': 'NEQFD', - r'#<': 'LTFD', - r'#=<': 'LEFD', - r'#>': 'GTFD', - r'#>=': 'GEFD', - r'in': 'IN', - r'ins': 'INS', - r'..': 'THROUGH', - r'+': 'PLUS', - r'-': 'MINUS', - r'*': 'STAR', - r'/': 'DIV', - r'//': 'IDIV', + r'+': 'PLUS', + r'-': 'MINUS', + r'*': 'STAR', + r'/': 'SLASH', + r'//': 'SLASH2', + r'<<': 'SHIFTLEFT', + r'>>': 'SHIFTRIGHT', + r'div': 'DIV', r'mod': 'MOD', - r'**': 'POW', - r'^': 'POW', - r'.': 'PERIOD', - r',': 'COMMA', - r';': 'SEMI' + r'rdiv': 'RDIV', + r'rem': 'REM', + r'xor': 'XOR', + r'^': 'POW', + r'**': 'POWSTAR', + '/\\': 'AND', + '\\': 'NEG', + + # CLP(FD) + r'in': 'IN', + r'ins': 'INS', + r'..': 'THROUGH', + r'#=': 'FDEQ', + r'#\=': 'FDNEQ', + r'#<': 'FDLT', + r'#=<': 'FDLE', + r'#>': 'FDGT', + r'#>=': 'FDGE', + r'\/': 'FDUNION', } tokens = sorted(list(operators.values())) + [ 'UINTEGER', 'UREAL', 'NAME', 'VARIABLE', 'STRING', 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE', - 'INVALID' + 'PERIOD', 'INVALID' ] # punctuation @@ -88,8 +101,11 @@ def t_comment(t): def t_NAME(t): r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|," - # return appropriate tokens for names that are operators - t.type = operators.get(t.value, 'NAME') + if t.value == '.': + t.type = 'PERIOD' + else: + # return appropriate tokens for names that are operators + t.type = operators.get(t.value, 'NAME') return t t_ignore = ' \t' diff --git a/prolog/parser.py b/prolog/parser.py index 599ecd1..4efc07c 100644 --- a/prolog/parser.py +++ b/prolog/parser.py @@ -21,18 +21,19 @@ from .util import Token # PARSER precedence = ( - ('nonassoc', 'FROM', 'FROMDCG'), - ('right', 'PIPE'), - ('right', 'IMPLIES'), - ('right', 'NOT'), - ('nonassoc', 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', 'IN', 'INS', 'THROUGH', 'EQFD', 'NEQFD', 'LTFD', 'LEFD', 'GTFD', 'GEFD'), - ('left', 'PLUS', 'MINUS'), - ('left', 'STAR', 'DIV', 'IDIV', 'MOD'), - ('nonassoc', 'POW'), - ('nonassoc', 'UINTEGER', 'UREAL'), - ('nonassoc', 'NAME', 'VARIABLE', 'STRING'), - ('nonassoc', 'PERIOD'), - ('nonassoc', 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'COMMA', 'SEMI', 'LBRACE', 'RBRACE') + ('nonassoc', 'FROM', 'FROMDCG'), # 1200 + ('right', 'PIPE'), # 1105 + ('right', 'SEMI'), # 1100 + ('right', 'IMPLIES', 'SOFTCUT'), # 1050 + ('right', 'COMMA'), # 1000 + ('right', 'NOT'), # 900 + ('nonassoc', 'EQU', 'NEQU', 'EQV', 'NEQV', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', 'IN', 'INS', 'FDEQ', 'FDNEQ', 'FDLT', 'FDLE', 'FDGT', 'FDGE'), # 700 + ('left', 'PLUS', 'MINUS', 'AND', 'FDUNION'), # 500 + ('nonassoc', 'THROUGH'), # 450 + ('left', 'SHIFTLEFT', 'SHIFTRIGHT', 'STAR', 'SLASH', 'SLASH2', 'DIV', 'MOD', 'RDIV', 'REM', 'XOR'), # 400 + ('right', 'POW'), # 200 + ('nonassoc', 'POWSTAR'), # 200 + ('right', 'UPLUS', 'UMINUS', 'NEG'), # 200 ) def token_start(p, n): @@ -90,7 +91,8 @@ def p_if_single(p): 'if : and' p[0] = p[1] def p_if_and(p): - 'if : and IMPLIES if' + '''if : and IMPLIES if + | and SOFTCUT if''' p[0] = make_tree('if', [p[1], p[3]], p[1].start, p[3].end) def p_and_single(p): @@ -149,90 +151,106 @@ def p_term_brace(p): def p_term_operator_infix(p): '''term : term PLUS term | term MINUS term + | term AND term | term STAR term | term POW term + | term POWSTAR term + | term SLASH term + | term SLASH2 term | term DIV term - | term IDIV term | term MOD term - + | term RDIV term + | term REM term + | term XOR term + | term SHIFTLEFT term + | term SHIFTRIGHT term | term EQU term | term NEQU term + | term EQV term + | term NEQV term | term EQ term | term NEQ term | term UNIV term | term IS term - | term EQA term | term NEQA term | term LT term | term LE term | term GT term | term GE term - | term LTL term | term LEL term | term GTL term | term GEL term - | term PIPE term | term THROUGH term | term IN term | term INS term - | term EQFD term - | term NEQFD term - | term LTFD term - | term LEFD term - | term GTFD term - | term GEFD term''' - p[0] = make_tree('binop', [p[1], make_token(p, 2), p[3]], p[1].start, p[3].end) + | term FDEQ term + | term FDNEQ term + | term FDLT term + | term FDLE term + | term FDGT term + | term FDGE term + | term FDUNION term''' + start, end = p[1].start, p[3].end + p[0] = make_tree('binop', [p[1], make_token(p, 2), p[3]], start, end) # binary expressions in functional notation, e.g. “+(1,2)” def p_term_operator_prefix(p): '''term : PLUS LPAREN term COMMA term RPAREN | MINUS LPAREN term COMMA term RPAREN + | AND LPAREN term COMMA term RPAREN | STAR LPAREN term COMMA term RPAREN | POW LPAREN term COMMA term RPAREN + | POWSTAR LPAREN term COMMA term RPAREN + | SLASH LPAREN term COMMA term RPAREN + | SLASH2 LPAREN term COMMA term RPAREN | DIV LPAREN term COMMA term RPAREN - | IDIV LPAREN term COMMA term RPAREN | MOD LPAREN term COMMA term RPAREN - + | RDIV LPAREN term COMMA term RPAREN + | REM LPAREN term COMMA term RPAREN + | XOR LPAREN term COMMA term RPAREN + | SHIFTLEFT LPAREN term COMMA term RPAREN + | SHIFTRIGHT LPAREN term COMMA term RPAREN | EQU LPAREN term COMMA term RPAREN | NEQU LPAREN term COMMA term RPAREN + | EQV LPAREN term COMMA term RPAREN + | NEQV LPAREN term COMMA term RPAREN | EQ LPAREN term COMMA term RPAREN | NEQ LPAREN term COMMA term RPAREN | UNIV LPAREN term COMMA term RPAREN | IS LPAREN term COMMA term RPAREN - | EQA LPAREN term COMMA term RPAREN | NEQA LPAREN term COMMA term RPAREN | LT LPAREN term COMMA term RPAREN | LE LPAREN term COMMA term RPAREN | GT LPAREN term COMMA term RPAREN | GE LPAREN term COMMA term RPAREN - | LTL LPAREN term COMMA term RPAREN | LEL LPAREN term COMMA term RPAREN | GTL LPAREN term COMMA term RPAREN | GEL LPAREN term COMMA term RPAREN - | PIPE LPAREN term COMMA term RPAREN | THROUGH LPAREN term COMMA term RPAREN | IN LPAREN term COMMA term RPAREN | INS LPAREN term COMMA term RPAREN - | EQFD LPAREN term COMMA term RPAREN - | NEQFD LPAREN term COMMA term RPAREN - | LTFD LPAREN term COMMA term RPAREN - | LEFD LPAREN term COMMA term RPAREN - | GTFD LPAREN term COMMA term RPAREN - | GEFD LPAREN term COMMA term RPAREN''' + | FDEQ LPAREN term COMMA term RPAREN + | FDNEQ LPAREN term COMMA term RPAREN + | FDLT LPAREN term COMMA term RPAREN + | FDLE LPAREN term COMMA term RPAREN + | FDGT LPAREN term COMMA term RPAREN + | FDGE LPAREN term COMMA term RPAREN + | FDUNION LPAREN term COMMA term RPAREN''' start, end = token_start(p, 1), token_end(p, 6) p[0] = make_tree('binop', [p[3], make_token(p, 1), p[5]], start, end) # unary operators def p_term_operator_unary(p): '''term : NOT term - | MINUS term - | PLUS term''' + | MINUS term %prec UMINUS + | PLUS term %prec UPLUS + | NEG term''' # shift/reduce conflict for MINUS and PLUS with p_term_operator_prefix above: # ply prefers shifting and will resolve +(2,2) to the binary expression “2+2” # instead of the unary “+ (2,2)” (this behavior is what we want) -- cgit v1.2.1