summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2017-02-24 12:13:51 +0100
committer: Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2017-02-24 12:45:55 +0100
commit: 488c40522f831d7ef84efdd07f895479b79391c1 (patch)
tree: fd1d79ce7848358e84623050315bdbd672279665
parent: 5ce873e545ecd77710c63dfa3f3bbb3d41fe3aa0 (diff)
prolog.parser: implement most remaining operators
Also fix some precedence issues.
-rw-r--r--  prolog/lexer.py   88
-rw-r--r--  prolog/parser.py  94
2 files changed, 108 insertions, 74 deletions
diff --git a/prolog/lexer.py b/prolog/lexer.py
index 4e6c746..32a25fc 100644
--- a/prolog/lexer.py
+++ b/prolog/lexer.py
@@ -19,52 +19,65 @@
import ply.lex as lex
operators = {
- r':-': 'FROM',
- r'-->': 'FROMDCG',
- r'->': 'IMPLIES',
- r'\+': 'NOT',
- r'=': 'EQU',
- r'\=': 'NEQU',
- r'==': 'EQ',
+ r':-': 'FROM',
+ r'-->': 'FROMDCG',
+ r',': 'COMMA',
+ r';': 'SEMI',
+ r'->': 'IMPLIES',
+ r'*->': 'SOFTCUT',
+ r'\+': 'NOT',
+ r'=': 'EQU',
+ r'\=': 'NEQU',
+ r'=@=': 'EQV',
+ r'\=@=': 'NEQV',
+ r'==': 'EQ',
r'\==': 'NEQ',
r'=..': 'UNIV',
- r'is': 'IS',
+ r'is': 'IS',
r'=:=': 'EQA',
r'=\=': 'NEQA',
- r'<': 'LT',
- r'=<': 'LE',
- r'>': 'GT',
- r'>=': 'GE',
- r'@<': 'LTL',
+ r'<': 'LT',
+ r'=<': 'LE',
+ r'>': 'GT',
+ r'>=': 'GE',
+ r'@<': 'LTL',
r'@=<': 'LEL',
- r'@>': 'GTL',
+ r'@>': 'GTL',
r'@>=': 'GEL',
- r'#=': 'EQFD',
- r'#\=': 'NEQFD',
- r'#<': 'LTFD',
- r'#=<': 'LEFD',
- r'#>': 'GTFD',
- r'#>=': 'GEFD',
- r'in': 'IN',
- r'ins': 'INS',
- r'..': 'THROUGH',
- r'+': 'PLUS',
- r'-': 'MINUS',
- r'*': 'STAR',
- r'/': 'DIV',
- r'//': 'IDIV',
+ r'+': 'PLUS',
+ r'-': 'MINUS',
+ r'*': 'STAR',
+ r'/': 'SLASH',
+ r'//': 'SLASH2',
+ r'<<': 'SHIFTLEFT',
+ r'>>': 'SHIFTRIGHT',
+ r'div': 'DIV',
r'mod': 'MOD',
- r'**': 'POW',
- r'^': 'POW',
- r'.': 'PERIOD',
- r',': 'COMMA',
- r';': 'SEMI'
+ r'rdiv': 'RDIV',
+ r'rem': 'REM',
+ r'xor': 'XOR',
+ r'^': 'POW',
+ r'**': 'POWSTAR',
+ '/\\': 'AND',
+ '\\': 'NEG',
+
+ # CLP(FD)
+ r'in': 'IN',
+ r'ins': 'INS',
+ r'..': 'THROUGH',
+ r'#=': 'FDEQ',
+ r'#\=': 'FDNEQ',
+ r'#<': 'FDLT',
+ r'#=<': 'FDLE',
+ r'#>': 'FDGT',
+ r'#>=': 'FDGE',
+ r'\/': 'FDUNION',
}
tokens = sorted(list(operators.values())) + [
'UINTEGER', 'UREAL',
'NAME', 'VARIABLE', 'STRING',
'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE',
- 'INVALID'
+ 'PERIOD', 'INVALID'
]
# punctuation
@@ -88,8 +101,11 @@ def t_comment(t):
def t_NAME(t):
r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|,"
- # return appropriate tokens for names that are operators
- t.type = operators.get(t.value, 'NAME')
+ if t.value == '.':
+ t.type = 'PERIOD'
+ else:
+ # return appropriate tokens for names that are operators
+ t.type = operators.get(t.value, 'NAME')
return t
t_ignore = ' \t'
diff --git a/prolog/parser.py b/prolog/parser.py
index 599ecd1..4efc07c 100644
--- a/prolog/parser.py
+++ b/prolog/parser.py
@@ -21,18 +21,19 @@ from .util import Token
# PARSER
precedence = (
- ('nonassoc', 'FROM', 'FROMDCG'),
- ('right', 'PIPE'),
- ('right', 'IMPLIES'),
- ('right', 'NOT'),
- ('nonassoc', 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', 'IN', 'INS', 'THROUGH', 'EQFD', 'NEQFD', 'LTFD', 'LEFD', 'GTFD', 'GEFD'),
- ('left', 'PLUS', 'MINUS'),
- ('left', 'STAR', 'DIV', 'IDIV', 'MOD'),
- ('nonassoc', 'POW'),
- ('nonassoc', 'UINTEGER', 'UREAL'),
- ('nonassoc', 'NAME', 'VARIABLE', 'STRING'),
- ('nonassoc', 'PERIOD'),
- ('nonassoc', 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'COMMA', 'SEMI', 'LBRACE', 'RBRACE')
+ ('nonassoc', 'FROM', 'FROMDCG'), # 1200
+ ('right', 'PIPE'), # 1105
+ ('right', 'SEMI'), # 1100
+ ('right', 'IMPLIES', 'SOFTCUT'), # 1050
+ ('right', 'COMMA'), # 1000
+ ('right', 'NOT'), # 900
+ ('nonassoc', 'EQU', 'NEQU', 'EQV', 'NEQV', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', 'IN', 'INS', 'FDEQ', 'FDNEQ', 'FDLT', 'FDLE', 'FDGT', 'FDGE'), # 700
+ ('left', 'PLUS', 'MINUS', 'AND', 'FDUNION'), # 500
+ ('nonassoc', 'THROUGH'), # 450
+ ('left', 'SHIFTLEFT', 'SHIFTRIGHT', 'STAR', 'SLASH', 'SLASH2', 'DIV', 'MOD', 'RDIV', 'REM', 'XOR'), # 400
+ ('right', 'POW'), # 200
+ ('nonassoc', 'POWSTAR'), # 200
+ ('right', 'UPLUS', 'UMINUS', 'NEG'), # 200
)
def token_start(p, n):
@@ -90,7 +91,8 @@ def p_if_single(p):
'if : and'
p[0] = p[1]
def p_if_and(p):
- 'if : and IMPLIES if'
+ '''if : and IMPLIES if
+ | and SOFTCUT if'''
p[0] = make_tree('if', [p[1], p[3]], p[1].start, p[3].end)
def p_and_single(p):
@@ -149,90 +151,106 @@ def p_term_brace(p):
def p_term_operator_infix(p):
'''term : term PLUS term
| term MINUS term
+ | term AND term
| term STAR term
| term POW term
+ | term POWSTAR term
+ | term SLASH term
+ | term SLASH2 term
| term DIV term
- | term IDIV term
| term MOD term
-
+ | term RDIV term
+ | term REM term
+ | term XOR term
+ | term SHIFTLEFT term
+ | term SHIFTRIGHT term
| term EQU term
| term NEQU term
+ | term EQV term
+ | term NEQV term
| term EQ term
| term NEQ term
| term UNIV term
| term IS term
-
| term EQA term
| term NEQA term
| term LT term
| term LE term
| term GT term
| term GE term
-
| term LTL term
| term LEL term
| term GTL term
| term GEL term
-
| term PIPE term
| term THROUGH term
| term IN term
| term INS term
- | term EQFD term
- | term NEQFD term
- | term LTFD term
- | term LEFD term
- | term GTFD term
- | term GEFD term'''
- p[0] = make_tree('binop', [p[1], make_token(p, 2), p[3]], p[1].start, p[3].end)
+ | term FDEQ term
+ | term FDNEQ term
+ | term FDLT term
+ | term FDLE term
+ | term FDGT term
+ | term FDGE term
+ | term FDUNION term'''
+ start, end = p[1].start, p[3].end
+ p[0] = make_tree('binop', [p[1], make_token(p, 2), p[3]], start, end)
# binary expressions in functional notation, e.g. “+(1,2)”
def p_term_operator_prefix(p):
'''term : PLUS LPAREN term COMMA term RPAREN
| MINUS LPAREN term COMMA term RPAREN
+ | AND LPAREN term COMMA term RPAREN
| STAR LPAREN term COMMA term RPAREN
| POW LPAREN term COMMA term RPAREN
+ | POWSTAR LPAREN term COMMA term RPAREN
+ | SLASH LPAREN term COMMA term RPAREN
+ | SLASH2 LPAREN term COMMA term RPAREN
| DIV LPAREN term COMMA term RPAREN
- | IDIV LPAREN term COMMA term RPAREN
| MOD LPAREN term COMMA term RPAREN
-
+ | RDIV LPAREN term COMMA term RPAREN
+ | REM LPAREN term COMMA term RPAREN
+ | XOR LPAREN term COMMA term RPAREN
+ | SHIFTLEFT LPAREN term COMMA term RPAREN
+ | SHIFTRIGHT LPAREN term COMMA term RPAREN
| EQU LPAREN term COMMA term RPAREN
| NEQU LPAREN term COMMA term RPAREN
+ | EQV LPAREN term COMMA term RPAREN
+ | NEQV LPAREN term COMMA term RPAREN
| EQ LPAREN term COMMA term RPAREN
| NEQ LPAREN term COMMA term RPAREN
| UNIV LPAREN term COMMA term RPAREN
| IS LPAREN term COMMA term RPAREN
-
| EQA LPAREN term COMMA term RPAREN
| NEQA LPAREN term COMMA term RPAREN
| LT LPAREN term COMMA term RPAREN
| LE LPAREN term COMMA term RPAREN
| GT LPAREN term COMMA term RPAREN
| GE LPAREN term COMMA term RPAREN
-
| LTL LPAREN term COMMA term RPAREN
| LEL LPAREN term COMMA term RPAREN
| GTL LPAREN term COMMA term RPAREN
| GEL LPAREN term COMMA term RPAREN
-
| PIPE LPAREN term COMMA term RPAREN
| THROUGH LPAREN term COMMA term RPAREN
| IN LPAREN term COMMA term RPAREN
| INS LPAREN term COMMA term RPAREN
- | EQFD LPAREN term COMMA term RPAREN
- | NEQFD LPAREN term COMMA term RPAREN
- | LTFD LPAREN term COMMA term RPAREN
- | LEFD LPAREN term COMMA term RPAREN
- | GTFD LPAREN term COMMA term RPAREN
- | GEFD LPAREN term COMMA term RPAREN'''
+ | FDEQ LPAREN term COMMA term RPAREN
+ | FDNEQ LPAREN term COMMA term RPAREN
+ | FDLT LPAREN term COMMA term RPAREN
+ | FDLE LPAREN term COMMA term RPAREN
+ | FDGT LPAREN term COMMA term RPAREN
+ | FDGE LPAREN term COMMA term RPAREN
+ | FDUNION LPAREN term COMMA term RPAREN'''
start, end = token_start(p, 1), token_end(p, 6)
p[0] = make_tree('binop', [p[3], make_token(p, 1), p[5]], start, end)
# unary operators
def p_term_operator_unary(p):
'''term : NOT term
- | MINUS term
- | PLUS term'''
+ | MINUS term %prec UMINUS
+ | PLUS term %prec UPLUS
+ | NEG term'''
# shift/reduce conflict for MINUS and PLUS with p_term_operator_prefix above:
# ply prefers shifting and will resolve +(2,2) to the binary expression “2+2”
# instead of the unary “+ (2,2)” (this behavior is what we want)