summary | refs | log | tree | commit | diff
path: root/prolog/lexer.py
diff options
context:
space:
mode:
author: Timotej Lazar <timotej.lazar@araneo.org> 2014-01-29 13:23:04 +0100
committer: Aleš Smodiš <aless@guru.si> 2015-08-11 14:25:59 +0200
commit: 1a99d21e12dad4c01d4c892ed4e8a0506bcf36aa (patch)
tree: 5b98529f8fd2d546b1a7338fbe6765d4989aa5b3 /prolog/lexer.py
Initial commit for pymonkey
Diffstat (limited to 'prolog/lexer.py')
-rwxr-xr-x  prolog/lexer.py  92
1 files changed, 92 insertions, 0 deletions
diff --git a/prolog/lexer.py b/prolog/lexer.py
new file mode 100755
index 0000000..008051c
--- /dev/null
+++ b/prolog/lexer.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python3
+
+import ply.lex as lex
+
+# LEXER
+
+#states = (
+# ('comment', 'exclusive'),
+#)
+
+# Operator table: maps the source text of each built-in operator to its
+# token type. t_NAME looks the matched text up here; an operator that is
+# immediately followed by "(" is treated as an ordinary NAME instead.
+operators = {
+    r':-': 'FROM',
+    r'->': 'IMPLIES',
+    r'\+': 'NOT',
+    r'not': 'NOT',
+    r'=': 'EQU',
+    r'\=': 'NEQU',
+    r'==': 'EQ',
+    r'\==': 'NEQ',
+    r'=..': 'UNIV',
+    r'is': 'IS',
+    r'=:=': 'EQA',
+    r'=\=': 'NEQA',
+    r'<': 'LT',
+    r'=<': 'LE',
+    r'>': 'GT',
+    r'>=': 'GE',
+    r'@<': 'LTL',
+    r'@=<': 'LEL',
+    r'@>': 'GTL',
+    r'@>=': 'GEL',
+    r'+': 'PLUS',
+    r'-': 'MINUS',
+    r'*': 'STAR',
+    r'/': 'DIV',
+    r'//': 'IDIV',
+    r'mod': 'MOD',
+    r'**': 'POW',
+    r'.': 'PERIOD',
+    r',': 'COMMA',
+    r';': 'SEMI',
+}
+
+# Token names handed to PLY. Two spellings ('\+' and 'not') share the NOT
+# type, so the operator types are de-duplicated: PLY warns when a token name
+# is listed more than once. Sorting keeps the list order deterministic.
+tokens = sorted(set(operators.values())) + [
+    'UINTEGER', 'UREAL',
+    'NAME', 'VARIABLE', 'STRING',
+    'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE',
+    'INVALID',
+]
+
+# punctuation
+# Each pattern below matches exactly one bracket or separator character.
+t_LBRACKET = r'\['
+t_RBRACKET = r'\]'
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_PIPE = r'\|'
+t_LBRACE = r'{'
+t_RBRACE = r'}'
+
+# Unsigned integer: one or more decimal digits (no sign in the pattern).
+t_UINTEGER = r'[0-9]+'
+# Unsigned real: digits, a dot, digits, then an optional exponent; or the
+# words "inf" / "nan".
+# NOTE(review): t_NAME is a function rule that also matches lowercase words,
+# and PLY tries function rules before string rules, so "inf" and "nan"
+# presumably never reach this pattern -- confirm.
+t_UREAL = r'[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?|inf|nan'
+# Variable: starts with an underscore or an uppercase letter.
+t_VARIABLE = r'(_|[A-Z])[a-zA-Z0-9_]*'
+# Double-quoted string; a doubled quote or a backslash escape may appear
+# inside.
+# NOTE(review): the class [^\"] excludes only the quote, whereas the quoted
+# atom pattern in t_NAME uses [^\\'] and also excludes the backslash --
+# confirm whether the difference is intended.
+t_STRING = r'"(""|\\.|[^\"])*"'
+
+# Comments ("/* ... */" and "% ..." to end of line) produce no token.
+# This rule must be defined BEFORE t_NAME: PLY tries function rules in the
+# order they are defined, and the symbol-atom alternative of t_NAME would
+# otherwise consume the opening "/*" of a block comment.
+# No support for nested comments yet.
+def t_comment(t):
+    r'(/\*(.|\n)*?\*/)|(%.*)'
+    # A block comment may span several lines; keep the line counter in sync.
+    t.lexer.lineno += t.value.count('\n')
+
+# Atoms: quoted atoms, identifiers starting with a lowercase letter, runs of
+# symbol characters, and the solo characters "!", ";" and ",".
+# If the matched text is a known operator it gets that operator's token
+# type, unless the next input character is "(" -- then the token keeps the
+# NAME type.
+def t_NAME(t):
+    r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|,"
+    data = t.lexer.lexdata
+    # lexpos is expected to already point just past the matched text
+    # (PLY updates it before calling a rule function).
+    next_pos = t.lexer.lexpos
+    if next_pos >= len(data) or data[next_pos] != '(':
+        t.type = operators.get(t.value, 'NAME')
+    return t
+
+# Characters skipped between tokens: spaces and tabs.
+t_ignore = ' \t'
+
+# A run of newlines yields no token; it only advances the lexer's line
+# counter by the number of newline characters matched.
+def t_newline(t):
+    r'\n+'
+    newline_count = len(t.value)
+    t.lexer.lineno = t.lexer.lineno + newline_count
+
+# Error rule: rather than aborting, turn the first offending character into
+# an INVALID token, advance the lexer past it, and return the token.
+def t_error(t):
+    # TODO report "Illegal character '<c>'" on stderr
+    t.type, t.value = 'INVALID', t.value[0]
+    t.lexer.skip(1)
+    return t
+
+# Module-level lexer instance, created by PLY's lex.lex() when this module
+# is imported.
+lexer = lex.lex()
+
+