1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
#!/usr/bin/python3
# CodeQ: an online programming tutor.
# Copyright (C) 2015 UL FRI
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import ply.lex as lex
# LEXER
#states = (
# ('comment', 'exclusive'),
#)
# tokens; treat operators as names if followed by (
operators = {
r':-': 'FROM',
r'->': 'IMPLIES',
r'\+': 'NOT',
r'not': 'NOT',
r'=': 'EQU',
r'\=': 'NEQU',
r'==': 'EQ',
r'\==': 'NEQ',
r'=..': 'UNIV',
r'is': 'IS',
r'=:=': 'EQA',
r'=\=': 'NEQA',
r'<': 'LT',
r'=<': 'LE',
r'>': 'GT',
r'>=': 'GE',
r'@<': 'LTL',
r'@=<': 'LEL',
r'@>': 'GTL',
r'@>=': 'GEL',
r'+': 'PLUS',
r'-': 'MINUS',
r'*': 'STAR',
r'/': 'DIV',
r'//': 'IDIV',
r'mod': 'MOD',
r'**': 'POW',
r'^': 'POW',
r'.': 'PERIOD',
r',': 'COMMA',
r';': 'SEMI'
}
tokens = sorted(list(operators.values())) + [
'UINTEGER', 'UREAL',
'NAME', 'VARIABLE', 'STRING',
'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE',
'INVALID'
]
# punctuation
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_PIPE = r'\|'
t_LBRACE = r'{'
t_RBRACE = r'}'
t_UINTEGER = r'[0-9]+'
t_UREAL = r'[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?|inf|nan'
t_VARIABLE = r'(_|[A-Z])[a-zA-Z0-9_]*'
t_STRING = r'"(""|\\.|[^\"])*"'
# no support for nested comments yet
def t_comment(t):
r'(/\*(.|\n)*?\*/)|(%.*)'
pass
def t_NAME(t):
r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|,"
if t.value == ',' or \
t.lexer.lexpos >= len(t.lexer.lexdata) or t.lexer.lexdata[t.lexer.lexpos] != '(':
t.type = operators.get(t.value, 'NAME')
return t
t_ignore = ' \t'
def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)
def t_error(t):
# TODO send this to stderr
#print("Illegal character '" + t.value[0] + "'")
t.type = 'INVALID'
t.value = t.value[0]
t.lexer.skip(1)
return t
lexer = lex.lex(errorlog=lex.NullLogger())
if __name__ == '__main__':
while True:
try:
s = input('> ')
except EOFError:
break
if not s:
continue
lexer.input(s)
tokens = list(lexer)
print(tokens)
|