1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
#!/usr/bin/python3
# CodeQ: an online programming tutor.
# Copyright (C) 2015-2017 UL FRI
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import ply.lex as lex
# Maps the source text of each operator to the token type emitted for it.
# t_NAME looks lexed names up in this table, so the keys are literal operator
# spellings (not regular expressions).
operators = {
    # clause, grammar-rule and control operators
    r':-': 'FROM',
    r'-->': 'FROMDCG',
    r'->': 'IMPLIES',
    r'\+': 'NOT',
    # equality / inequality and term construction
    r'=': 'EQU',
    r'\=': 'NEQU',
    r'==': 'EQ',
    r'\==': 'NEQ',
    r'=..': 'UNIV',
    # arithmetic evaluation and comparison
    r'is': 'IS',
    r'=:=': 'EQA',
    r'=\=': 'NEQA',
    r'<': 'LT',
    r'=<': 'LE',
    r'>': 'GT',
    r'>=': 'GE',
    # '@'-prefixed comparisons
    r'@<': 'LTL',
    r'@=<': 'LEL',
    r'@>': 'GTL',
    r'@>=': 'GEL',
    # '#'-prefixed comparisons and domain operators
    r'#=': 'EQFD',
    r'#\=': 'NEQFD',
    r'#<': 'LTFD',
    r'#=<': 'LEFD',
    r'#>': 'GTFD',
    r'#>=': 'GEFD',
    r'in': 'IN',
    r'ins': 'INS',
    r'..': 'THROUGH',
    # arithmetic
    r'+': 'PLUS',
    r'-': 'MINUS',
    r'*': 'STAR',
    r'/': 'DIV',
    r'//': 'IDIV',
    r'mod': 'MOD',
    r'**': 'POW',
    r'^': 'POW',
    # separators
    r'.': 'PERIOD',
    r',': 'COMMA',
    r';': 'SEMI'
}

# All token types the lexer can emit. Several operators may share one token
# type ('**' and '^' are both POW), so the operator types are de-duplicated
# before being listed; each type must appear only once.
tokens = sorted(set(operators.values())) + [
    'UINTEGER', 'UREAL',
    'NAME', 'VARIABLE', 'STRING',
    'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE',
    'INVALID'
]
# punctuation
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_PIPE = r'\|'
t_LBRACE = r'{'
t_RBRACE = r'}'

# unsigned numbers
t_UINTEGER = r'[0-9]+'
# NOTE(review): `inf` and `nan` also fit the identifier branch of t_NAME;
# which rule wins depends on PLY's rule ordering -- confirm that these two
# spellings are ever lexed as UREAL.
t_UREAL = r'[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?|inf|nan'

# variables start with an underscore or an uppercase letter
t_VARIABLE = r'(_|[A-Z])[a-zA-Z0-9_]*'

# Double-quoted strings; a doubled quote ("") or a backslash escape may appear
# inside. The catch-all class excludes the backslash as well as the quote (as
# the quoted-atom branch of t_NAME does), so a backslash can only be consumed
# together with the character it escapes. Otherwise a backslash could match
# two branches, which accepted "a\" as a complete string and made unterminated
# strings full of backslashes backtrack exponentially.
t_STRING = r'"(""|\\.|[^\\"])*"'
# TODO support nested comments
# Skip comments: a (non-nested) /* ... */ block, or % up to the end of the
# line. The raw string that opens the function is the token regex PLY reads
# from the docstring, so it must stay the first statement.
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(%.*)'
    # A block comment may span several lines; count its newlines so the line
    # numbers of the tokens that follow stay correct.
    t.lexer.lineno += t.value.count('\n')
    # Nothing is returned, so the comment produces no token.
# Lex names: single-quoted atoms, identifiers starting with a lowercase
# letter, runs of symbol characters, and the solo names "!", ";" and ",".
def t_NAME(t):
    r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|,"
    # A name that is spelled like a known operator is emitted under that
    # operator's token type; every other name stays a plain NAME.
    if t.value in operators:
        t.type = operators[t.value]
    else:
        t.type = 'NAME'
    return t
# Blanks and tabs between tokens are skipped without producing a token.
t_ignore = ' \t'


# Consume a run of newlines and advance the lexer's line counter by the number
# of newlines matched. Nothing is returned, so no token is emitted.
def t_newline(t):
    r'\n+'
    newline_count = len(t.value)
    owner = t.lexer
    owner.lineno = owner.lineno + newline_count
# Fallback for input that no rule matches: report the first offending
# character as an INVALID token and resume lexing right after it.
def t_error(t):
    # TODO send a diagnostic ("Illegal character '<char>'") to stderr
    # t.value holds the unmatched input; only its first character is kept.
    t.type, t.value = 'INVALID', t.value[0]
    # Step over that single character so lexing can continue.
    t.lexer.skip(1)
    return t
# Build the module-level lexer object used by the interactive loop below.
# PLY's diagnostics are routed to a NullLogger (presumably to keep its
# warnings out of the output -- confirm this is still wanted).
lexer = lex.lex(errorlog=lex.NullLogger())
if __name__ == '__main__':
    # Interactive loop for trying the lexer out: read a line, print the list
    # of tokens lexed from it, and stop at end of input.
    while True:
        try:
            s = input('> ')
        except EOFError:
            break
        if not s:
            # nothing to lex on an empty line
            continue
        lexer.input(s)
        # Print the result directly instead of binding it to `tokens`: that
        # name is the module-level list of token types and must not be
        # overwritten with LexToken objects.
        print(list(lexer))
|