From 27d4458613a5b61f16ad9bf59ca1de460fea3b3a Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Mon, 9 Jan 2017 18:07:23 +0100 Subject: First commit is the best commit --- prolog/lexer.py | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 prolog/lexer.py (limited to 'prolog/lexer.py') diff --git a/prolog/lexer.py b/prolog/lexer.py new file mode 100644 index 0000000..5023e57 --- /dev/null +++ b/prolog/lexer.py @@ -0,0 +1,130 @@ +#!/usr/bin/python3 + +# CodeQ: an online programming tutor. +# Copyright (C) 2015 UL FRI +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
import ply.lex as lex

# LEXER
#
# PLY lexer specification for Prolog source text.  PLY discovers the rules by
# inspecting this module: `tokens` declares the token names, `t_<NAME>`
# strings/functions supply the regular expressions, and for function rules the
# *docstring is the regex* -- so rule functions are documented with comments
# above them, never with a regular docstring.

#states = (
#    ('comment', 'exclusive'),
#)

# tokens; treat operators as names if followed by (
# Maps the literal operator text to its token type.  The lookup happens in
# t_NAME, after the text has been matched as a generic name.
operators = {
    r':-': 'FROM',
    r'-->': 'FROMDCG',
    r'->': 'IMPLIES',
    r'\+': 'NOT',
    r'=': 'EQU',
    r'\=': 'NEQU',
    r'==': 'EQ',
    r'\==': 'NEQ',
    r'=..': 'UNIV',
    r'is': 'IS',
    r'=:=': 'EQA',
    r'=\=': 'NEQA',
    r'<': 'LT',
    r'=<': 'LE',
    r'>': 'GT',
    r'>=': 'GE',
    r'@<': 'LTL',
    r'@=<': 'LEL',
    r'@>': 'GTL',
    r'@>=': 'GEL',
    r'#=': 'EQFD',
    r'#\=': 'NEQFD',
    r'#<': 'LTFD',
    r'#=<': 'LEFD',
    r'#>': 'GTFD',
    r'#>=': 'GEFD',
    r'in': 'IN',
    r'ins': 'INS',
    r'..': 'THROUGH',
    r'+': 'PLUS',
    r'-': 'MINUS',
    r'*': 'STAR',
    r'/': 'DIV',
    r'//': 'IDIV',
    r'mod': 'MOD',
    r'**': 'POW',
    r'^': 'POW',
    r'.': 'PERIOD',
    r',': 'COMMA',
    r';': 'SEMI',
}

# Both '**' and '^' map to 'POW'; go through a set so each token name is
# declared exactly once (PLY reports duplicates as "multiply defined").
tokens = sorted(set(operators.values())) + [
    'UINTEGER', 'UREAL',
    'NAME', 'VARIABLE', 'STRING',
    'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'PIPE', 'LBRACE', 'RBRACE',
    'INVALID',
]

# punctuation
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_PIPE = r'\|'
t_LBRACE = r'{'
t_RBRACE = r'}'

t_UINTEGER = r'[0-9]+'
# NOTE(review): PLY tries function rules (t_NAME below) before string rules,
# so the bare words `inf` / `nan` are presumably lexed as NAME and the last
# two alternatives here never fire -- confirm before relying on them.
t_UREAL = r'[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?|inf|nan'
t_VARIABLE = r'(_|[A-Z])[a-zA-Z0-9_]*'
# NOTE(review): `[^\"]` also matches a backslash, unlike the quoted-atom
# pattern in t_NAME which uses `[^\\']`.  Left as is on purpose: tightening it
# would stop strings containing backslash-newline from matching.
t_STRING = r'"(""|\\.|[^\"])*"'


# Skip comments: `/* ... */` blocks (possibly spanning lines) and `%` comments
# running to the end of the line.  Nothing is returned, so PLY discards the
# match.
# no support for nested comments yet
def t_comment(t):
    r'(/\*(.|\n)*?\*/)|(%.*)'
    # A block comment may contain newlines that t_newline never sees; count
    # them here so line numbers of the following tokens stay correct.
    t.lexer.lineno += t.value.count('\n')


# Match quoted atoms, plain lowercase-initial atoms, symbol-character atoms
# and the solo characters `!`, `;`, `,`.  The token is reclassified as an
# operator (via `operators`) unless it is immediately followed by `(`, in
# which case it is being used as a functor name and stays a NAME.  A comma is
# always reclassified.
def t_NAME(t):
    r"'(''|\\.|[^\\'])*'|[a-z][a-zA-Z0-9_]*|[-+*/\\^<>=~:.?@#$&]+|!|;|,"
    data = t.lexer.lexdata
    # lexpos already points just past the matched text when PLY calls us.
    end = t.lexer.lexpos
    followed_by_paren = end < len(data) and data[end] == '('
    if t.value == ',' or not followed_by_paren:
        t.type = operators.get(t.value, 'NAME')
    return t


# Characters skipped between tokens (newlines are handled by t_newline).
t_ignore = ' \t'


# Consume a run of newlines and advance the line counter; produces no token.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)


# Called by PLY when no rule matches.  Instead of aborting, emit a
# one-character INVALID token and resume lexing after it.
def t_error(t):
    # TODO send this to stderr
    #print("Illegal character '" + t.value[0] + "'")
    t.type = 'INVALID'
    t.value = t.value[0]
    t.lexer.skip(1)
    return t


# Build the lexer from the rules above; PLY diagnostics are silenced.
lexer = lex.lex(errorlog=lex.NullLogger())

if __name__ == '__main__':
    # Tiny REPL: read a line, print the tokens it lexes to, stop on EOF.
    while True:
        try:
            s = input('> ')
        except EOFError:
            break
        if not s:
            continue

        lexer.input(s)
        # Print directly rather than rebinding the module-level `tokens`
        # list, which is part of the lexer specification.
        print(list(lexer))