author    Timotej Lazar <timotej.lazar@araneo.org>    2014-11-17 12:32:19 +0100
committer Aleš Smodiš <aless@guru.si>                 2015-08-11 14:26:00 +0200
commit    770f1ed8f7aeeb52db483dd72a5d4712839f9760 (patch)
tree      b56f82f43ac9fbad716b4092b96029a406fc5dc6
parent    a4f46cfe3e2c8b1307df396c6c8c37b4f61a59bd (diff)
Keep token positions when lexing
This will allow us to match line edits to original source locations.
-rwxr-xr-x  prolog/engine.py   2
-rw-r--r--  prolog/util.py    62
2 files changed, 32 insertions, 32 deletions
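
The diff replaces the plain (type, value) tuples with a Token object imported from util. That class is not part of this commit, so the following is only a hypothetical reconstruction, inferred from how the hunks below use it: positional (type, val, pos) fields, a default empty value (Token('EOF')), str(t) yielding the token's text, and equality that ignores pos (rename_vars compares against Token('VARIABLE', '_'), which carries no position).

from collections import namedtuple

# Hypothetical sketch of the Token class in util; not shown in this diff.
class Token(namedtuple('Token', ['type', 'val', 'pos'])):
    __slots__ = ()
    def __new__(cls, type, val='', pos=None):
        # val defaults to '' so Token('EOF') works; pos holds lexpos.
        return super(Token, cls).__new__(cls, type, val, pos)
    def __str__(self):
        # stringify() below relies on str(t) returning the token text.
        return self.val
    # Compare and hash by type and value only, ignoring position, so
    # tokens lexed at different offsets can still compare equal.
    def __eq__(self, other):
        return self.type == other.type and self.val == other.val
    def __ne__(self, other):
        return not self.__eq__(other)
    def __hash__(self):
        return hash((self.type, self.val))

Defining equality on (type, val) alone keeps the position as pure metadata: two tokens from different source offsets still match, which rename_vars depends on when it tests tokens[i] == Token('VARIABLE', '_').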
diff --git a/prolog/engine.py b/prolog/engine.py
index 6087bbc..299d2eb 100755
--- a/prolog/engine.py
+++ b/prolog/engine.py
@@ -139,7 +139,7 @@ class PrologEngine(object):
try:
start = 0
for idx in range(len(tokens)):
- if tokens[idx] != ('PERIOD', '.') or idx - start <= 1:
+ if tokens[idx].type != 'PERIOD' or idx - start <= 1:
continue
rule = stringify(tokens[start:idx])
orig_rule = rule
diff --git a/prolog/util.py b/prolog/util.py
index b7805db..b7e536b 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -5,13 +5,11 @@ import math
import re
from .lexer import lexer
+from util import Token
-# new lexer stuff
def tokenize(text):
- # feed the troll
lexer.input(text)
- # we are not interested in line numbers and absolute positions
- return [(t.type, t.value) for t in lexer]
+ return [Token(t.type, t.value, t.lexpos) for t in lexer]
operators = set([
'FROM', 'IMPLIES', 'NOT',
@@ -22,11 +20,11 @@ operators = set([
])
def stringify(tokens):
def token_str(t):
- if t[0] in ('PERIOD', 'COMMA'):
- return t[1] + ' '
- elif t[0] in operators:
- return ' ' + t[1] + ' '
- return t[1]
+ if t.type in ('PERIOD', 'COMMA'):
+ return str(t) + ' '
+ if t.type in operators:
+ return ' ' + str(t) + ' '
+ return str(t)
return ''.join(map(token_str, tokens))
# return a list of lines in 'code', and a list of rule indexes
@@ -34,35 +32,35 @@ def decompose(code):
lines = []
rules = []
tokens = tokenize(code)
- tokens.append(('EOF', ''))
+ tokens.append(Token('EOF'))
line = []
parens = []
rule_start = 0
for t in tokens:
- if t[0] == 'SEMI':
+ if t.type == 'SEMI':
if line != []:
lines.append(tuple(line))
line = []
lines.append((t,))
continue
if not parens:
- if t[0] in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
+ if t.type in ('PERIOD', 'FROM', 'COMMA', 'EOF'):
if line != []:
lines.append(tuple(line))
line = []
- if t[0] in ('PERIOD', 'EOF') and rule_start < len(lines):
+ if t.type in ('PERIOD', 'EOF') and rule_start < len(lines):
rules.append((rule_start, len(lines)))
rule_start = len(lines)
continue
- if t[0] in ('LPAREN', 'LBRACKET', 'LBRACE'):
- parens.append(t[0])
+ if t.type in ('LPAREN', 'LBRACKET', 'LBRACE'):
+ parens.append(t.type)
elif parens:
- if t[0] == 'RPAREN' and parens[-1] == 'LPAREN':
+ if t.type == 'RPAREN' and parens[-1] == 'LPAREN':
parens.pop()
- elif t[0] == 'RBRACKET' and parens[-1] == 'LBRACKET':
+ elif t.type == 'RBRACKET' and parens[-1] == 'LBRACKET':
parens.pop()
- elif t[0] == 'RBRACE' and parens[-1] == 'LBRACE':
+ elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
parens.pop()
line.append(t)
return tuple(lines), tuple(rules)
@@ -81,7 +79,7 @@ def compose(lines, rules):
elif i == start:
code += ' :-\n'
else:
- if line and line[-1][0] != 'SEMI' and lines[i+1][-1][0] != 'SEMI':
+ if line and line[-1].type != 'SEMI' and lines[i+1][-1].type != 'SEMI':
code += ','
code += '\n'
return code.strip()
@@ -92,18 +90,18 @@ def rename_vars(tokens, names={}):
names = {k: v for k, v in names.items()}
next_id = len(names)
for i in range(len(tokens)):
- if tokens[i][0] == 'PERIOD':
+ if tokens[i].type == 'PERIOD':
names.clear()
next_id = 0
- elif tokens[i] == ('VARIABLE', '_'):
- tokens[i] = ('VARIABLE', 'A' + str(next_id))
+ elif tokens[i] == Token('VARIABLE', '_'):
+ tokens[i] = Token('VARIABLE', 'A' + str(next_id))
next_id += 1
- elif tokens[i][0] == 'VARIABLE':
- cur_name = tokens[i][1]
+ elif tokens[i].type == 'VARIABLE':
+ cur_name = tokens[i].val
if cur_name not in names:
names[cur_name] = next_id
next_id += 1
- tokens[i] = ('VARIABLE', 'A' + str(names[cur_name]))
+ tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
return names
# transformation = before → after; applied on line which is part of rule
@@ -113,17 +111,17 @@ def map_vars(before, after, line, rule):
mapping = {}
new_index = 0
for i in range(len(before)):
- if line[i][0] == 'VARIABLE':
- formal_name = before[i][1]
- if line[i][1] != '_':
- actual_name = line[i][1]
+ if line[i].type == 'VARIABLE':
+ formal_name = before[i].val
+ if line[i].val != '_':
+ actual_name = line[i].val
else:
actual_name = 'New'+str(new_index)
new_index += 1
mapping[formal_name] = actual_name
- remaining_formal = [t[1] for t in after if t[0] == 'VARIABLE' and t[1] not in mapping.keys()]
- remaining_actual = [t[1] for t in rule if t[0] == 'VARIABLE' and t[1] != '_' and t[1] not in mapping.values()]
+ remaining_formal = [t.val for t in after if t.type == 'VARIABLE' and t.val not in mapping.keys()]
+ remaining_actual = [t.val for t in rule if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()]
while len(remaining_actual) < len(remaining_formal):
remaining_actual.append('New'+str(new_index))
@@ -136,6 +134,8 @@ def map_vars(before, after, line, rule):
# Basic sanity check.
if __name__ == '__main__':
+ print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))
+
rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
line = tokenize('dup([H|T], [H1|T1]) :-')
before = tokenize("dup([A0|A1], [A2|A3])")
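
With lexpos preserved on every token, the source span of a line produced by decompose() can be recovered from its first and last tokens, which is what "match line edits to original source locations" in the commit message amounts to. A minimal sketch, assuming the hypothetical Token fields above:

def line_span(line):
    # A line is a tuple of Tokens; its extent in the original source
    # runs from the first token's offset to the end of the last token.
    start = line[0].pos
    end = line[-1].pos + len(line[-1].val)
    return start, end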