diff options
Diffstat (limited to 'prolog')
-rw-r--r-- | prolog/util.py | 51 |
1 files changed, 30 insertions, 21 deletions
diff --git a/prolog/util.py b/prolog/util.py
index 48e3345..30f12da 100644
--- a/prolog/util.py
+++ b/prolog/util.py
@@ -6,18 +6,18 @@ from .lexer import lexer, operators
 
 # Stores a token's type and value, and optionally the position of the first
 # character in the lexed stream.
-class Token(namedtuple('Token', ['type', 'val', 'pos'])):
+class Token(namedtuple('Token', ['type', 'val', 'pos', 'rule', 'part', 'stop'])):
     __slots__ = ()
 
     # Custom constructor to support default parameters.
-    def __new__(cls, type, val='', pos=None):
-        return super(Token, cls).__new__(cls, type, val, pos)
+    def __new__(cls, type, val='', pos=None, rule=None, part=None, stop=False):
+        return super(Token, cls).__new__(cls, type, val, pos, rule, part, stop)
 
     def __str__(self):
         return self.val
 
-    # Ignore position when comparing tokens. There is probably a cleaner way of
-    # doing these.
+    # Only consider type and value when comparing tokens. There is probably a
+    # cleaner way of doing this.
     __eq__ = lambda x, y: x[0] == y[0] and x[1] == y[1]
     __ne__ = lambda x, y: x[0] != y[0] or x[1] != y[1]
     __lt__ = lambda x, y: tuple.__lt__(x[0:2], y[0:2])
@@ -30,6 +30,15 @@ class Token(namedtuple('Token', ['type', 'val', 'pos'])):
     def __hash__(self):
         return hash(self[1])
 
+# Return a new Token, possibly modifying some fields.
+def clone_token(token, val=None, pos=None, rule=None, part=None):
+    return Token(token.type,
+                 token.val if val is None else val,
+                 token.pos if pos is None else pos,
+                 token.rule if rule is None else rule,
+                 token.part if part is None else part,
+                 token.stop)
+
 # Return a list of tokens in [text].
 def tokenize(text):
     lexer.input(text)
@@ -167,24 +176,25 @@ def normalized(line, var_names=None):
         i -= 1
     return tuple(rename_vars(line[:i], var_names))
 
-# transformation = before → after; applied on line which is part of rule
-# return mapping from formal vars in before+after to actual vars in rule
-# line and rule should of course not be normalized
-def map_vars(before, after, line, rule):
+# Map "formal" variable names in the edit a→b to actual names in code [tokens].
+# The set [variables] contains all variable names in the current scope. These
+# are used in cases such as [A]→[A,B], where the edit introduces new variables.
+# Return a new version of b with actual variable names.
+def map_vars(a, b, tokens, variables):
     mapping = {}
     new_index = 0
-    for i in range(len(before)):
-        if line[i].type == 'VARIABLE':
-            formal_name = before[i].val
-            if line[i].val != '_':
-                actual_name = line[i].val
+    for i in range(len(a)):
+        if tokens[i].type == 'VARIABLE':
+            formal_name = a[i].val
+            if tokens[i].val != '_':
+                actual_name = tokens[i].val
             else:
                 actual_name = 'New'+str(new_index)
                 new_index += 1
             mapping[formal_name] = actual_name
 
-    remaining_formal = [t.val for t in after if t.type == 'VARIABLE' and t.val not in mapping.keys()]
-    remaining_actual = [t.val for t in rule if t.type == 'VARIABLE' and t.val != '_' and t.val not in mapping.values()]
+    remaining_formal = [t.val for t in b if t.type == 'VARIABLE' and t.val not in mapping.keys()]
+    remaining_actual = [var for var in variables if var not in mapping.values()]
 
     while len(remaining_actual) < len(remaining_formal):
         remaining_actual.append('New'+str(new_index))
@@ -193,7 +203,7 @@ def map_vars(before, after, line, rule):
     for i, formal_name in enumerate(remaining_formal):
         mapping[formal_name] = remaining_actual[i]
 
-    return mapping
+    return [t if t.type != 'VARIABLE' else clone_token(t, val=mapping[t.val]) for t in b]
 
 # Basic sanity check.
 if __name__ == '__main__':
@@ -206,7 +216,6 @@ if __name__ == '__main__':
     after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names)
 
     line = lines[0]
-    rule = tokenize(code)
-
-    mapping = map_vars(before, after, line, rule)
-    print(mapping)
+    variables = [t.val for t in tokenize(code) if t.type == 'VARIABLE']
+    mapped = map_vars(before, after, line, variables)
+    print(mapped)