From 6a104bf8e2baea162d7f9f1d439dd8f671ddd413 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Wed, 4 Feb 2015 18:17:03 +0100 Subject: Clean up monkey.prolog.util --- monkey/edits.py | 24 ++++++++++------------ monkey/monkey.py | 5 +---- monkey/prolog/util.py | 56 +++++++++++++++++++++++++-------------------------- 3 files changed, 40 insertions(+), 45 deletions(-) diff --git a/monkey/edits.py b/monkey/edits.py index 58843f6..3e0ae08 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -170,31 +170,29 @@ def get_edits_from_traces(traces): # Update the submissions/queries counters; rename variables first to # remove trivial differences. for submission in trace_submissions: - tokens = tokenize(submission) - rename_vars(tokens) - code = stringify(tokens) + code = stringify(rename_vars(tokenize(submission))) submissions[code] += 1 for query in trace_queries: - tokens = tokenize(query) - rename_vars(tokens) - code = stringify(tokens) + code = stringify(rename_vars(tokenize(query))) queries[code] += 1 # Get edits. + done = set() for path in get_paths(nodes[0]): for i in range(len(path)): - start = list(remove_punct(path[i])) - var_names = rename_vars(start) - start_t = tuple(start) + var_names = {} + start = remove_punct(path[i]) + start_t = tuple(rename_vars(start, var_names)) for j in range(len(path[i+1:])): + var_names_copy = {k: v for k, v in var_names.items()} end = list(remove_punct(path[i+1+j])) - rename_vars(end, var_names) - end_t = tuple(end) + end_t = tuple(rename_vars(end, var_names_copy)) - if start_t != end_t: - edit = (start_t, end_t) + edit = (start_t, end_t) + if start_t != end_t and edit not in done: + done.add(edit) edits[edit] += 1 lines[start_t] += 1 diff --git a/monkey/monkey.py b/monkey/monkey.py index cae42ae..47bca5c 100755 --- a/monkey/monkey.py +++ b/monkey/monkey.py @@ -31,10 +31,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False): if line_idx < start_line: continue line = lines[line_idx] - - line_normal = list(line) - rename_vars(line_normal) - line_normal = tuple(line_normal) + line_normal = tuple(rename_vars(line)) seen = False # Apply each edit that matches this line. diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py index 46f6c5c..8d8b266 100644 --- a/monkey/prolog/util.py +++ b/monkey/prolog/util.py @@ -1,24 +1,19 @@ #!/usr/bin/python3 -from .lexer import lexer +from .lexer import lexer, operators from ..util import Token +# Return a list of tokens in [text]. def tokenize(text): lexer.input(text) return [Token(t.type, t.value, t.lexpos) for t in lexer] -operators = set([ - 'FROM', 'IMPLIES', 'NOT', - 'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA', - 'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL', - 'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD', - 'POW', 'SEMI' -]) +# Return a one-line string representation of [tokens]. def stringify(tokens): def token_str(t): if t.type in ('PERIOD', 'COMMA'): return str(t) + ' ' - if t.type in operators: + if t.type in operators.values(): return ' ' + str(t) + ' ' return str(t) return ''.join(map(token_str, tokens)) @@ -32,7 +27,7 @@ def split(code): yield stringify(tokens[start:idx]) start = idx + 1 -# return a list of lines in 'code', and a list of rule indexes +# Return a list of lines in [code] and a list of rule ranges. def decompose(code): lines = [] rules = [] @@ -68,9 +63,9 @@ def decompose(code): elif t.type == 'RBRACE' and parens[-1] == 'LBRACE': parens.pop() line.append(t) - return tuple(lines), tuple(rules) + return lines, rules -# pretty-print a list of rules +# Format a list of [lines] according to [rules] (as returned by decompose). def compose(lines, rules): code = '' for start, end in rules: @@ -89,25 +84,28 @@ def compose(lines, rules): code += '\n' return code.strip() -# standardize variable names in order of appearance -def rename_vars(tokens, names={}): - # copy names so we don't fuck it up - names = {k: v for k, v in names.items()} +# Rename variables in [tokens] to A0, A1, A2,… in order of appearance. +def rename_vars(tokens, names=None): + if names is None: + names = {} next_id = len(names) + + # Return a new list. + tokens = list(tokens) for i in range(len(tokens)): if tokens[i].type == 'PERIOD': names.clear() next_id = 0 elif tokens[i] == Token('VARIABLE', '_'): - tokens[i] = Token('VARIABLE', 'A' + str(next_id)) + tokens[i] = Token('VARIABLE', 'A{}'.format(next_id)) next_id += 1 elif tokens[i].type == 'VARIABLE': cur_name = tokens[i].val if cur_name not in names: - names[cur_name] = next_id + names[cur_name] = 'A{}'.format(next_id) next_id += 1 - tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name])) - return names + tokens[i] = Token('VARIABLE', names[cur_name]) + return tokens # transformation = before → after; applied on line which is part of rule # return mapping from formal vars in before+after to actual vars in rule @@ -139,14 +137,16 @@ def map_vars(before, after, line, rule): # Basic sanity check. if __name__ == '__main__': - print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). '))) - - rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ') - line = tokenize('dup([H|T], [H1|T1]) :-') - before = tokenize("dup([A0|A1], [A2|A3])") - after = tokenize("dup([A0|A1], [A5, A4|A3])") - var_names = rename_vars(before) - rename_vars(after, var_names) + code = 'dup([H|T], [H1|T1]) :- dup(T1, T2). ' + lines, rules = decompose(code) + print(compose(lines, rules)) + + var_names = {} + before = rename_vars(tokenize("dup([A0|A1], [A2|A3])"), var_names) + after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names) + + line = lines[0] + rule = tokenize(code) mapping = map_vars(before, after, line, rule) print(mapping) -- cgit v1.2.1