author    Timotej Lazar <timotej.lazar@araneo.org>  2015-02-04 18:17:03 +0100
committer Aleš Smodiš <aless@guru.si>               2015-08-11 14:26:01 +0200
commit    6a104bf8e2baea162d7f9f1d439dd8f671ddd413
tree      dcff17886abf0db6c01107f8a202f344a4e98bfe
parent    127bc6d1c3169a80c735b01588c968b85d2df944
Clean up monkey.prolog.util
-rw-r--r--  monkey/edits.py        | 24
-rwxr-xr-x  monkey/monkey.py       |  5
-rw-r--r--  monkey/prolog/util.py  | 56
3 files changed, 40 insertions, 45 deletions
diff --git a/monkey/edits.py b/monkey/edits.py
index 58843f6..3e0ae08 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -170,31 +170,29 @@ def get_edits_from_traces(traces):
         # Update the submissions/queries counters; rename variables first to
         # remove trivial differences.
         for submission in trace_submissions:
-            tokens = tokenize(submission)
-            rename_vars(tokens)
-            code = stringify(tokens)
+            code = stringify(rename_vars(tokenize(submission)))
             submissions[code] += 1

         for query in trace_queries:
-            tokens = tokenize(query)
-            rename_vars(tokens)
-            code = stringify(tokens)
+            code = stringify(rename_vars(tokenize(query)))
             queries[code] += 1

         # Get edits.
+        done = set()
         for path in get_paths(nodes[0]):
             for i in range(len(path)):
-                start = list(remove_punct(path[i]))
-                var_names = rename_vars(start)
-                start_t = tuple(start)
+                var_names = {}
+                start = remove_punct(path[i])
+                start_t = tuple(rename_vars(start, var_names))

                 for j in range(len(path[i+1:])):
+                    var_names_copy = {k: v for k, v in var_names.items()}
                     end = list(remove_punct(path[i+1+j]))
-                    rename_vars(end, var_names)
-                    end_t = tuple(end)
+                    end_t = tuple(rename_vars(end, var_names_copy))

-                    if start_t != end_t:
-                        edit = (start_t, end_t)
+                    edit = (start_t, end_t)
+                    if start_t != end_t and edit not in done:
+                        done.add(edit)
                         edits[edit] += 1
                         lines[start_t] += 1

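The rewrite above leans on the new contract of rename_vars: it returns a fresh, renamed token list instead of mutating its argument, and the done set counts each distinct (start, end) pair at most once per trace. A minimal sketch of the shared normalization step, assuming the patched monkey.prolog.util is importable (normalize is a hypothetical helper, not part of this patch):

    from monkey.prolog.util import tokenize, rename_vars, stringify

    def normalize(code):
        # Tokenize, standardize variable names to A0, A1, ... in order of
        # appearance, and re-serialize to a canonical one-line string.
        return stringify(rename_vars(tokenize(code)))

    # Programs that differ only in variable naming map to the same key,
    # so the submissions/queries counters aggregate them correctly.
    assert normalize('dup([H|T], X)') == normalize('dup([A|B], C)')
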
diff --git a/monkey/monkey.py b/monkey/monkey.py
index cae42ae..47bca5c 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -31,10 +31,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
         if line_idx < start_line:
             continue
         line = lines[line_idx]
-
-        line_normal = list(line)
-        rename_vars(line_normal)
-        line_normal = tuple(line_normal)
+        line_normal = tuple(rename_vars(line))

         seen = False
         # Apply each edit that matches this line.
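Here the same property pays off inside fix: normalizing a line for edit lookup no longer clobbers the concrete tokens that get spliced back into the program. A before/after sketch, assuming the patched module (output spacing is approximate):

    from monkey.prolog.util import tokenize, rename_vars, stringify

    line = tokenize('dup([H|T], X)')
    line_normal = tuple(rename_vars(line))

    print(stringify(line))         # dup([H|T], X)    -- original intact
    print(stringify(line_normal))  # dup([A0|A1], A2) -- lookup key
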
diff --git a/monkey/prolog/util.py b/monkey/prolog/util.py
index 46f6c5c..8d8b266 100644
--- a/monkey/prolog/util.py
+++ b/monkey/prolog/util.py
@@ -1,24 +1,19 @@
 #!/usr/bin/python3

-from .lexer import lexer
+from .lexer import lexer, operators
 from ..util import Token

+# Return a list of tokens in [text].
 def tokenize(text):
     lexer.input(text)
     return [Token(t.type, t.value, t.lexpos) for t in lexer]

-operators = set([
-    'FROM', 'IMPLIES', 'NOT',
-    'EQU', 'NEQU', 'EQ', 'NEQ', 'UNIV', 'IS', 'EQA', 'NEQA',
-    'LT', 'LE', 'GT', 'GE', 'LTL', 'LEL', 'GTL', 'GEL',
-    'PLUS', 'MINUS', 'STAR', 'DIV', 'IDIV', 'MOD',
-    'POW', 'SEMI'
-])
+# Return a one-line string representation of [tokens].
 def stringify(tokens):
     def token_str(t):
         if t.type in ('PERIOD', 'COMMA'):
             return str(t) + ' '
-        if t.type in operators:
+        if t.type in operators.values():
             return ' ' + str(t) + ' '
         return str(t)
     return ''.join(map(token_str, tokens))
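With the operator table imported from the lexer, stringify derives spacing from the same definitions the tokenizer uses, rather than a hand-maintained copy that could drift out of sync. A quick round-trip sketch, assuming operators maps operator lexemes to token type names:

    from monkey.prolog.util import tokenize, stringify

    print(stringify(tokenize('a(X):-b(X),c(X).')))
    # Expected along the lines of 'a(X) :- b(X), c(X). ': operators such
    # as ':-' are padded with spaces on both sides, while commas and
    # periods get a trailing space only.
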
@@ -32,7 +27,7 @@ def split(code):
             yield stringify(tokens[start:idx])
             start = idx + 1

-# return a list of lines in 'code', and a list of rule indexes
+# Return a list of lines in [code] and a list of rule ranges.
 def decompose(code):
     lines = []
     rules = []
@@ -68,9 +63,9 @@ def decompose(code):
         elif t.type == 'RBRACE' and parens[-1] == 'LBRACE':
             parens.pop()
         line.append(t)
-    return tuple(lines), tuple(rules)
+    return lines, rules

-# pretty-print a list of rules
+# Format a list of [lines] according to [rules] (as returned by decompose).
 def compose(lines, rules):
     code = ''
     for start, end in rules:
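Since decompose now returns mutable lists, callers can replace individual lines before handing them back to compose. A round-trip sketch, assuming the patched module:

    from monkey.prolog.util import decompose, compose, rename_vars

    lines, rules = decompose('dup([H|T], [H1|T1]) :- dup(T1, T2).')
    # [rules] holds (start, end) index ranges into [lines]; with lists
    # instead of tuples, this in-place update is now legal.
    lines[0] = rename_vars(lines[0])
    print(compose(lines, rules))
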
@@ -89,25 +84,28 @@ def compose(lines, rules):
         code += '\n'
     return code.strip()

-# standardize variable names in order of appearance
-def rename_vars(tokens, names={}):
-    # copy names so we don't fuck it up
-    names = {k: v for k, v in names.items()}
+# Rename variables in [tokens] to A0, A1, A2, … in order of appearance.
+def rename_vars(tokens, names=None):
+    if names is None:
+        names = {}
     next_id = len(names)
+
+    # Return a new list.
+    tokens = list(tokens)
     for i in range(len(tokens)):
         if tokens[i].type == 'PERIOD':
             names.clear()
             next_id = 0
         elif tokens[i] == Token('VARIABLE', '_'):
-            tokens[i] = Token('VARIABLE', 'A' + str(next_id))
+            tokens[i] = Token('VARIABLE', 'A{}'.format(next_id))
             next_id += 1
         elif tokens[i].type == 'VARIABLE':
             cur_name = tokens[i].val
             if cur_name not in names:
-                names[cur_name] = next_id
+                names[cur_name] = 'A{}'.format(next_id)
                 next_id += 1
-            tokens[i] = Token('VARIABLE', 'A' + str(names[cur_name]))
-    return names
+            tokens[i] = Token('VARIABLE', names[cur_name])
+    return tokens

 # transformation = before → after; applied on line which is part of rule
 # return mapping from formal vars in before+after to actual vars in rule
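The new rename_vars copies its input and returns the renamed tokens; [names] maps each original variable to its fresh name and is cleared at every PERIOD, so numbering restarts per clause. A small sketch, assuming the patched module:

    from monkey.prolog.util import tokenize, stringify, rename_vars

    tokens = tokenize('eq(X, Y) :- X = Y. id(Z, Z).')
    renamed = rename_vars(tokens)

    print(stringify(renamed))
    # Roughly: 'eq(A0, A1) :- A0 = A1. id(A0, A0).' -- Z restarts at A0
    # because the name map is cleared at the period; [tokens] itself is
    # left unmodified.
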
@@ -139,14 +137,16 @@ def map_vars(before, after, line, rule):

 # Basic sanity check.
 if __name__ == '__main__':
-    print(compose(*decompose('dup([H|T], [H1|T1]) :- dup(T1, T2). ')))
-
-    rule = tokenize('dup([H|T], [H1|T1]) :- dup(T1, T2). ')
-    line = tokenize('dup([H|T], [H1|T1]) :-')
-    before = tokenize("dup([A0|A1], [A2|A3])")
-    after = tokenize("dup([A0|A1], [A5, A4|A3])")
-    var_names = rename_vars(before)
-    rename_vars(after, var_names)
+    code = 'dup([H|T], [H1|T1]) :- dup(T1, T2). '
+    lines, rules = decompose(code)
+    print(compose(lines, rules))
+
+    var_names = {}
+    before = rename_vars(tokenize("dup([A0|A1], [A2|A3])"), var_names)
+    after = rename_vars(tokenize("dup([A0|A1], [A5, A4|A3])"), var_names)
+
+    line = lines[0]
+    rule = tokenize(code)

     mapping = map_vars(before, after, line, rule)
     print(mapping)