summaryrefslogtreecommitdiff
path: root/monkey
diff options
context:
space:
mode:
authorTimotej Lazar <timotej.lazar@araneo.org>2015-03-02 16:06:36 +0100
committerAleš Smodiš <aless@guru.si>2015-08-11 14:26:02 +0200
commita60b0ef82a3f6c0cc38084200f92ae293d729116 (patch)
tree5beadcbf825551de37fd6484764ef44c87500632 /monkey
parent720f4aa6426e1463757b3192e13b9f705a354a65 (diff)
Annotate tokens instead of splitting program
Instead of splitting the program by line numbers, do limited parsing (enough to distinguish , in "memb(X,Y)" from , in "a :- b, c."). Each token in the parsed program is annotated with rule and part number. Rewrite monkey.fix.step to take program as a sequence of annotated tokens instead of lists of lines and rules. Improve message passing to website.
Diffstat (limited to 'monkey')
-rwxr-xr-xmonkey/monkey.py282
-rwxr-xr-xmonkey/test.py12
2 files changed, 192 insertions, 102 deletions
diff --git a/monkey/monkey.py b/monkey/monkey.py
index 79584e0..a7beb6e 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -5,7 +5,7 @@ import time
from .edits import classify_edits
from prolog.engine import test
-from prolog.util import Token, compose, decompose, map_vars, normalized, rename_vars, stringify
+from prolog.util import Token, annotate, compose, map_vars, normalized, rename_vars, stringify
from .util import PQueue
# Starting from [code], find a sequence of edits that transforms it into a
@@ -20,107 +20,198 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
inserts, removes, changes = classify_edits(edits)
# Generate states that can be reached from the given program with one edit.
- # Program code is given as a list of [lines], where each line is a list of
- # tokens. Rule ranges are given in [rules] (see prolog.util.decompose).
- def step(lines, rules, path=None):
- # Apply edits in order from top to bottom; skip lines with index lower
- # than last step.
- start_line = path[-1][1] if path else 0
-
- for start, end in rules:
- rule_lines = lines[start:end]
- rule_vars = [t.val for line in rule_lines for t in line
- if t.type == 'VARIABLE' and t.val != '_']
-
- # Prepend a new rule (fact) before this rule (only if no line in
- # the current rule has been modified yet).
- if start_line == 0 or start > start_line:
- for after, cost in inserts.items():
- new_lines = lines[:start] + (after,) + lines[start:]
- new_rules = []
- for old_start, old_end in rules:
- if old_start == start:
- new_rules.append((start, start+1))
- new_rules.append((old_start + (0 if old_start < start else 1),
- old_end + (0 if old_end < start else 1)))
- new_step = ('add_rule', start, (tuple(), after))
- # Decrease probability as we add more rules.
- new_cost = cost * math.pow(0.3, len(rules))
-
- yield (new_lines, new_rules, new_step, new_cost)
-
- # Apply some edits for each line in this rule.
- for line_idx in range(start, end):
- if line_idx < start_line:
- continue
- line = lines[line_idx]
- line_normal = tuple(rename_vars(line))
+ # The program is given as a list of tokens.
+ def step(program, path=None):
+ # Apply edits to program in order; skip tokens with index lower than
+ # last step.
+ start = path[-1][4] if path else 0
+
+ first = 0 # first token in the current part
+ variables = []
+ for i, token in enumerate(program):
+ # Get variable names in the current rule.
+ if i == 0 or program[i-1].type == 'PERIOD':
+ variables = []
+ for t in program[i:]:
+ if t.type == 'VARIABLE' and not t.val.startswith('_'):
+ if t.val not in variables:
+ variables.append(t.val)
+ elif t.type == 'PERIOD':
+ break
+
+ # Skip already modified parts of the program.
+ if i < start:
+ continue
+
+ if i == 0 or program[i-1].stop:
+ first = i
+
+ # Add a new fact at beginning or after another rule.
+ if i == 0 or token.type == 'PERIOD':
+ new_start = i+1 if token.type == 'PERIOD' else 0
+ n_rules = program.count(Token('PERIOD', '.'))
+ rule = 0 if i == 0 else program[i-1].rule+1 # index of new rule
+
+ for new, cost in inserts.items():
+ # New rule must start with correct predicate name.
+ if not (new[0].type == 'NAME' and new[0].val in name):
+ continue
- # Apply edits whose left-hand side matches this line.
+ # Here we always insert a fact, so replace trailing :- with ..
+ if new[-1].type == 'FROM':
+ new = new[:-1]
+
+ new_real = tuple([t.clone(rule=rule, part=0) for t in new + (Token('PERIOD', '.', stop=True),)])
+ new_after = tuple([t.clone(rule=t.rule+1) for t in program[new_start:]])
+
+ new_program = program[:new_start] + new_real + new_after
+ new_step = ('add_rule', new_start, (), new_real, new_start)
+ new_cost = cost * math.pow(0.3, n_rules)
+ yield (new_program, new_step, new_cost)
+
+ if token.stop and i > first:
+ real_last = last = i+1
+ if token.type != 'FROM':
+ real_last -= 1
+ part = program[first:real_last]
+ part_whole = program[first:last]
+ part_normal = tuple(rename_vars(part))
+
+ # Apply each edit a→b where a matches this part.
seen = False # has there been such an edit?
- for (before, after), cost in changes.items():
- if line_normal == before:
+ for (a, b), cost in changes.items():
+ if part_normal == a:
seen = True
- after_real = tuple(map_vars(before, after, line, rule_vars))
- new_lines = lines[:line_idx] + (after_real,) + lines[line_idx+1:]
- new_step = ('change_line', line_idx, (tuple(line), after_real))
-
- yield (new_lines, rules, new_step, cost)
-
- # Remove the current line.
- if line_normal in removes.keys() or not seen:
- new_lines = lines[:line_idx] + lines[line_idx+1:]
- new_rules = []
- for old_start, old_end in rules:
- new_start, new_end = (old_start - (0 if old_start <= line_idx else 1),
- old_end - (0 if old_end <= line_idx else 1))
- if new_end > new_start:
- new_rules.append((new_start, new_end))
- new_step = ('remove_line', line_idx, (tuple(line), ()))
- new_cost = removes.get(line_normal, 1.0) * 0.99
-
- yield (new_lines, new_rules, new_step, new_cost)
-
- # Add a line after this one.
- for after, cost in inserts.items():
- # Don't try to insert a head into the body.
- if after[-1].type == 'FROM':
- continue
- after_real = tuple(map_vars([], after, [], rule_vars))
-
- idx = line_idx+1
- new_lines = lines[:idx] + (after_real,) + lines[idx:]
- new_rules = []
- for old_start, old_end in rules:
- new_rules.append((old_start + (0 if old_start < idx else 1),
- old_end + (0 if old_end < idx else 1)))
- new_step = ('add_subgoal', idx, ((), after_real))
- new_cost = cost * 0.4
-
- yield (new_lines, new_rules, new_step, new_cost)
-
- # Return a cleaned-up list of steps:
- # - multiple line edits in a single line are merged into one
- # - check if any lines can be removed from the program
+ if b[-1].type == 'FROM':
+ b = b[:-1] + (b[-1].clone(stop=True),)
+ b_real = tuple([t.clone(rule=program[first].rule,
+ part=program[first].part)
+ for t in map_vars(a, b, part, variables)])
+
+ new_program = program[:first] + b_real + program[real_last:]
+ new_step = ('change_part', first, part, b_real, first)
+ yield (new_program, new_step, cost)
+
+
+ # Remove this part.
+ if token.type in ('COMMA', 'SEMI'):
+ if part_normal in removes.keys() or not seen:
+ new_after = list(program[last:])
+ for j, t in enumerate(new_after):
+ if t.rule > token.rule:
+ break
+ new_after[j] = t.clone(part=t.part-1)
+ new_program = program[:first] + tuple(new_after)
+ new_step = ('remove_part', first, part_whole, (), first-1)
+ new_cost = removes.get(part_normal, 1.0) * 0.99
+ yield (new_program, new_step, new_cost)
+
+ # Remove this part at the end of the current rule.
+ if token.type == 'PERIOD' and token.part > 0:
+ if part_normal in removes.keys() or not seen:
+ if token.part == 0: # part is fact, remove rule
+ new_after = list(program[last+1:])
+ for j, t in enumerate(new_after):
+ new_after[j] = t.clone(rule=t.rule-1)
+ new_program = program[:first] + tuple(new_after)
+ new_step = ('remove_rule', first, part, (), first)
+ new_cost = removes.get(part_normal, 1.0) * 0.99
+ yield (new_program, new_step, new_cost)
+
+ else: # part is subgoal, remove part
+ new_after = list(program[last-1:])
+ for j, t in enumerate(new_after):
+ if t.rule > token.rule:
+ break
+ new_after[j] = t.clone(part=t.part-1)
+ new_program = program[:first-1] + tuple(new_after)
+ new_step = ('remove_part', first-1, (program[first-1],) + part, (), first-1)
+ new_cost = removes.get(part_normal, 1.0) * 0.99
+ yield (new_program, new_step, new_cost)
+
+
+ # Insert a new part (goal) after this part.
+ if token.type in ('COMMA', 'FROM'):
+ for new, cost in inserts.items():
+ # Don't try to insert a head into the body.
+ if new[-1].type == 'FROM':
+ continue
+
+ new_real = tuple([t.clone(rule=program[first].rule,
+ part=program[first].part+1)
+ for t in map_vars([], new, [], variables) + [Token('COMMA', ',', stop=True)]])
+
+ new_after = list(program[last:])
+ for j, t in enumerate(new_after):
+ if t.rule > token.rule:
+ break
+ new_after[j] = t.clone(rule=program[first].rule, part=t.part+1)
+
+ new_program = program[:last] + new_real + tuple(new_after)
+ new_step = ('add_part', last, (), new_real, last)
+ new_cost = cost * 0.4
+ yield (new_program, new_step, new_cost)
+
+ # Insert a new part (goal) at the end of current rule.
+ if token.type == 'PERIOD':
+ for new, cost in inserts.items():
+ # Don't try to insert a head into the body.
+ if new[-1].type == 'FROM':
+ continue
+
+ prepend = Token('FROM', ':-') if token.part == 0 else Token('COMMA', ',')
+ new_real = (prepend.clone(stop=True, rule=token.rule, part=token.part),) + \
+ tuple([t.clone(rule=token.rule, part=token.part+1)
+ for t in map_vars([], new, [], variables)])
+
+ new_after = list(program[last-1:])
+ for j, t in enumerate(new_after):
+ if t.rule > token.rule:
+ break
+ new_after[j] = t.clone(rule=t.rule, part=t.part+1)
+
+ new_program = program[:last-1] + new_real + tuple(new_after)
+ new_step = ('add_part', last-1, (), new_real, last)
+ new_cost = cost * 0.4
+ yield (new_program, new_step, new_cost)
+
+ # Return a cleaned-up list of steps.
def postprocess(steps):
new_steps = []
for step in steps:
+ # Remove the last field from each step as it is unnecessary after a
+ # path has been found.
+ step = step[:4]
if new_steps:
prev = new_steps[-1]
+ if prev[0] == 'remove_part' and step[0] == 'remove_part' and \
+ prev[1] == step[1]:
+ new_steps[-1] = ('remove_part', prev[1], prev[2]+step[2], ())
+ continue
+
+ if prev[0] == 'remove_part' and step[0] == 'add_part' and \
+ prev[1] == step[1]:
+ new_steps[-1] = ('change_part', prev[1], prev[2], step[3])
+ continue
- if prev[1] == step[1] and \
- prev[0] in ('add_rule', 'add_subgoal', 'change_line') and \
- step[0] == 'change_line' and \
- normalized(prev[2][1]) == normalized(step[2][0]):
- new_steps[-1] = (prev[0], prev[1], (prev[2][0], step[2][1]))
+ if prev[0] == 'change_part' and step[0] == 'change_part' and \
+ prev[1] == step[1] and step[2] == prev[3]:
+ new_steps[-1] = ('change_part', prev[1], prev[2], step[3])
continue
- if prev[0] == 'add_subgoal' and step[0] == 'remove_line' and \
- prev[1]+1 == step[1]:
- new_steps[-1] = ('change_line', prev[1], (step[2][0], prev[2][1]))
+ if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \
+ prev[1] == step[1] and step[2] == prev[3][:-1]:
+ new_steps[-1] = (prev[0], prev[1], prev[2], step[3]+(prev[3][-1],))
continue
+ if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \
+ step[1] == prev[1]+1 and step[2] == prev[3][1:]:
+ new_steps[-1] = (prev[0], prev[1], prev[2], (prev[3][0],)+step[3])
+ continue
new_steps.append(step)
+ for step in new_steps:
+ print('index {}: {} {} → {}'.format(
+ step[1], step[0], stringify(step[2]), stringify(step[3])))
return new_steps
# Main loop: best-first search through generated programs.
@@ -129,8 +220,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
# Each program gets a task with the sequence of edits that generated it and
# the associated cost. First candidate with cost 1 is the initial program.
- lines, rules = decompose(code)
- todo.push(((tuple(lines), tuple(rules)), (), 1.0), -1.0)
+ todo.push((program, (), 1.0), -1.0)
n_tested = 0
start_time = time.monotonic()
@@ -140,10 +230,10 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
task = todo.pop()
if task == None:
break
- (lines, rules), path, path_cost = task
+ program, path, path_cost = task
# If we have already seen this code, skip it.
- code = compose(lines, rules)
+ code = compose(program)
if code in done:
continue
done.add(code)
@@ -151,8 +241,8 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
# Print some info about the current task.
if debug:
print('Cost {:.12f}'.format(path_cost))
- for step_type, line, (before, after) in path:
- print('line {}: {} {} → {}'.format(line, step_type, stringify(before), stringify(after)))
+ for step_type, idx, a, b, _ in path:
+ print('index {}: {} {} → {}'.format(idx, step_type, stringify(a), stringify(b)))
# If the code is correct, we are done.
if test(name, code + '\n' + aux_code):
@@ -161,12 +251,12 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
n_tested += 1
# Otherwise generate new solutions.
- for new_lines, new_rules, new_step, new_cost in step(lines, rules, path):
+ for new_program, new_step, new_cost in step(program, path):
new_path_cost = path_cost * new_cost
if new_path_cost < 0.01:
continue
new_path = path + (new_step,)
- todo.push(((tuple(new_lines), tuple(new_rules)), new_path, new_path_cost), -new_path_cost)
+ todo.push((new_program, new_path, new_path_cost), -new_path_cost)
total_time = time.monotonic() - start_time
diff --git a/monkey/test.py b/monkey/test.py
index 83aa0c2..17c27fe 100755
--- a/monkey/test.py
+++ b/monkey/test.py
@@ -11,7 +11,7 @@ from .edits import classify_edits, trace_graph
from .graph import graphviz
from .monkey import fix
from prolog.engine import test
-from prolog.util import compose, decompose, stringify
+from prolog.util import annotate, compose, stringify
from .util import indent
# Load django models.
@@ -57,10 +57,10 @@ def print_hint(solution, steps, fix_time, n_tested):
if solution:
print(colored('Hint found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'green'))
print(colored(' Edits', 'blue'))
- for step_type, line, (before, after) in steps:
- print(' {}: {} {} → {}'.format(line, step_type, stringify(before), stringify(after)))
+ for step_type, pos, a, b in steps:
+ print(' {}: {} {} → {}'.format(pos, step_type, stringify(a), stringify(b)))
print(colored(' Final version', 'blue'))
- print(indent(compose(*decompose(solution)), 2))
+ print(indent(compose(annotate(solution)), 2))
else:
print(colored('Hint not found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'red'))
@@ -75,7 +75,7 @@ if len(sys.argv) >= 3 and sys.argv[2] == 'test':
if program in done:
continue
print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
- print(indent(compose(*decompose(program)), 2))
+ print(indent(compose(annotate(program)), 2))
solution, steps, fix_time, n_tested = fix(problem.name, program, edits, aux_code=aux_code, timeout=timeout)
if solution:
@@ -119,7 +119,7 @@ elif len(sys.argv) >= 3 and sys.argv[2] == 'info':
for p in sorted(incorrect):
if p in done:
continue
- print(indent(compose(*decompose(p)), 2))
+ print(indent(compose(annotate(p)), 2))
print()
# Print all student queries and their counts.
elif sys.argv[3] == 'queries':