From a60b0ef82a3f6c0cc38084200f92ae293d729116 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Mon, 2 Mar 2015 16:06:36 +0100 Subject: Annotate tokens instead of splitting program Instead of splitting the program by line numbers, do limited parsing (enough to distinguish , in "memb(X,Y)" from , in "a :- b, c."). Each token in the parsed program is annotated with rule and part number. Rewrite monkey.fix.step to take program as a sequence of annotated tokens instead of lists of lines and rules. Improve message passing to website. --- monkey/monkey.py | 282 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 186 insertions(+), 96 deletions(-) (limited to 'monkey/monkey.py') diff --git a/monkey/monkey.py b/monkey/monkey.py index 79584e0..a7beb6e 100755 --- a/monkey/monkey.py +++ b/monkey/monkey.py @@ -5,7 +5,7 @@ import time from .edits import classify_edits from prolog.engine import test -from prolog.util import Token, compose, decompose, map_vars, normalized, rename_vars, stringify +from prolog.util import Token, annotate, compose, map_vars, normalized, rename_vars, stringify from .util import PQueue # Starting from [code], find a sequence of edits that transforms it into a @@ -20,107 +20,198 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False): inserts, removes, changes = classify_edits(edits) # Generate states that can be reached from the given program with one edit. - # Program code is given as a list of [lines], where each line is a list of - # tokens. Rule ranges are given in [rules] (see prolog.util.decompose). - def step(lines, rules, path=None): - # Apply edits in order from top to bottom; skip lines with index lower - # than last step. - start_line = path[-1][1] if path else 0 - - for start, end in rules: - rule_lines = lines[start:end] - rule_vars = [t.val for line in rule_lines for t in line - if t.type == 'VARIABLE' and t.val != '_'] - - # Prepend a new rule (fact) before this rule (only if no line in - # the current rule has been modified yet). - if start_line == 0 or start > start_line: - for after, cost in inserts.items(): - new_lines = lines[:start] + (after,) + lines[start:] - new_rules = [] - for old_start, old_end in rules: - if old_start == start: - new_rules.append((start, start+1)) - new_rules.append((old_start + (0 if old_start < start else 1), - old_end + (0 if old_end < start else 1))) - new_step = ('add_rule', start, (tuple(), after)) - # Decrease probability as we add more rules. - new_cost = cost * math.pow(0.3, len(rules)) - - yield (new_lines, new_rules, new_step, new_cost) - - # Apply some edits for each line in this rule. - for line_idx in range(start, end): - if line_idx < start_line: - continue - line = lines[line_idx] - line_normal = tuple(rename_vars(line)) + # The program is given as a list of tokens. + def step(program, path=None): + # Apply edits to program in order; skip tokens with index lower than + # last step. + start = path[-1][4] if path else 0 + + first = 0 # first token in the current part + variables = [] + for i, token in enumerate(program): + # Get variable names in the current rule. + if i == 0 or program[i-1].type == 'PERIOD': + variables = [] + for t in program[i:]: + if t.type == 'VARIABLE' and not t.val.startswith('_'): + if t.val not in variables: + variables.append(t.val) + elif t.type == 'PERIOD': + break + + # Skip already modified parts of the program. + if i < start: + continue + + if i == 0 or program[i-1].stop: + first = i + + # Add a new fact at beginning or after another rule. + if i == 0 or token.type == 'PERIOD': + new_start = i+1 if token.type == 'PERIOD' else 0 + n_rules = program.count(Token('PERIOD', '.')) + rule = 0 if i == 0 else program[i-1].rule+1 # index of new rule + + for new, cost in inserts.items(): + # New rule must start with correct predicate name. + if not (new[0].type == 'NAME' and new[0].val in name): + continue - # Apply edits whose left-hand side matches this line. + # Here we always insert a fact, so replace trailing :- with .. + if new[-1].type == 'FROM': + new = new[:-1] + + new_real = tuple([t.clone(rule=rule, part=0) for t in new + (Token('PERIOD', '.', stop=True),)]) + new_after = tuple([t.clone(rule=t.rule+1) for t in program[new_start:]]) + + new_program = program[:new_start] + new_real + new_after + new_step = ('add_rule', new_start, (), new_real, new_start) + new_cost = cost * math.pow(0.3, n_rules) + yield (new_program, new_step, new_cost) + + if token.stop and i > first: + real_last = last = i+1 + if token.type != 'FROM': + real_last -= 1 + part = program[first:real_last] + part_whole = program[first:last] + part_normal = tuple(rename_vars(part)) + + # Apply each edit a→b where a matches this part. seen = False # has there been such an edit? - for (before, after), cost in changes.items(): - if line_normal == before: + for (a, b), cost in changes.items(): + if part_normal == a: seen = True - after_real = tuple(map_vars(before, after, line, rule_vars)) - new_lines = lines[:line_idx] + (after_real,) + lines[line_idx+1:] - new_step = ('change_line', line_idx, (tuple(line), after_real)) - - yield (new_lines, rules, new_step, cost) - - # Remove the current line. - if line_normal in removes.keys() or not seen: - new_lines = lines[:line_idx] + lines[line_idx+1:] - new_rules = [] - for old_start, old_end in rules: - new_start, new_end = (old_start - (0 if old_start <= line_idx else 1), - old_end - (0 if old_end <= line_idx else 1)) - if new_end > new_start: - new_rules.append((new_start, new_end)) - new_step = ('remove_line', line_idx, (tuple(line), ())) - new_cost = removes.get(line_normal, 1.0) * 0.99 - - yield (new_lines, new_rules, new_step, new_cost) - - # Add a line after this one. - for after, cost in inserts.items(): - # Don't try to insert a head into the body. - if after[-1].type == 'FROM': - continue - after_real = tuple(map_vars([], after, [], rule_vars)) - - idx = line_idx+1 - new_lines = lines[:idx] + (after_real,) + lines[idx:] - new_rules = [] - for old_start, old_end in rules: - new_rules.append((old_start + (0 if old_start < idx else 1), - old_end + (0 if old_end < idx else 1))) - new_step = ('add_subgoal', idx, ((), after_real)) - new_cost = cost * 0.4 - - yield (new_lines, new_rules, new_step, new_cost) - - # Return a cleaned-up list of steps: - # - multiple line edits in a single line are merged into one - # - check if any lines can be removed from the program + if b[-1].type == 'FROM': + b = b[:-1] + (b[-1].clone(stop=True),) + b_real = tuple([t.clone(rule=program[first].rule, + part=program[first].part) + for t in map_vars(a, b, part, variables)]) + + new_program = program[:first] + b_real + program[real_last:] + new_step = ('change_part', first, part, b_real, first) + yield (new_program, new_step, cost) + + + # Remove this part. + if token.type in ('COMMA', 'SEMI'): + if part_normal in removes.keys() or not seen: + new_after = list(program[last:]) + for j, t in enumerate(new_after): + if t.rule > token.rule: + break + new_after[j] = t.clone(part=t.part-1) + new_program = program[:first] + tuple(new_after) + new_step = ('remove_part', first, part_whole, (), first-1) + new_cost = removes.get(part_normal, 1.0) * 0.99 + yield (new_program, new_step, new_cost) + + # Remove this part at the end of the current rule. + if token.type == 'PERIOD' and token.part > 0: + if part_normal in removes.keys() or not seen: + if token.part == 0: # part is fact, remove rule + new_after = list(program[last+1:]) + for j, t in enumerate(new_after): + new_after[j] = t.clone(rule=t.rule-1) + new_program = program[:first] + tuple(new_after) + new_step = ('remove_rule', first, part, (), first) + new_cost = removes.get(part_normal, 1.0) * 0.99 + yield (new_program, new_step, new_cost) + + else: # part is subgoal, remove part + new_after = list(program[last-1:]) + for j, t in enumerate(new_after): + if t.rule > token.rule: + break + new_after[j] = t.clone(part=t.part-1) + new_program = program[:first-1] + tuple(new_after) + new_step = ('remove_part', first-1, (program[first-1],) + part, (), first-1) + new_cost = removes.get(part_normal, 1.0) * 0.99 + yield (new_program, new_step, new_cost) + + + # Insert a new part (goal) after this part. + if token.type in ('COMMA', 'FROM'): + for new, cost in inserts.items(): + # Don't try to insert a head into the body. + if new[-1].type == 'FROM': + continue + + new_real = tuple([t.clone(rule=program[first].rule, + part=program[first].part+1) + for t in map_vars([], new, [], variables) + [Token('COMMA', ',', stop=True)]]) + + new_after = list(program[last:]) + for j, t in enumerate(new_after): + if t.rule > token.rule: + break + new_after[j] = t.clone(rule=program[first].rule, part=t.part+1) + + new_program = program[:last] + new_real + tuple(new_after) + new_step = ('add_part', last, (), new_real, last) + new_cost = cost * 0.4 + yield (new_program, new_step, new_cost) + + # Insert a new part (goal) at the end of current rule. + if token.type == 'PERIOD': + for new, cost in inserts.items(): + # Don't try to insert a head into the body. + if new[-1].type == 'FROM': + continue + + prepend = Token('FROM', ':-') if token.part == 0 else Token('COMMA', ',') + new_real = (prepend.clone(stop=True, rule=token.rule, part=token.part),) + \ + tuple([t.clone(rule=token.rule, part=token.part+1) + for t in map_vars([], new, [], variables)]) + + new_after = list(program[last-1:]) + for j, t in enumerate(new_after): + if t.rule > token.rule: + break + new_after[j] = t.clone(rule=t.rule, part=t.part+1) + + new_program = program[:last-1] + new_real + tuple(new_after) + new_step = ('add_part', last-1, (), new_real, last) + new_cost = cost * 0.4 + yield (new_program, new_step, new_cost) + + # Return a cleaned-up list of steps. def postprocess(steps): new_steps = [] for step in steps: + # Remove the last field from each step as it is unnecessary after a + # path has been found. + step = step[:4] if new_steps: prev = new_steps[-1] + if prev[0] == 'remove_part' and step[0] == 'remove_part' and \ + prev[1] == step[1]: + new_steps[-1] = ('remove_part', prev[1], prev[2]+step[2], ()) + continue + + if prev[0] == 'remove_part' and step[0] == 'add_part' and \ + prev[1] == step[1]: + new_steps[-1] = ('change_part', prev[1], prev[2], step[3]) + continue - if prev[1] == step[1] and \ - prev[0] in ('add_rule', 'add_subgoal', 'change_line') and \ - step[0] == 'change_line' and \ - normalized(prev[2][1]) == normalized(step[2][0]): - new_steps[-1] = (prev[0], prev[1], (prev[2][0], step[2][1])) + if prev[0] == 'change_part' and step[0] == 'change_part' and \ + prev[1] == step[1] and step[2] == prev[3]: + new_steps[-1] = ('change_part', prev[1], prev[2], step[3]) continue - if prev[0] == 'add_subgoal' and step[0] == 'remove_line' and \ - prev[1]+1 == step[1]: - new_steps[-1] = ('change_line', prev[1], (step[2][0], prev[2][1])) + if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \ + prev[1] == step[1] and step[2] == prev[3][:-1]: + new_steps[-1] = (prev[0], prev[1], prev[2], step[3]+(prev[3][-1],)) continue + if prev[0] in ('add_part', 'change_part') and step[0] == 'change_part' and \ + step[1] == prev[1]+1 and step[2] == prev[3][1:]: + new_steps[-1] = (prev[0], prev[1], prev[2], (prev[3][0],)+step[3]) + continue new_steps.append(step) + for step in new_steps: + print('index {}: {} {} → {}'.format( + step[1], step[0], stringify(step[2]), stringify(step[3]))) return new_steps # Main loop: best-first search through generated programs. @@ -129,8 +220,7 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False): # Each program gets a task with the sequence of edits that generated it and # the associated cost. First candidate with cost 1 is the initial program. - lines, rules = decompose(code) - todo.push(((tuple(lines), tuple(rules)), (), 1.0), -1.0) + todo.push((program, (), 1.0), -1.0) n_tested = 0 start_time = time.monotonic() @@ -140,10 +230,10 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False): task = todo.pop() if task == None: break - (lines, rules), path, path_cost = task + program, path, path_cost = task # If we have already seen this code, skip it. - code = compose(lines, rules) + code = compose(program) if code in done: continue done.add(code) @@ -151,8 +241,8 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False): # Print some info about the current task. if debug: print('Cost {:.12f}'.format(path_cost)) - for step_type, line, (before, after) in path: - print('line {}: {} {} → {}'.format(line, step_type, stringify(before), stringify(after))) + for step_type, idx, a, b, _ in path: + print('index {}: {} {} → {}'.format(idx, step_type, stringify(a), stringify(b))) # If the code is correct, we are done. if test(name, code + '\n' + aux_code): @@ -161,12 +251,12 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False): n_tested += 1 # Otherwise generate new solutions. - for new_lines, new_rules, new_step, new_cost in step(lines, rules, path): + for new_program, new_step, new_cost in step(program, path): new_path_cost = path_cost * new_cost if new_path_cost < 0.01: continue new_path = path + (new_step,) - todo.push(((tuple(new_lines), tuple(new_rules)), new_path, new_path_cost), -new_path_cost) + todo.push((new_program, new_path, new_path_cost), -new_path_cost) total_time = time.monotonic() - start_time -- cgit v1.2.1