2 files changed, 66 insertions, 38 deletions
diff --git a/monkey/monkey.py b/monkey/monkey.py
index ef4ac2c..cd44bac 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -8,34 +8,32 @@ from .prolog.engine import test
 from .prolog.util import compose, decompose, map_vars, rename_vars, stringify
 from .util import PQueue, Token
 
-# score a program (a list of lines) according to lines distribution
-def score(program, lines):
-    result = 1
-    for line in program:
-        line_normal = list(line)
-        rename_vars(line_normal)
-        line_normal = tuple(line_normal)
-        result *= lines.get(line_normal, 0.01)
-
-    if len(program) == 0 or result == 0:
-        return 0.01
-    return math.pow(result, 1/len(program))
-
-# find a sequence of edits that fixes [code]
+# Starting from [code], find a sequence of [edits] that transforms it into a
+# correct predicate for [name]. Append [aux_code] when testing (available facts
+# and predicates).
+# Return (solution, edits, time spent, #programs checked). If no solution is
+# found within [timeout] seconds, solution='' and edits=[].
 def fix(name, code, edits, aux_code='', timeout=30, debug=False):
     todo = PQueue()  # priority queue of candidate solutions
-    done = set()     # set of already-analyzed solutions
+    done = set()     # programs we have already visited
 
     inserts, removes, changes = classify_edits(edits)
 
-    # Generate states that can be reached from (lines, rules) with one edit.
-    def step(lines, rules):
-        rule_no = 0
+    # Generate states that can be reached from the given program with one edit.
+    # Program code is given as a list of [lines], where each line is a list of
+    # tokens. Rule ranges are given in [rules] (see prolog.util.decompose).
+    def step(lines, rules, prev=None):
+        # Apply edits in order from top to bottom; skip lines with index lower
+        # than last step.
+        start_line = prev[1] if prev else 0
+
         for start, end in rules:
-            rule = lines[start:end]
-            rule_tokens = [t for line in rule for t in line]
+            rule_lines = lines[start:end]
+            rule_tokens = [t for line in rule_lines for t in line]
 
             for line_idx in range(start, end):
+                if line_idx < start_line:
+                    continue
                 line = lines[line_idx]
 
                 line_normal = list(line)
@@ -43,18 +41,18 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
                 line_normal = tuple(line_normal)
 
                 seen = False
+                # Apply each edit that matches this line.
                 for (before, after), cost in changes.items():
                     if line_normal == before:
                         seen = True
                         mapping = map_vars(before, after, line, rule_tokens)
                         after_real = tuple([t if t.type != 'VARIABLE' else Token('VARIABLE', mapping[t.val]) for t in after])
                         new_lines = lines[:line_idx] + (after_real,) + lines[line_idx+1:]
-                        new_step = ((rule_no, line_idx-start), (tuple(line), after_real))
+                        new_step = ('change_line', line_idx, (tuple(line), after_real))
 
                         yield (new_lines, rules, new_step, cost)
 
-                # if nothing could be done with this line, try removing it
-                # (maybe try removing in any case?)
+                # Remove the current line.
                 if line_normal in removes.keys() or not seen:
                     new_lines = lines[:line_idx] + lines[line_idx+1:]
                     new_rules = []
@@ -63,13 +61,12 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
                                               old_end - (0 if old_end <= line_idx else 1))
                         if new_end > new_start:
                             new_rules.append((new_start, new_end))
-                    new_step = ((rule_no, line_idx-start), (tuple(line), ()))
+                    new_step = ('remove_line', line_idx, (tuple(line), ()))
                     new_cost = removes[line_normal] if line_normal in removes.keys() else 0.9
 
                     yield (new_lines, new_rules, new_step, new_cost)
 
-            # try adding a line to this rule… would need to distinguish between
-            # head/body lines in transforms
+            # Add a line to the current rule.
             for after, cost in inserts.items():
                 mapping = map_vars([], after, [], rule_tokens)
                 after_real = [t if t.type != 'VARIABLE' else Token('VARIABLE', mapping[t.val]) for t in after]
@@ -79,20 +76,36 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
                 for old_start, old_end in rules:
                     new_rules.append((old_start + (0 if old_start < end else 1),
                                       old_end + (0 if old_end < end else 1)))
-                new_step = ((rule_no, end-start), ((), after_real))
+                new_step = ('add_subgoal', end, ((), after_real))
 
                 yield (new_lines, new_rules, new_step, cost)
-            rule_no += 1
 
-        # try adding a new fact
+        # Add a new fact at the end.
         if len(rules) < 2:
             for after, cost in inserts.items():
                 new_lines = lines + (after,)
                 new_rules = rules + (((len(lines), len(lines)+1)),)
-                new_step = ((len(new_rules)-1, 0), (tuple(), tuple(after)))
+                new_step = ('add_rule', len(new_lines)-1, (tuple(), tuple(after)))
 
                 yield (new_lines, new_rules, new_step, cost)
 
+    # Return a cleaned-up list of steps:
+    #   - multiple line edits in a single line are merged into one
+    #   - check if any lines can be removed from the program
+    def postprocess(steps):
+        new_steps = []
+        i = 0
+        while i < len(steps):
+            if i < len(steps)-1 and \
+               steps[i][0] == 'change_line' and steps[i+1][0] == 'change_line' and \
+               steps[i][1] == steps[i+1][1] and steps[i][2][1] == steps[i+1][2][0]:
+                new_steps.append(('change_line', steps[i][1], (steps[i][2][0], steps[i+1][2][1])))
+                i += 1
+            else:
+                new_steps.append(steps[i])
+            i += 1
+        return new_steps
+
     # Add the initial program to the queue.
     lines, rules = decompose(code)
     todo.push(((tuple(lines), tuple(rules)), (), 1.0), -1.0)
@@ -119,20 +132,35 @@ def fix(name, code, edits, aux_code='', timeout=30, debug=False):
 
         if debug:
             print('Cost {:.12f}'.format(path_cost))
-            for line, (before, after) in path:
-                print('line ' + str(line) + ':\t' + stringify(before) + ' → ' + stringify(after))
+            for step_type, line, (before, after) in path:
+                print('line {}: {} {} → {}'.format(line, step_type,  stringify(before), stringify(after)))
 
         # If the code is correct, we are done.
         if test(name, code + '\n' + aux_code):
+            path = postprocess(path)
             return code, path, total_time, n_tested
         n_tested += 1
 
         # Otherwise generate new solutions.
         path = list(path)
-        for new_lines, new_rules, new_step, new_cost in step(lines, rules):
-            new_path = tuple(path + [new_step])
+        prev_step = path[-1] if path else None
+        for new_lines, new_rules, new_step, new_cost in step(lines, rules, prev_step):
             new_path_cost = path_cost * new_cost * 0.99
-            if new_path_cost > 0.005:
-                todo.push(((tuple(new_lines), tuple(new_rules)), new_path, new_path_cost), -new_path_cost)
+            if new_path_cost < 0.005:
+                continue
+            new_path = tuple(path + [new_step])
+            todo.push(((tuple(new_lines), tuple(new_rules)), new_path, new_path_cost), -new_path_cost)
 
     return '', [], total_time, n_tested
+
+# Return a list of character ranges modified by the sequence [edits].
+def fix_ranges(edits):
+    marks = []
+    for step_type, line, (before, after) in edits:
+        if step_type == 'change_line':
+            marks.append(('change', (before[0].pos, before[-1].pos+len(before[-1].val))))
+        elif step_type == 'remove_line':
+            marks.append(('remove', (before[0].pos, before[-1].pos+len(before[-1].val))))
+        elif step_type == 'add_subgoal' or step_type == 'add_rule':
+            marks.append((step_type, line))
+    return marks
diff --git a/monkey/test.py b/monkey/test.py
index 01caf08..4bf5db9 100755
--- a/monkey/test.py
+++ b/monkey/test.py
@@ -52,8 +52,8 @@ def print_hint(solution, steps, fix_time, n_tested):
     if solution:
         print(colored('Hint found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'green'))
         print(colored(' Edits', 'blue'))
-        for line, (before, after) in steps:
-            print('  {}:\t{} → {}'.format(line, stringify(before), stringify(after)))
+        for step_type, line, (before, after) in steps:
+            print('  {}: {} {} → {}'.format(line, step_type, stringify(before), stringify(after)))
         print(colored(' Final version', 'blue'))
         print(indent(compose(*decompose(solution)), 2))
     else: