From 4838e37e26c3fb72ad509d7aef7f307cc7ae3ef2 Mon Sep 17 00:00:00 2001
From: Timotej Lazar <timotej.lazar@araneo.org>
Date: Wed, 4 Feb 2015 23:48:56 +0100
Subject: Small cleanups

---
 monkey/edits.py  | 22 +++++++++++++---------
 monkey/monkey.py | 10 +++++++---
 monkey/test.py   | 14 +++++++++-----
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/monkey/edits.py b/monkey/edits.py
index 333cb12..ad595b6 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -122,15 +122,16 @@ def get_paths(root, path=None, done=None):
     if done is None:
         done = set()
 
-    # Add [root] to [path] if it is the first node or different than last.
+    # Add [root] to [path] if it is the first node or different than previous.
     if not path:
         path = (root.data[2],)
     elif root.data[2] != path[-1]:
         path = path + (root.data[2],)
 
     # Return the current path if [root] is a leaf or an empty node.
-    if len(path) > 1 and not (root.eout and root.data[2]):
-        yield path
+    if len(path) > 1:
+        if not root.eout or not root.data[2]:
+            yield path
 
     # If [root] is an empty node, start a new path.
     if not root.data[2]:
@@ -175,12 +176,11 @@ def get_edits_from_traces(traces):
             queries[code] += 1
 
         # Get edits.
-        done = set()
+        seen_edits = set()
         for path in get_paths(nodes[0]):
             for i in range(len(path)):
                 var_names = {}
                 start = tuple(rename_vars(remove_punct(path[i]), var_names))
-
                 for j in range(len(path[i+1:])):
                     var_names_copy = {k: v for k, v in var_names.items()}
                     end = tuple(rename_vars(remove_punct(path[i+1+j]), var_names_copy))
@@ -188,8 +188,8 @@ def get_edits_from_traces(traces):
                         continue
 
                     edit = (start, end)
-                    if edit not in done:
-                        done.add(edit)
+                    if edit not in seen_edits:
+                        seen_edits.add(edit)
                         edits[edit] += 1
                         lines[start] += 1
 
@@ -199,9 +199,13 @@ def get_edits_from_traces(traces):
         lines[edit[0]] -= edits[edit]
         del edits[edit]
 
-    # Get the probability of each edit given its [before] part.
+    # Normalize edits by their "before" line count; inserts by the largest insert count.
+    max_insert_count = max([count for (before, after), count in edits.items() if not before])
     for before, after in edits:
-        edits[(before, after)] /= lines[before]
+        if before:
+            edits[(before, after)] /= max(lines[before], 1)
+        else:
+            edits[(before, after)] /= max_insert_count
 
     # Normalize line frequencies.
     if len(lines) > 0:
diff --git a/monkey/monkey.py b/monkey/monkey.py
index 07d6b0a..e185630 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -8,16 +8,20 @@ from prolog.engine import test
 from prolog.util import Token, compose, decompose, map_vars, rename_vars, stringify
 from .util import PQueue
 
-# Starting from [code], find a sequence of [edits] that transforms it into a
+# Starting from [code], find a sequence of edits that transforms it into a
 # correct predicate for [name]. Append [aux_code] when testing (available facts
 # and predicates).
 # Return (solution, edits, time spent, #programs checked). If no solution is
 # found within [timeout] seconds, solution='' and edits=[].
-def fix(name, code, edits, aux_code='', timeout=30, debug=False):
+def fix(name, code, edits, program_lines, aux_code='', timeout=30, debug=False):
+    # Dictionaries of edits with costs, one per edit type (insert, remove or
+    # change a line). Edits are tuples (before, after), where before and after
+    # are sequences of tokens. Variable names are normalized to A0, A1, A2, ….
+    inserts, removes, changes = classify_edits(edits)
+
     # Generate states that can be reached from the given program with one edit.
     # Program code is given as a list of [lines], where each line is a list of
     # tokens. Rule ranges are given in [rules] (see prolog.util.decompose).
-    inserts, removes, changes = classify_edits(edits)
     def step(lines, rules, prev=None):
         # Apply edits in order from top to bottom; skip lines with index lower
         # than last step.
diff --git a/monkey/test.py b/monkey/test.py
index 0bb047e..b5701a2 100755
--- a/monkey/test.py
+++ b/monkey/test.py
@@ -18,6 +18,7 @@ from .util import indent
 # Load django models.
 os.environ['DJANGO_SETTINGS_MODULE'] = 'webmonkey.settings'
 django.setup()
+from django.apps import apps as tutor_apps
 from django.contrib.auth.models import User
 from tutor.models import Attempt, Problem, get_aux_code
 
@@ -27,13 +28,16 @@ if len(sys.argv) < 2:
     sys.exit(1)
 pid = int(sys.argv[1])
 
-# Analyze traces for this problem to get edits, submissions and queries.
 problem = Problem.objects.get(pk=pid)
 aux_code = get_aux_code(user=User.objects.get(pk=1), problem=problem)
 
 attempts = Attempt.objects.filter(problem=problem)
-traces = [a.trace for a in attempts]
-edits, lines, submissions, queries = get_edits_from_traces(traces)
+
+# Load hint database stored in edits.pickle.
+edits = tutor_apps.get_app_config('tutor').edits[problem.pk]
+lines = tutor_apps.get_app_config('tutor').lines[problem.pk]
+submissions = tutor_apps.get_app_config('tutor').submissions[problem.pk]
+queries = tutor_apps.get_app_config('tutor').queries[problem.pk]
 
 # Find incorrect submissions.
 incorrect = []
@@ -72,7 +76,7 @@ if len(sys.argv) >= 3 and sys.argv[2] == 'test':
         print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
         print(indent(compose(*decompose(program)), 2))
 
-        solution, steps, fix_time, n_tested = fix(problem.name, program, edits, aux_code=aux_code, timeout=timeout)
+        solution, steps, fix_time, n_tested = fix(problem.name, program, edits, lines, aux_code=aux_code, timeout=timeout)
         if solution:
             done.append(program)
         print_hint(solution, steps, fix_time, n_tested)
@@ -88,7 +92,7 @@ elif len(sys.argv) >= 3 and sys.argv[2] == 'info':
     if len(sys.argv) == 3:
         print('Problem {} ({}): {} edits in {} traces, fixed {}/{} ({}/{} unique)'.format(
             problem.pk, colored(problem.name, 'yellow'),
-            colored(str(len(edits)), 'yellow'), colored(str(len(traces)), 'yellow'),
+            colored(str(len(edits)), 'yellow'), colored(str(len([a.trace for a in attempts])), 'yellow'),
             colored(str(len([p for p in incorrect if p in done])), 'yellow'),
             colored(str(len(incorrect)), 'yellow'),
             colored(str(len(set(done))), 'yellow'),
-- 
cgit v1.2.1