Use a more general method for extracting edits

This is a large overhaul of the monkey code. Previously, only edits within individual lines were tracked, which required a Prolog-specific method for splitting a program into a list of lines for every rule. In this version, modifications can be tracked within arbitrary code ranges. The ranges to track are determined by selecting "interesting" subtrees in the AST of the starting code version. The new method is simpler, less language-dependent and easier to extend. The downside is that a program must be syntactically correct before we can attempt to fix it (the previous approach could handle programs with syntax errors in some cases). This commit also integrates a call to monkey.fix into prolog_session.hint, which now runs it if no other hint is found.
author: Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2015-11-18 13:20:00 +0100
committer: Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2015-12-11 16:11:15 +0100
commit: fa39fe7bfedd0b2e615d369adb5b510ceb9b857f (patch)
tree: abf381b007d1816247cab534ac8e2152695ad596 /monkey/test.py
parent: dd723bd01634fa5ffc85402ea10947e472b257af (diff)
1 files changed, 35 insertions, 26 deletions
diff --git a/monkey/test.py b/monkey/test.py
index bca55d8..9eb91e1 100755
--- a/monkey/test.py
+++ b/monkey/test.py
@@ -23,7 +23,7 @@ from termcolor import colored
 from db.models import CodeqUser, Problem, Solution
 from .graph import graphviz
 from . import fix, fix_hints
-from prolog.util import annotate, compose, stringify
+from prolog.util import parse, tokenize, stringify
 import server.problems
 from .util import indent
 
@@ -48,8 +48,8 @@ def test(code):
 traces = [s.trace for s in Solution.filter(problem_id=problem.id)]
 
 # Load hint database stored in edits.pickle.
-edits, submissions, queries, names = pickle.load(open('edits.pickle', 'rb'))
-edits, submissions, queries, names = edits[problem.id], submissions[problem.id], queries[problem.id], names[problem.id]
+edits, submissions, queries = pickle.load(open('edits.pickle', 'rb'))
+edits, submissions, queries = edits[problem.id], submissions[problem.id], queries[problem.id]
 
 # Load current status (programs for which a hint was found).
 try:
@@ -61,13 +61,13 @@ def print_hint(code, solution, steps, fix_time, n_tested):
     if solution:
         print(colored('Hint found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'green'))
         print(colored(' Edits', 'blue'))
-        for step_type, pos, a, b in steps:
-            print('  {}: {} {} → {}'.format(pos, step_type, stringify(a), stringify(b)))
+        for idx, a, b in steps:
+            print('  {}: {} → {}'.format(idx, stringify(a), stringify(b)))
         print(colored(' Hints', 'blue'))
-        for fix_type, start, end, msg in fix_hints(code, steps):
-            print('  {}-{}: {} (fix type: {})'.format(start, end, msg, fix_type))
+        for hint in fix_hints(code, steps):
+            print('  {}'.format(hint))
         print(colored(' Final version', 'blue'))
-        print(indent(compose(annotate(solution)), 2))
+        print(indent(stringify(parse(solution)), 2))
     else:
         print(colored('Hint not found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'red'))
 
@@ -88,36 +88,41 @@ if len(sys.argv) == 2:
 
         # Try finding a fix.
         print(colored('Analyzing program…', 'yellow'))
-        solution, steps, fix_time, n_tested = fix(name, code, edits, test, debug=True)
+        solution, steps, fix_time, n_tested = fix(code, edits, test, debug=True)
         print_hint(code, solution, steps, fix_time, n_tested)
 
 # Test fix() on incorrect student submissions.
 elif sys.argv[2] == 'test':
     timeout = int(sys.argv[3]) if len(sys.argv) == 4 else 10
-
-    # Find incorrect submissions.
-    incorrect_all = []
-    for submission, count in sorted(submissions.items()):
-        if not test(submission):
-            # This incorrect submission appeared in [count] traces.
-            incorrect_all += [submission]*count
-    incorrect = set(incorrect_all)
+    incorrect = []
+    for (code, correct), count in sorted(submissions.items()):
+        # Skip syntactically-incorrect submissions.
+        if parse(code) is None:
+            continue
+        if not correct:
+            incorrect += [code] * count
 
     print('Fixing {}/{} programs (timeout={})…'.format(
-        len([p for p in incorrect if p not in done]), len(incorrect), timeout))
+        len([code for code in incorrect if code not in done]),
+        len(incorrect), timeout))
 
+    undone = []
     for i, program in enumerate(sorted(incorrect)):
         if program in done:
+            done.append(program)
+            continue
+        if program in undone:
             continue
-        print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
-        print(indent(compose(annotate(program)), 2))
 
-        solution, steps, fix_time, n_tested = fix(name, program, edits, test, timeout=timeout)
-        if solution:
-            done.append(program)
+        print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
+        solution, steps, fix_time, n_tested = fix(program, edits, test, timeout=timeout, debug=True)
         print_hint(program, solution, steps, fix_time, n_tested)
         print()
 
+        if solution:
+            done.append(program)
+        else:
+            undone.append(program)
         pickle.dump(done, open('status-'+str(problem.id)+'.pickle', 'wb'))
 
     print('Found hints for ' + str(len(done)) + ' of ' + str(len(incorrect)) + ' incorrect programs')
@@ -126,7 +131,7 @@ elif sys.argv[2] == 'test':
 elif sys.argv[2] == 'info':
     # With no additional arguments, print some stats.
     if len(sys.argv) == 3:
-        print('Problem {} ({}): {} edits and {} unique submissions in {} traces'.format(
+        print('Problem {} ({}): {} edits and {} different submissions in {} traces'.format(
             problem.id, colored(name, 'yellow'),
             colored(str(len(edits)), 'yellow'),
             colored(str(len(submissions)), 'yellow'),
@@ -149,5 +154,9 @@ elif sys.argv[2] == 'info':
 
     # Print all student submissions and their counts.
     elif sys.argv[3] == 'submissions':
-        for submission, count in submissions.most_common():
-            print('{}\t{}'.format(count, submission))
+        which = None
+        if len(sys.argv) > 4:
+            which = sys.argv[4] == 'good'
+        for (code, correct), count in submissions.most_common():
+            if which is None or correct == which:
+                print('{}\t{}'.format(count, code))
author	Timotej Lazar <timotej.lazar@fri.uni-lj.si>	2015-11-18 13:20:00 +0100
committer	Timotej Lazar <timotej.lazar@fri.uni-lj.si>	2015-12-11 16:11:15 +0100
commit	fa39fe7bfedd0b2e615d369adb5b510ceb9b857f (patch)
tree	abf381b007d1816247cab534ac8e2152695ad596 /monkey/test.py
parent	dd723bd01634fa5ffc85402ea10947e472b257af (diff)