summary refs log tree commit diff
path: root/monkey/test.py
diff options
context:
space:
mode:
author Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2015-11-18 13:20:00 +0100
committer Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2015-12-11 16:11:15 +0100
commit fa39fe7bfedd0b2e615d369adb5b510ceb9b857f (patch)
tree abf381b007d1816247cab534ac8e2152695ad596 /monkey/test.py
parent dd723bd01634fa5ffc85402ea10947e472b257af (diff)
Use a more general method for extracting edits
This is a large overhaul of monkey code. Before, only edits within individual lines were tracked, which required a Prolog-specific method for splitting a program into a list of lines for every rule. In this version, modifications can be tracked within arbitrary code ranges. Ranges to be tracked are determined by selecting "interesting" subtrees in the AST of the starting code version. The new method is simpler, less language-dependent and easier to extend. The downside is that a program must be syntactically correct before we can attempt to fix it (the previous approach could handle programs with syntax errors in some cases). This commit also integrates a call to monkey.fix in prolog_session.hint, by running it if no other hint is found.
Diffstat (limited to 'monkey/test.py')
-rwxr-xr-x monkey/test.py 61
1 files changed, 35 insertions, 26 deletions
diff --git a/monkey/test.py b/monkey/test.py
index bca55d8..9eb91e1 100755
--- a/monkey/test.py
+++ b/monkey/test.py
@@ -23,7 +23,7 @@ from termcolor import colored
from db.models import CodeqUser, Problem, Solution
from .graph import graphviz
from . import fix, fix_hints
-from prolog.util import annotate, compose, stringify
+from prolog.util import parse, tokenize, stringify
import server.problems
from .util import indent
@@ -48,8 +48,8 @@ def test(code):
traces = [s.trace for s in Solution.filter(problem_id=problem.id)]
# Load hint database stored in edits.pickle.
-edits, submissions, queries, names = pickle.load(open('edits.pickle', 'rb'))
-edits, submissions, queries, names = edits[problem.id], submissions[problem.id], queries[problem.id], names[problem.id]
+edits, submissions, queries = pickle.load(open('edits.pickle', 'rb'))
+edits, submissions, queries = edits[problem.id], submissions[problem.id], queries[problem.id]
# Load current status (programs for which a hint was found).
try:
@@ -61,13 +61,13 @@ def print_hint(code, solution, steps, fix_time, n_tested):
if solution:
print(colored('Hint found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'green'))
print(colored(' Edits', 'blue'))
- for step_type, pos, a, b in steps:
- print(' {}: {} {} → {}'.format(pos, step_type, stringify(a), stringify(b)))
+ for idx, a, b in steps:
+ print(' {}: {} → {}'.format(idx, stringify(a), stringify(b)))
print(colored(' Hints', 'blue'))
- for fix_type, start, end, msg in fix_hints(code, steps):
- print(' {}-{}: {} (fix type: {})'.format(start, end, msg, fix_type))
+ for hint in fix_hints(code, steps):
+ print(' {}'.format(hint))
print(colored(' Final version', 'blue'))
- print(indent(compose(annotate(solution)), 2))
+ print(indent(stringify(parse(solution)), 2))
else:
print(colored('Hint not found! Tested {} programs in {:.1f} s.'.format(n_tested, fix_time), 'red'))
@@ -88,36 +88,41 @@ if len(sys.argv) == 2:
# Try finding a fix.
print(colored('Analyzing program…', 'yellow'))
- solution, steps, fix_time, n_tested = fix(name, code, edits, test, debug=True)
+ solution, steps, fix_time, n_tested = fix(code, edits, test, debug=True)
print_hint(code, solution, steps, fix_time, n_tested)
# Test fix() on incorrect student submissions.
elif sys.argv[2] == 'test':
timeout = int(sys.argv[3]) if len(sys.argv) == 4 else 10
-
- # Find incorrect submissions.
- incorrect_all = []
- for submission, count in sorted(submissions.items()):
- if not test(submission):
- # This incorrect submission appeared in [count] traces.
- incorrect_all += [submission]*count
- incorrect = set(incorrect_all)
+ incorrect = []
+ for (code, correct), count in sorted(submissions.items()):
+ # Skip syntactically-incorrect submissions.
+ if parse(code) is None:
+ continue
+ if not correct:
+ incorrect += [code] * count
print('Fixing {}/{} programs (timeout={})…'.format(
- len([p for p in incorrect if p not in done]), len(incorrect), timeout))
+ len([code for code in incorrect if code not in done]),
+ len(incorrect), timeout))
+ undone = []
for i, program in enumerate(sorted(incorrect)):
if program in done:
+ done.append(program)
+ continue
+ if program in undone:
continue
- print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
- print(indent(compose(annotate(program)), 2))
- solution, steps, fix_time, n_tested = fix(name, program, edits, test, timeout=timeout)
- if solution:
- done.append(program)
+ print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow'))
+ solution, steps, fix_time, n_tested = fix(program, edits, test, timeout=timeout, debug=True)
print_hint(program, solution, steps, fix_time, n_tested)
print()
+ if solution:
+ done.append(program)
+ else:
+ undone.append(program)
pickle.dump(done, open('status-'+str(problem.id)+'.pickle', 'wb'))
print('Found hints for ' + str(len(done)) + ' of ' + str(len(incorrect)) + ' incorrect programs')
@@ -126,7 +131,7 @@ elif sys.argv[2] == 'test':
elif sys.argv[2] == 'info':
# With no additional arguments, print some stats.
if len(sys.argv) == 3:
- print('Problem {} ({}): {} edits and {} unique submissions in {} traces'.format(
+ print('Problem {} ({}): {} edits and {} different submissions in {} traces'.format(
problem.id, colored(name, 'yellow'),
colored(str(len(edits)), 'yellow'),
colored(str(len(submissions)), 'yellow'),
@@ -149,5 +154,9 @@ elif sys.argv[2] == 'info':
# Print all student submissions and their counts.
elif sys.argv[3] == 'submissions':
- for submission, count in submissions.most_common():
- print('{}\t{}'.format(count, submission))
+ which = None
+ if len(sys.argv) > 4:
+ which = sys.argv[4] == 'good'
+ for (code, correct), count in submissions.most_common():
+ if which is None or correct == which:
+ print('{}\t{}'.format(count, code))