From 4838e37e26c3fb72ad509d7aef7f307cc7ae3ef2 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Wed, 4 Feb 2015 23:48:56 +0100 Subject: Small cleanups --- monkey/edits.py | 22 +++++++++++++--------- monkey/monkey.py | 10 +++++++--- monkey/test.py | 14 +++++++++----- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/monkey/edits.py b/monkey/edits.py index 333cb12..ad595b6 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -122,15 +122,16 @@ def get_paths(root, path=None, done=None): if done is None: done = set() - # Add [root] to [path] if it is the first node or different than last. + # Add [root] to [path] if it is the first node or different than previous. if not path: path = (root.data[2],) elif root.data[2] != path[-1]: path = path + (root.data[2],) # Return the current path if [root] is a leaf or an empty node. - if len(path) > 1 and not (root.eout and root.data[2]): - yield path + if len(path) > 1: + if not root.eout or not root.data[2]: + yield path # If [root] is an empty node, start a new path. if not root.data[2]: @@ -175,12 +176,11 @@ def get_edits_from_traces(traces): queries[code] += 1 # Get edits. - done = set() + seen_edits = set() for path in get_paths(nodes[0]): for i in range(len(path)): var_names = {} start = tuple(rename_vars(remove_punct(path[i]), var_names)) - for j in range(len(path[i+1:])): var_names_copy = {k: v for k, v in var_names.items()} end = tuple(rename_vars(remove_punct(path[i+1+j]), var_names_copy)) @@ -188,8 +188,8 @@ def get_edits_from_traces(traces): continue edit = (start, end) - if edit not in done: - done.add(edit) + if edit not in seen_edits: + seen_edits.add(edit) edits[edit] += 1 lines[start] += 1 @@ -199,9 +199,13 @@ def get_edits_from_traces(traces): lines[edit[0]] -= edits[edit] del edits[edit] - # Get the probability of each edit given its [before] part. + # Get the probability of each edit given its "before" or "after" part. + max_insert_count = max([count for (before, after), count in edits.items() if not before]) for before, after in edits: - edits[(before, after)] /= lines[before] + if before: + edits[(before, after)] /= max(lines[before], 1) + else: + edits[(before, after)] /= max_insert_count # Normalize line frequencies. if len(lines) > 0: diff --git a/monkey/monkey.py b/monkey/monkey.py index 07d6b0a..e185630 100755 --- a/monkey/monkey.py +++ b/monkey/monkey.py @@ -8,16 +8,20 @@ from prolog.engine import test from prolog.util import Token, compose, decompose, map_vars, rename_vars, stringify from .util import PQueue -# Starting from [code], find a sequence of [edits] that transforms it into a +# Starting from [code], find a sequence of edits that transforms it into a # correct predicate for [name]. Append [aux_code] when testing (available facts # and predicates). # Return (solution, edits, time spent, #programs checked). If no solution is # found within [timeout] seconds, solution='' and edits=[]. -def fix(name, code, edits, aux_code='', timeout=30, debug=False): +def fix(name, code, edits, program_lines, aux_code='', timeout=30, debug=False): + # A dictionary of edits with costs for each edit type (insert, remove or + # change a line). Edits are tuples (before, after), where before and after + # are sequences of tokens. Variable names are normalized to A0, A1, A2,…. + inserts, removes, changes = classify_edits(edits) + # Generate states that can be reached from the given program with one edit. # Program code is given as a list of [lines], where each line is a list of # tokens. Rule ranges are given in [rules] (see prolog.util.decompose). - inserts, removes, changes = classify_edits(edits) def step(lines, rules, prev=None): # Apply edits in order from top to bottom; skip lines with index lower # than last step. diff --git a/monkey/test.py b/monkey/test.py index 0bb047e..b5701a2 100755 --- a/monkey/test.py +++ b/monkey/test.py @@ -18,6 +18,7 @@ from .util import indent # Load django models. os.environ['DJANGO_SETTINGS_MODULE'] = 'webmonkey.settings' django.setup() +from django.apps import apps as tutor_apps from django.contrib.auth.models import User from tutor.models import Attempt, Problem, get_aux_code @@ -27,13 +28,16 @@ if len(sys.argv) < 2: sys.exit(1) pid = int(sys.argv[1]) -# Analyze traces for this problem to get edits, submissions and queries. problem = Problem.objects.get(pk=pid) aux_code = get_aux_code(user=User.objects.get(pk=1), problem=problem) attempts = Attempt.objects.filter(problem=problem) -traces = [a.trace for a in attempts] -edits, lines, submissions, queries = get_edits_from_traces(traces) + +# Load hint database stored in edits.pickle. +edits = tutor_apps.get_app_config('tutor').edits[problem.pk] +lines = tutor_apps.get_app_config('tutor').lines[problem.pk] +submissions = tutor_apps.get_app_config('tutor').submissions[problem.pk] +queries = tutor_apps.get_app_config('tutor').queries[problem.pk] # Find incorrect submissions. incorrect = [] @@ -72,7 +76,7 @@ if len(sys.argv) >= 3 and sys.argv[2] == 'test': print(colored('Analyzing program {0}/{1}…'.format(i+1, len(incorrect)), 'yellow')) print(indent(compose(*decompose(program)), 2)) - solution, steps, fix_time, n_tested = fix(problem.name, program, edits, aux_code=aux_code, timeout=timeout) + solution, steps, fix_time, n_tested = fix(problem.name, program, edits, lines, aux_code=aux_code, timeout=timeout) if solution: done.append(program) print_hint(solution, steps, fix_time, n_tested) @@ -88,7 +92,7 @@ elif len(sys.argv) >= 3 and sys.argv[2] == 'info': if len(sys.argv) == 3: print('Problem {} ({}): {} edits in {} traces, fixed {}/{} ({}/{} unique)'.format( problem.pk, colored(problem.name, 'yellow'), - colored(str(len(edits)), 'yellow'), colored(str(len(traces)), 'yellow'), + colored(str(len(edits)), 'yellow'), colored(str(len([a.trace for a in attempts])), 'yellow'), colored(str(len([p for p in incorrect if p in done])), 'yellow'), colored(str(len(incorrect)), 'yellow'), colored(str(len(set(done))), 'yellow'), -- cgit v1.2.1