From 7c60e736e514ffb81b12f5ccd45e36c8fcdcaffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mehmet=20Akal=C4=B1n?= Date: Mon, 10 Aug 2015 10:53:52 +0200 Subject: Add code to check for typos --- monkey/edits.py | 29 ++++++++++++++++++++--------- monkey/monkey.py | 25 +++++++++++++++++++++++-- monkey/test.py | 6 ++++++ 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/monkey/edits.py b/monkey/edits.py index 2dcd653..01e1ffd 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -40,7 +40,7 @@ def trace_graph(trace): code_next = action.apply(code) if action.type == 'test': - submissions.add(code) + submissions.add((code, action.total == action.passed)) if action.total == action.passed: done = True @@ -172,6 +172,7 @@ def get_edits_from_traces(traces): edits = collections.Counter() submissions = collections.Counter() queries = collections.Counter() + names = collections.Counter() # Counts of traces where each line appears as a leaf / any node. n_leaf = collections.Counter() @@ -179,11 +180,17 @@ def get_edits_from_traces(traces): for trace in traces: nodes, trace_submissions, trace_queries = trace_graph(trace) - + counted_tokens = [] # Update the submissions/queries counters (use normalized variables). 
- for submission in trace_submissions: - code = stringify(rename_vars(tokenize(submission))) - submissions[code] += 1 + for (submission, correct) in trace_submissions: + if correct: + tokens = list(tokenize(submission)) + for token in tokens: + if token.type == 'NAME' and token.val not in counted_tokens: + names[token.val] += 1 + counted_tokens.append(token.val) + code = stringify(rename_vars(tokens)) + submissions[code] += 1 for query in trace_queries: code = stringify(rename_vars(tokenize(query))) queries[code] += 1 @@ -215,7 +222,7 @@ def get_edits_from_traces(traces): for edit, p in edits.items(): edits[edit] = logistic(p, k=3, x_0=avg_p) - return edits, submissions, queries + return edits, submissions, queries, names def classify_edits(edits): inserts = {} @@ -244,13 +251,17 @@ if __name__ == '__main__': edits = {} submissions = {} queries = {} + names = {} for problem in Problem.objects.all(): print(problem.name) pid = problem.pk attempts = Attempt.objects.filter(problem=problem, done=True) \ .exclude(user__groups=None) traces = [a.trace for a in attempts] - if traces: - edits[pid], submissions[pid], queries[pid] = get_edits_from_traces(traces) + try: + edits[pid], submissions[pid], queries[pid], names[pid] = get_edits_from_traces(traces) + except: + pass + + pickle.dump((edits, submissions, queries, names), open('edits.pickle', 'wb')) - pickle.dump((edits, submissions, queries), open('edits.pickle', 'wb')) diff --git a/monkey/monkey.py b/monkey/monkey.py index 99f5a2a..02048de 100755 --- a/monkey/monkey.py +++ b/monkey/monkey.py @@ -2,11 +2,11 @@ import math import time - import prolog.engine + from .edits import classify_edits from prolog.util import Token, annotate, compose, map_vars, normalized, rename_vars, stringify -from .util import PQueue +from .util import damerau_levenshtein, PQueue # Check whether all tests for problem [name] succeed. 
def test(name, code): @@ -311,3 +311,24 @@ def fix_hints(code, path): program[idx:idx+len(a)] = [t.clone(pos=program[idx].pos) for t in b] yield fix_type, start, end, msg + + +# Checks for typos in the code and suggests the nearest term uploaded by other users. +def check_typos(code, names): + for token in annotate(code): + if token.type == 'NAME': + nearest_name = ' ' + nearest_dist = 1000 + own_count = names.get(token.val, 0) # count of token.val, which is compared with the + # count of each name in names + for name in names.items(): + if name[0] == token.val: # If the names are the same, skip this one + continue + + distance = damerau_levenshtein(token.val, name[0]) + + if distance < nearest_dist and distance > 0 and own_count < name[1]: + nearest_dist = distance # Update nearest_dist and nearest_name if a closer one is found + nearest_name = name[0] + if nearest_dist > 0 and nearest_dist/len(nearest_name) <= 1/3: + yield 'typo', token.pos, token.pos + len(token.val) , 'Did you mean "{}"?'.format(nearest_name) diff --git a/monkey/test.py b/monkey/test.py index 6549c1b..bb28e9b 100755 --- a/monkey/test.py +++ b/monkey/test.py @@ -36,6 +36,7 @@ attempts = Attempt.objects.filter(problem=problem) \ edits = tutor_apps.get_app_config('tutor').edits[problem.pk] submissions = tutor_apps.get_app_config('tutor').submissions[problem.pk] queries = tutor_apps.get_app_config('tutor').queries[problem.pk] +names = tutor_apps.get_app_config('tutor').names[problem.pk] # Find incorrect submissions. incorrect_all = [] @@ -135,6 +136,11 @@ elif sys.argv[2] == 'info': for (before, after), cost in sorted(changes.items(), key=lambda x: x[1]): print(' {:.4f}\t{} → {}'.format(cost, stringify(before) if before else 'ε', stringify(after) if after else 'ε')) + # Print all observed names and their counts. + elif sys.argv[3] == 'names': + for name, count in sorted(names.items(), key=lambda x: x[1]): + print(' {:.4f}\t{}'.format(count, name)) + # Print all student submissions not (yet) corrected. 
elif sys.argv[3] == 'unsolved': for p in sorted(incorrect): -- cgit v1.2.1