From 7c60e736e514ffb81b12f5ccd45e36c8fcdcaffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mehmet=20Akal=C4=B1n?= Date: Mon, 10 Aug 2015 10:53:52 +0200 Subject: Add code to check for typos --- monkey/edits.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'monkey/edits.py') diff --git a/monkey/edits.py b/monkey/edits.py index 2dcd653..01e1ffd 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -40,7 +40,7 @@ def trace_graph(trace): code_next = action.apply(code) if action.type == 'test': - submissions.add(code) + submissions.add((code, action.total == action.passed)) if action.total == action.passed: done = True @@ -172,6 +172,7 @@ def get_edits_from_traces(traces): edits = collections.Counter() submissions = collections.Counter() queries = collections.Counter() + names = collections.Counter() # Counts of traces where each line appears as a leaf / any node. n_leaf = collections.Counter() @@ -179,11 +180,17 @@ def get_edits_from_traces(traces): for trace in traces: nodes, trace_submissions, trace_queries = trace_graph(trace) - + counted_tokens = [] # Update the submissions/queries counters (use normalized variables). - for submission in trace_submissions: - code = stringify(rename_vars(tokenize(submission))) - submissions[code] += 1 + for (submission, correct) in trace_submissions: + if correct: + tokens = list(tokenize(submission)) + for token in tokens: + if token.type == 'NAME' and token.val not in counted_tokens: + names[token.val] += 1 + counted_tokens.append(token.val) + code = stringify(rename_vars(tokens)) + submissions[code] += 1 for query in trace_queries: code = stringify(rename_vars(tokenize(query))) queries[code] += 1 @@ -215,7 +222,7 @@ def get_edits_from_traces(traces): for edit, p in edits.items(): edits[edit] = logistic(p, k=3, x_0=avg_p) - return edits, submissions, queries + return edits, submissions, queries, names def classify_edits(edits): inserts = {} @@ -244,13 +251,17 @@ if __name__ == '__main__': edits = {} submissions = {} queries = {} + names = {} for problem in Problem.objects.all(): print(problem.name) pid = problem.pk attempts = Attempt.objects.filter(problem=problem, done=True) \ .exclude(user__groups=None) traces = [a.trace for a in attempts] - if traces: - edits[pid], submissions[pid], queries[pid] = get_edits_from_traces(traces) + try: + edits[pid], submissions[pid], queries[pid], names[pid] = get_edits_from_traces(traces) + except: + pass + + pickle.dump((edits, submissions, queries, names), open('edits.pickle', 'wb')) - pickle.dump((edits, submissions, queries), open('edits.pickle', 'wb')) -- cgit v1.2.1