summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMehmet Akalın <mehmet.akalin@ceng.metu.edu.tr>2015-08-10 10:53:52 +0200
committerAleš Smodiš <aless@guru.si>2015-08-11 14:26:03 +0200
commit7c60e736e514ffb81b12f5ccd45e36c8fcdcaffa (patch)
treee3f54e1ee35ce25cf91a548257a01b1de6de32ea
parent8c1ad089121249c1f81838ce6871f3beb86787da (diff)
Add code to check for typos
-rw-r--r--monkey/edits.py29
-rwxr-xr-xmonkey/monkey.py25
-rwxr-xr-xmonkey/test.py6
3 files changed, 49 insertions, 11 deletions
diff --git a/monkey/edits.py b/monkey/edits.py
index 2dcd653..01e1ffd 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -40,7 +40,7 @@ def trace_graph(trace):
code_next = action.apply(code)
if action.type == 'test':
- submissions.add(code)
+ submissions.add((code, action.total == action.passed))
if action.total == action.passed:
done = True
@@ -172,6 +172,7 @@ def get_edits_from_traces(traces):
edits = collections.Counter()
submissions = collections.Counter()
queries = collections.Counter()
+ names = collections.Counter()
# Counts of traces where each line appears as a leaf / any node.
n_leaf = collections.Counter()
@@ -179,11 +180,17 @@ def get_edits_from_traces(traces):
for trace in traces:
nodes, trace_submissions, trace_queries = trace_graph(trace)
-
+ counted_tokens = []
# Update the submissions/queries counters (use normalized variables).
- for submission in trace_submissions:
- code = stringify(rename_vars(tokenize(submission)))
- submissions[code] += 1
+ for (submission, correct) in trace_submissions:
+ if correct:
+ tokens = list(tokenize(submission))
+ for token in tokens:
+ if token.type == 'NAME' and token.val not in counted_tokens:
+ names[token.val] += 1
+ counted_tokens.append(token.val)
+ code = stringify(rename_vars(tokens))
+ submissions[code] += 1
for query in trace_queries:
code = stringify(rename_vars(tokenize(query)))
queries[code] += 1
@@ -215,7 +222,7 @@ def get_edits_from_traces(traces):
for edit, p in edits.items():
edits[edit] = logistic(p, k=3, x_0=avg_p)
- return edits, submissions, queries
+ return edits, submissions, queries, names
def classify_edits(edits):
inserts = {}
@@ -244,13 +251,17 @@ if __name__ == '__main__':
edits = {}
submissions = {}
queries = {}
+ names = {}
for problem in Problem.objects.all():
print(problem.name)
pid = problem.pk
attempts = Attempt.objects.filter(problem=problem, done=True) \
.exclude(user__groups=None)
traces = [a.trace for a in attempts]
- if traces:
- edits[pid], submissions[pid], queries[pid] = get_edits_from_traces(traces)
+ try:
+ edits[pid], submissions[pid], queries[pid], names[pid] = get_edits_from_traces(traces)
+ except:
+ pass
+
+ pickle.dump((edits, submissions, queries, names), open('edits.pickle', 'wb'))
- pickle.dump((edits, submissions, queries), open('edits.pickle', 'wb'))
diff --git a/monkey/monkey.py b/monkey/monkey.py
index 99f5a2a..02048de 100755
--- a/monkey/monkey.py
+++ b/monkey/monkey.py
@@ -2,11 +2,11 @@
import math
import time
-
import prolog.engine
+
from .edits import classify_edits
from prolog.util import Token, annotate, compose, map_vars, normalized, rename_vars, stringify
-from .util import PQueue
+from .util import damerau_levenshtein, PQueue
# Check whether all tests for problem [name] succeed.
def test(name, code):
@@ -311,3 +311,24 @@ def fix_hints(code, path):
program[idx:idx+len(a)] = [t.clone(pos=program[idx].pos) for t in b]
yield fix_type, start, end, msg
+
+
+# Checks for typos in the code and suggests the nearest name submitted by other users.
+def check_typos(code, names):
+ for token in annotate(code):
+ if token.type == 'NAME':
+ nearest_name = ' '
+ nearest_dist = 1000
+ own_count = names.get(token.val, 0) # count of token.val, which is compared with the
+ # count of each candidate name in names
+ for name in names.items():
+ if name[0] == token.val: # If the names are the same, skip this candidate
+ continue
+
+ distance = damerau_levenshtein(token.val, name[0])
+
+ if distance < nearest_dist and distance > 0 and own_count < name[1]:
+ nearest_dist = distance # Update nearest_dist and nearest_name when a closer name is found
+ nearest_name = name[0]
+ if nearest_dist > 0 and nearest_dist/len(nearest_name) <= 1/3:
+ yield 'typo', token.pos, token.pos + len(token.val) , 'Did you mean "{}"?'.format(nearest_name)
diff --git a/monkey/test.py b/monkey/test.py
index 6549c1b..bb28e9b 100755
--- a/monkey/test.py
+++ b/monkey/test.py
@@ -36,6 +36,7 @@ attempts = Attempt.objects.filter(problem=problem) \
edits = tutor_apps.get_app_config('tutor').edits[problem.pk]
submissions = tutor_apps.get_app_config('tutor').submissions[problem.pk]
queries = tutor_apps.get_app_config('tutor').queries[problem.pk]
+names = tutor_apps.get_app_config('tutor').names[problem.pk]
# Find incorrect submissions.
incorrect_all = []
@@ -135,6 +136,11 @@ elif sys.argv[2] == 'info':
for (before, after), cost in sorted(changes.items(), key=lambda x: x[1]):
print(' {:.4f}\t{} → {}'.format(cost, stringify(before) if before else 'ε',
stringify(after) if after else 'ε'))
+ # Print all observed names and their counts.
+ elif sys.argv[3] == 'names':
+ for name, count in sorted(names.items(), key=lambda x: x[1]):
+ print(' {:.4f}\t{}'.format(count, name))
+
# Print all student submissions not (yet) corrected.
elif sys.argv[3] == 'unsolved':
for p in sorted(incorrect):