summaryrefslogtreecommitdiff
path: root/test-rules.py
blob: c47ca1049c22eebdc953b739e14e9fbd63c01f5f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#!/usr/bin/python3

import argparse
import collections
import json
import os.path
import re
from statistics import mean

from termcolor import colored

from monkey.patterns import get_patterns

# command-line interface: one positional argument, the data directory that
# holds attributes.tab, rules.txt, users-test.txt and submissions/
parser = argparse.ArgumentParser(description='Evaluate rules on student programs.')
parser.add_argument('path', help='path to data directory')
args = parser.parse_args()
data_dir = args.path

class Rule(collections.namedtuple('Rule', ['klass', 'condition', 'distribution', 'quality'])):
    """A learned classification rule.

    klass -- 'T' or 'F' (predicted correctness class)
    condition -- patterns that must all be present for the rule to apply
    distribution -- rule class distribution
    quality -- rule quality
    """
    def __str__(self):
        # header line followed by one line per condition pattern
        parts = ['Rule: class = {}, distribution = {}, quality = {}\n'.format(
            self.klass, self.distribution, self.quality)]
        parts.extend('{}\n'.format(pattern) for pattern in self.condition)
        return ''.join(parts)

class Submission(collections.namedtuple('Submission', ['program', 'correct', 'patterns', 'hint'])):
    """One submission in a student's trace.

    program -- submitted code
    correct -- does this submission pass all tests?
    patterns -- patterns found in this submission
    hint -- suggested hint (may be None when no rule applies)
    """

class Hint(collections.namedtuple('Hint', ['ok', 'remove', 'add', 'add_alternatives'])):
    """A suggestion for improving a submission.

    ok -- required patterns already in program (unused)
    remove -- patterns that should be removed
    add -- patterns that should be added (intersection from all relevant rules)
    add_alternatives -- patterns that should be added (union from all relevant rules)

    NOTE currently either (only remove is set) or (both add and
    add_alternatives are set).
    """

# read attributes: tab-separated (id, pattern-name) pairs, one per line.
# Read the file once (the original opened it twice and never closed it).
attributes_file = os.path.join(data_dir, 'attributes.tab')
with open(attributes_file, 'r') as f:
    attribute_pairs = [line.strip().split('\t') for line in f]
attributes = dict(attribute_pairs)                       # id -> pattern name
attributes_ordered = [pair[1] for pair in attribute_pairs]   # names in file order

# read rules of the form
#   IF a1!=F AND a2!=F THEN correct=T [ 10 2] 0.85
# (format implied by the regex below); unparseable lines are reported.
rules_file = os.path.join(data_dir, 'rules.txt')
rule_re = re.compile(
    r'IF ((?:a[0-9]*!=F(?: AND )*)*) THEN correct=([TF]) *\[ *([0-9]*) *([0-9]*)\] *([0-9.]*)')
rules = []
with open(rules_file, 'r') as f:  # close the file (the original leaked the handle)
    for line in f:
        line = line.strip()
        match = rule_re.match(line)
        if not match:
            print('Did not understand rule:', line)
            continue
        condition_str, klass, dist_a, dist_b, quality = match.groups()
        # each condition field is '<id>!=F'; strip the 3-char suffix to get
        # the attribute id, then map it to the pattern name
        condition = tuple(attributes[field[:-3]] for field in condition_str.split(' AND '))
        rules.append(Rule(klass, condition, (int(dist_a), int(dist_b)), float(quality)))

# export rules for tutor
exported_rules = []
for rule in rules:
    exported_rules.append({
        'condition': rule.condition,
        'class': rule.klass == 'T',
        'distribution': rule.distribution,
        'quality': rule.quality,
    })
json_file = os.path.join(data_dir, 'bugs.json')
with open(json_file, 'w') as f:
    json.dump({'patterns': attributes_ordered, 'rules': exported_rules},
              f, sort_keys=True, indent=2)

def color_print(text, ranges):
    """Print text, coloring each (start, length, color) range in ranges.

    Ranges that overlap an already-printed range are ignored.
    """
    cursor = 0
    for begin, size, color in sorted(ranges):
        if begin < cursor:
            # this range starts inside an already-printed colored span
            continue
        print(text[cursor:begin], end='')
        print(colored(text[begin:begin + size], color), end='')
        cursor = begin + size
    print(text[cursor:])

# generate marks for selected patterns for color_print
def mark(patterns, selected, color):
    """Return (start, length, color) marks for every selected pattern's nodes."""
    result = set()
    for pattern, nodes in patterns:
        if pattern not in selected:
            continue
        for node_group in nodes:
            head = node_group[0]
            # nodes without a (truthy) position cannot be highlighted
            if head.pos:
                result.add((head.pos, len(head.val), color))
    return result

# return a hint for the best applicable buggy rule
def suggest_buggy(rules, patterns):
    """Suggest patterns to remove, based on buggy rules (class 'F').

    Returns a Hint whose 'remove' is the condition of the first buggy rule
    with all of its condition patterns present in the program, or None if
    no buggy rule applies.
    """
    # build the set of program pattern names once, instead of rebuilding
    # the list for every condition pattern of every rule
    program_patterns = {p[0] for p in patterns}
    for rule in rules:
        if rule.klass != 'F':
            continue
        # suggest this rule if all patterns in its condition are found
        if all(rule_pattern in program_patterns for rule_pattern in rule.condition):
            return Hint(ok=[], remove=rule.condition, add=[], add_alternatives=[])
    return None

# return a hint for the best applicable true rule
def suggest_true(rules, patterns):
    """Suggest patterns to add, based on true rules (class 'T').

    Among the true rules with at least one condition pattern present and at
    least one missing, take those with the most patterns present, and
    suggest the most frequently missing pattern (first in attribute order
    on ties) as 'add', with all other missing patterns as
    'add_alternatives'. Returns None when no true rule partially matches.
    """
    program_patterns = {pattern for pattern, nodes in patterns}

    # group partially-matched true rules by the number of matched patterns
    rule_matches = collections.defaultdict(list)
    for rule in rules:
        if rule.klass != 'T':
            continue
        found = {p for p in rule.condition if p in program_patterns}
        missing = set(rule.condition) - found
        if missing:
            rule_matches[len(found)].append((found, missing))

    # pick the highest match count, ignoring rules with no matched pattern
    # (generalizes the previous hard-coded scan over range(10, 0, -1),
    # which silently dropped rules with more than 10 matched patterns)
    best = max((n for n in rule_matches if n > 0), default=0)
    if not best:
        return None

    # count how often each pattern is missing among the best-matching rules
    missing_patterns = collections.Counter()
    for found, missing in rule_matches[best]:
        missing_patterns.update(missing)

    # all patterns tied for the highest missing count
    # (hoisted: the original recomputed most_common() on every iteration)
    top_count = missing_patterns.most_common(1)[0][1]
    best_missing_patterns = {p for p, c in missing_patterns.items() if c == top_count}

    # suggest the first best pattern in attribute order …
    add = []
    for pattern in attributes_ordered:
        if pattern in best_missing_patterns:
            add = [pattern]
            break
    # … and every other missing pattern as an alternative
    add_alternatives = [p for p, _ in missing_patterns.most_common() if p not in add]
    return Hint(ok=[], remove=[], add=add, add_alternatives=add_alternatives)

# read traces: user ids, one per line (close the file, unlike the original)
users_file = os.path.join(data_dir, 'users-test.txt')
with open(users_file, 'r') as f:
    users = [int(line.strip()) for line in f]

# evaluate hints on student traces
# user id -> list of Submissions, up to and including the first correct one
# (plain dict: entries are only ever assigned whole, so defaultdict was unneeded)
submissions = {}
for user in users:
    user_subs = []
    user_dir = os.path.join(data_dir, 'submissions', str(user))
    # each submission is in a file named <seq. no>-<total tests>-<passed tests>
    for submission in sorted(os.listdir(user_dir), key=lambda x: int(x.split('-')[0])):
        seq, total, passed = submission.split('-')
        # string comparison is enough: both fields come from the same filename
        correct = total == passed
        with open(os.path.join(user_dir, submission), 'r') as f:
            code = f.read()

        # check rules for this submission: prefer a buggy-rule hint,
        # fall back to a true-rule hint
        program_patterns = list(get_patterns(code))
        hint = suggest_buggy(rules, program_patterns)
        if not hint:
            hint = suggest_true(rules, program_patterns)
        user_subs.append(Submission(code, correct, program_patterns, hint))

        # skip submissions after the first correct program
        if correct:
            break

    # ignore traces with no / only correct submissions
    if (not any(s.correct for s in user_subs) or
        all(s.correct for s in user_subs)):
        continue

    submissions[user] = user_subs

    # print submissions with hints for debugging
    for s in user_subs:
        print('PASS' if s.correct else 'FAIL', end='\t')
        marks = []
        if s.hint and s.hint.remove:
            marks = mark(s.patterns, s.hint.remove, 'red')
        color_print(s.program, marks)

        if s.hint:
            for x in s.hint.remove:
                print('buggy\t', x)
            for x in s.hint.add:
                print('missing\t', x)
            for x in s.hint.add_alternatives:
                print('alternative\t', x)
        print()
    print('-'*30)
    print()

# classify each non-final submission by how well its hint anticipated the
# student's eventual solution
good_hint = []    # the hint's best pattern(s) were implemented in the solution
medium_hint = []  # one of the alternative hint patterns was implemented
bad_hint = []     # none of the hinted patterns were implemented
no_hint = []      # no hint could be suggested

# total number of submissions
n_subs = 0
for user, subs in submissions.items():
    # the last submission in a kept trace is the (correct) solution
    solution_patterns = {p[0] for p in subs[-1].patterns}
    for sub in subs[:-1]:
        n_subs += 1
        hint = sub.hint
        if not hint:
            no_hint.append(sub)
        elif hint.remove:
            # buggy rule: at least one pattern should be absent from the solution
            if any(p not in solution_patterns for p in hint.remove):
                good_hint.append(sub)
            else:
                bad_hint.append(sub)
        # true rule: suggested patterns should appear in the solution
        elif all(p in solution_patterns for p in hint.add):
            # best suggested pattern(s) match
            good_hint.append(sub)
        elif any(p in solution_patterns for p in hint.add_alternatives):
            # some suggested pattern(s) match
            medium_hint.append(sub)
        else:
            bad_hint.append(sub)

# summary of how the suggested hints fared against the final solutions
print('Statistics')
print('----------')
print('avg. submissions per trace:', mean(len(trace) for trace in submissions.values()))
print('avg. clauses in solution:', mean(trace[-1].program.count('.') for trace in submissions.values()))
print('total submissions:', n_subs)
print('positive hints (best implemented):', sum(1 for s in good_hint if s.hint.add))
print('positive hints (alternative implemented):', sum(1 for s in medium_hint if s.hint.add_alternatives))
print('positive hints (not implemented):', sum(1 for s in bad_hint if s.hint.add))
print('buggy hints (implemented):', sum(1 for s in good_hint if s.hint.remove))
print('buggy hints (not implemented):', sum(1 for s in bad_hint if s.hint.remove))
print('no hints:', len(no_hint))