1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
import argparse
import ast
import collections
import os
import pandas
import sklearn.dummy
import sklearn.ensemble
import sklearn.model_selection
import sklearn.tree
import canonicalize
import dynamic
import regex
import main
import Orange
if __name__ == '__main__':
    # Command-line entry point: load student programs from a data directory,
    # compute pattern attributes for them, export one row per submission to an
    # Orange data file, and dump the attribute descriptions to attributes.txt.
    parser = argparse.ArgumentParser(description='Get patterns from student programs.')
    parser.add_argument('filename', help='Orange file name')
    parser.add_argument('path', help='path to data directory')
    parser.add_argument('--dynamic', action="store_true", help='include dynamic atts')
    parser.add_argument('--names', nargs='*', required=False, help='names that should not be anonymized')
    parser.add_argument('--exec', required=False, help='code to append for dynamic patterns')
    parser.add_argument('--inputs', nargs='*', required=False, help='inputs for dynamic patterns')
    args = parser.parse_args()

    # Strip trailing slashes so that basename() yields the last directory
    # component instead of an empty string.
    path = args.path.rstrip('/')
    problem_name = os.path.basename(path)

    programs = main.get_programs(path, args.names, do_canonicalize=True)

    # Ordered so that the column order in the Orange table and the line order
    # in attributes.txt are the same.
    attrs = collections.OrderedDict()
    attrs.update(regex.get_attributes(programs))
    if args.dynamic:
        attrs.update(dynamic.get_attributes(programs, args.exec, args.inputs))

    # Every attribute becomes a two-valued ('F'/'T') feature; the class
    # variable is 'correct' and the program itself is stored as a meta column.
    orange_attrs = [
        Orange.data.DiscreteVariable(at, values=('F', 'T'))
        for at in attrs
    ]
    cl = Orange.data.DiscreteVariable('correct', values=('F', 'T'))
    mcode = Orange.data.StringVariable('code')
    orange_domain = Orange.data.Domain(orange_attrs, cl, metas=[mcode])
    orange_data = Orange.data.Table.from_domain(orange_domain)

    for program in programs:
        # Skip empty program keys.
        if not program:
            continue
        instance = Orange.data.Instance(orange_domain)
        for at in attrs:
            # An attribute is set for a program when the program is listed
            # among that attribute's 'programs'.
            instance[at] = program in attrs[at]['programs']
        instance[cl] = programs[program]['correct']
        instance[mcode] = program
        # Append the same row once per entry in the program's 'users'.
        for _ in range(len(programs[program]['users'])):
            orange_data.append(instance)
    orange_data.save(args.filename)

    # Write one "name: description" line per attribute. The context manager
    # guarantees the file is flushed and closed (the handle used to be leaked).
    with open("attributes.txt", "wt") as fatt:
        for at in attrs:
            # Newlines in a description are flattened so each attribute
            # occupies exactly one line.
            fatt.write("{}: {}\n".format(at, str(attrs[at]["desc"]).replace('\n',' ')))
|