summary | refs | log | tree | commit | diff
path: root/create_data.py
diff options
context:
space:
mode:
author Martin Možina <martin.mozina@fri.uni-lj.si> 2018-05-20 17:36:11 +0200
committer Martin Možina <martin.mozina@fri.uni-lj.si> 2018-05-20 17:36:11 +0200
commit 8723bd7cd2d227634d67f24e9514d88e9f1c73a7 (patch)
tree 5b92bf0360157c7148be13d4e0261bdb2c8ceebd /create_data.py
parent 1dbb57208d2b8163a1c007ad0931f859651fc1c2 (diff)
Added create_data and evaluate_orange.
Diffstat (limited to 'create_data.py')
-rw-r--r-- create_data.py 63
1 files changed, 63 insertions, 0 deletions
diff --git a/create_data.py b/create_data.py
new file mode 100644
index 0000000..2296727
--- /dev/null
+++ b/create_data.py
@@ -0,0 +1,63 @@
+import argparse
+import ast
+import collections
+import os
+
+import pandas
+import sklearn.dummy
+import sklearn.ensemble
+import sklearn.model_selection
+import sklearn.tree
+
+import canonicalize
+import dynamic
+import regex
+
+import main
+import Orange
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Get patterns from student programs.')
+ parser.add_argument('filename', help='Orange file name')
+ parser.add_argument('path', help='path to data directory')
+ parser.add_argument('--dynamic', action="store_true", help='include dynamic atts')
+ parser.add_argument('--names', nargs='*', required=False, help='names that should not be anonymized')
+ parser.add_argument('--exec', required=False, help='code to append for dynamic patterns')
+ parser.add_argument('--inputs', nargs='*', required=False, help='inputs for dynamic patterns')
+
+ args = parser.parse_args()
+ path = args.path.rstrip('/')
+ problem_name = os.path.basename(path)
+
+ programs = main.get_programs(path, args.names, do_canonicalize=True)
+
+ attrs = collections.OrderedDict()
+ attrs.update(regex.get_attributes(programs))
+ if args.dynamic:
+ attrs.update(dynamic.get_attributes(programs, args.exec, args.inputs))
+
+ orange_attrs = []
+ for at in attrs:
+ orange_attrs.append(Orange.data.DiscreteVariable(at, values=('F', 'T')))
+
+ cl = Orange.data.DiscreteVariable('correct', values=('F', 'T'))
+ mcode = Orange.data.StringVariable('code')
+ orange_domain = Orange.data.Domain(orange_attrs, cl, metas=[mcode])
+ orange_data = Orange.data.Table.from_domain(orange_domain)
+ for program in programs:
+ if not program:
+ continue
+ instance = Orange.data.Instance(orange_domain)
+ for at in attrs:
+ instance[at] = program in attrs[at]['programs']
+ instance[cl] = programs[program]['correct']
+ instance[mcode] = program
+ for _ in range(len(programs[program]['users'])):
+ orange_data.append(instance)
+
+ orange_data.save(args.filename)
+
+ fatt = open("attributes.txt", "wt")
+ for at in attrs:
+ fatt.write("{}: {}\n".format(at, str(attrs[at]["desc"]).replace('\n',' ')))