# Extracted from commit 8723bd7cd2d227634d67f24e9514d88e9f1c73a7
# Author:  Martin Možina
# Date:    Sun, 20 May 2018 17:36:11 +0200
# Subject: Added create_data and evaluate_orange.
# (new file create_data.py, 63 lines added)
"""Build an Orange data table from student programs and save it to disk.

Command-line script.  It loads the programs found under ``path`` through
``main.get_programs``, collects pattern attributes from
``regex.get_attributes`` (and, with ``--dynamic``, also from
``dynamic.get_attributes``), and writes:

* an Orange table to ``filename`` with one T/F attribute per pattern,
  a T/F class variable ``correct`` and the program as a ``code`` meta;
* ``attributes.txt`` in the current working directory, with one
  ``name: description`` line per attribute.
"""
import argparse
import ast
import collections
import os

import pandas
import sklearn.dummy
import sklearn.ensemble
import sklearn.model_selection
import sklearn.tree

import canonicalize
import dynamic
import regex

import main
import Orange


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Get patterns from student programs.')
    parser.add_argument('filename', help='Orange file name')
    parser.add_argument('path', help='path to data directory')
    parser.add_argument('--dynamic', action="store_true", help='include dynamic atts')
    parser.add_argument('--names', nargs='*', required=False, help='names that should not be anonymized')
    parser.add_argument('--exec', required=False, help='code to append for dynamic patterns')
    parser.add_argument('--inputs', nargs='*', required=False, help='inputs for dynamic patterns')

    args = parser.parse_args()
    # Drop trailing slashes before handing the directory to the loader.
    path = args.path.rstrip('/')

    programs = main.get_programs(path, args.names, do_canonicalize=True)

    # Ordered so that the table columns and attributes.txt list the
    # attributes in the same order they were collected.
    attrs = collections.OrderedDict()
    attrs.update(regex.get_attributes(programs))
    if args.dynamic:
        attrs.update(dynamic.get_attributes(programs, args.exec, args.inputs))

    # One binary (F/T) feature per attribute name.
    orange_attrs = [
        Orange.data.DiscreteVariable(at, values=('F', 'T')) for at in attrs
    ]

    cl = Orange.data.DiscreteVariable('correct', values=('F', 'T'))
    mcode = Orange.data.StringVariable('code')
    orange_domain = Orange.data.Domain(orange_attrs, cl, metas=[mcode])
    orange_data = Orange.data.Table.from_domain(orange_domain)
    for program in programs:
        # Skip falsy (e.g. empty) program keys.
        if not program:
            continue
        info = programs[program]
        instance = Orange.data.Instance(orange_domain)
        for at in attrs:
            # True when this program is among those listed for the attribute.
            instance[at] = program in attrs[at]['programs']
        instance[cl] = info['correct']
        instance[mcode] = program
        # Append the same row once per entry in 'users', so the row
        # count reflects how many users are recorded for this program.
        for _ in range(len(info['users'])):
            orange_data.append(instance)

    orange_data.save(args.filename)

    # Context manager guarantees the file is flushed and closed, even if
    # formatting one of the descriptions raises.
    with open("attributes.txt", "wt") as fatt:
        for at in attrs:
            # Descriptions are flattened to a single line each.
            fatt.write("{}: {}\n".format(at, str(attrs[at]["desc"]).replace('\n', ' ')))