"""Export student programs as an Orange data table of pattern attributes.

Run as a script. Reads programs from a data directory, computes pattern
attributes for them, saves one table row per submitting user to the given
Orange file, and writes the attribute descriptions to ``attributes.txt``.
"""

import argparse
import ast
import collections
import os

import pandas
import sklearn.dummy
import sklearn.ensemble
import sklearn.model_selection
import sklearn.tree

import canonicalize
import dynamic
import regex
import main

import Orange

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Get patterns from student programs.')
    parser.add_argument('filename', help='Orange file name')
    parser.add_argument('path', help='path to data directory')
    parser.add_argument('--dynamic', action="store_true", help='include dynamic atts')
    parser.add_argument('--names', nargs='*', required=False,
                        help='names that should not be anonymized')
    parser.add_argument('--exec', required=False,
                        help='code to append for dynamic patterns')
    parser.add_argument('--inputs', nargs='*', required=False,
                        help='inputs for dynamic patterns')
    args = parser.parse_args()

    # Drop trailing slashes before handing the directory to the loader.
    path = args.path.rstrip('/')

    programs = main.get_programs(path, args.names, do_canonicalize=True)

    # Regex attributes always come first; dynamic attributes are appended
    # only when --dynamic is given. The OrderedDict keeps that column order.
    attrs = collections.OrderedDict()
    attrs.update(regex.get_attributes(programs))
    if args.dynamic:
        attrs.update(dynamic.get_attributes(programs, args.exec, args.inputs))

    # One two-valued ('F'/'T') feature per attribute, a two-valued class
    # variable 'correct', and the program text kept as a string meta column.
    orange_attrs = [
        Orange.data.DiscreteVariable(at, values=('F', 'T')) for at in attrs
    ]
    cl = Orange.data.DiscreteVariable('correct', values=('F', 'T'))
    mcode = Orange.data.StringVariable('code')
    orange_domain = Orange.data.Domain(orange_attrs, cl, metas=[mcode])
    orange_data = Orange.data.Table.from_domain(orange_domain)

    for program in programs:
        # Skip falsy keys (e.g. an empty program string).
        if not program:
            continue
        instance = Orange.data.Instance(orange_domain)
        for at in attrs:
            # Feature is set from whether this program is listed for the attribute.
            instance[at] = program in attrs[at]['programs']
        instance[cl] = programs[program]['correct']
        instance[mcode] = program
        # Append the row once per entry in the program's 'users' collection.
        for _ in range(len(programs[program]['users'])):
            orange_data.append(instance)

    orange_data.save(args.filename)

    # Write "name: description" lines, flattening newlines in descriptions.
    # The with-block guarantees the file is flushed and closed, even on error.
    with open("attributes.txt", "wt") as fatt:
        for at in attrs:
            fatt.write("{}: {}\n".format(at, str(attrs[at]["desc"]).replace('\n', ' ')))