From c63a5d96355c89353ccb51798ecf2498ec48225d Mon Sep 17 00:00:00 2001 From: Martin Mozina Date: Thu, 1 Feb 2018 14:49:21 +0100 Subject: Algorithm for rule learning. --- aied2018/aied2018.bib | 30 ++++++++++++++++++++++++++++++ aied2018/rules.tex | 27 +++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 aied2018/aied2018.bib create mode 100644 aied2018/rules.tex diff --git a/aied2018/aied2018.bib b/aied2018/aied2018.bib new file mode 100644 index 0000000..207b11f --- /dev/null +++ b/aied2018/aied2018.bib @@ -0,0 +1,30 @@ +@article{Mozina:2007, + author = {Martin Mo\v{z}ina and Jure \v{Z}abkar and Ivan Bratko}, + title = {Argument-Based Machine Learning}, + journal = {Artificial Intelligence}, + volume = {171}, + number = {10/15}, + year = {2007}, + pages = {922-937} + } + +@article{demsar2013orange, + author = {Janez Demšar and Tomaž Curk and Aleš Erjavec and Črt Gorup and Tomaž Hočevar and Mitar Milutinovič and Martin Možina and Matija Polajnar and Marko Toplak and Anže Starič and Miha Štajdohar and Lan Umek and Lan Žagar and Jure Žbontar and Marinka Žitnik and Blaž Zupan}, + title = {Orange: Data Mining Toolbox in Python}, + journal = {Journal of Machine Learning Research}, + year = {2013}, + volume = {14}, + pages = {2349-2353}, + url = {http://jmlr.org/papers/v14/demsar13a.html} +} + + +@INPROCEEDINGS{clarkECML1991, + AUTHOR = "Peter Clark and Robin Boswell", + TITLE = "Rule Induction with {CN}2: Some Recent Improvements", + BOOKTITLE = "Machine Learning - Proceeding of the Fifth Europen + Conference (EWSL-91)", + YEAR = "1991", + address = "Berlin", + pages = "151-163" +} \ No newline at end of file diff --git a/aied2018/rules.tex b/aied2018/rules.tex new file mode 100644 index 0000000..fc69828 --- /dev/null +++ b/aied2018/rules.tex @@ -0,0 +1,27 @@ +\section{Rules} + +\subsection{The learning algorithm} +The goal of learning rules in this paper is to extract and explain common approaches and mistakes in student programs. We use a rule learner called ABCN2e implemented within the Orange data mining library~\cite{demsar2013orange}. ABCN2e is an improvement of the classical CN2 algorithm~\cite{clarkECML1991} for learning unordered rules. The differences between CN2 and ABCN2e are described in a technical report found at \url{https://ailab.si/abml}.} + +General rule-learning algorithms, such as CN2, tend to generate large amounts of specific rules, which leads to more accurate results, however this makes them less appropriate for explaining. We will now describe a problem specific configuration of the rule-learning algorithm that extracts relevant and explainable patterns from student programs. + +Each student's program is represented in the feature space of AST patterns described in the previous section. Each program is classified either as \emph{correct} or \emph{incorrect}. The reasons for a program to be incorrect are either: a) it contains some incorrect pattern (a buggy pattern), which needs to be removed or modified, or b) is missing one or several programing constructs (patterns) that should be included before the program can be correct. + +Both reasons can be expressed with classification rules. In the case of buggy patterns, we learn classification rules for incorrect programs, where each condition in the rule must express the presence of a pattern. The condition of such a rule therefore contains a set of patterns that imply a bug in the program. In the case of missing patterns, we learn another set of rules, which cover programs not covered by above rules and may contain missing patterns within their conditions. These rules describe which missing concepts in a program still have to be implemented. All rules explaining incorrect programs are called \emph{n-rules}. + +To learn explainable, meaningful and non-redundant rules, we need to impose the following additional constraints on the rule learner: +\begin{itemize} + \item classification accuracy of each learned rule must exceed 90\%, because we accept a 10\% false-positive error as acceptable. + \item each conjunct in the condition of a rule must be significant according to the likelihood test, meaning that each pattern in the condition part is indeed relevant (we set the significance threshold to p=0.05) + \item each rule should cover at least 5 unique programs to prevent learning redundant rules that would represent the same error with a different combination of patterns. +\end{itemize} + +Different approaches can be represented with rules explaining correct programs. A program is correct when all the necessary patterns are implemented and none of the buggy patterns are present. For each exercise, there are different possible sets of necessary patterns, each set corresponding to a different approach to solving the exercise. + +We use the same constraints as in the case of n-rules and learn rules for correct programs called \emph{p-rules}. In this case, we always require that conditions mention the presence of patterns, since it is easier to explain different approaches of students with something they have written and not with something they have not. To account for possible buggy patterns, the requirement to achieve 90\% classification accuracy was not evaluated on full data, but only on data not covered by n-rules. Hence, a rule can cover an example with a specific approach even though if it contains a buggy pattern. + +\section{Interpretation of rules} + +\section{Evaluation of rules} + + -- cgit v1.2.1