summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--aied2018/presentation/aied_poster.tex33
-rw-r--r--aied2018/presentation/motivation.tex50
-rw-r--r--aied2018/presentation/patterns.tex2
-rw-r--r--aied2018/presentation/rules.tex187
4 files changed, 150 insertions, 122 deletions
diff --git a/aied2018/presentation/aied_poster.tex b/aied2018/presentation/aied_poster.tex
index 73de0e6..7f7db2f 100644
--- a/aied2018/presentation/aied_poster.tex
+++ b/aied2018/presentation/aied_poster.tex
@@ -13,10 +13,10 @@
\usepackage{color}
\newcommand\red[1]{{\begingroup\color[rgb]{0.9,0.2,0.2}#1\endgroup}}
\newcommand\blue[1]{{\begingroup\color[rgb]{0.15,0.15,0.8}#1\endgroup}}
-\newcommand\green[1]{{\begingroup\color[rgb]{0.15,0.8,0.15}#1\endgroup}}
+\newcommand\green[1]{{\begingroup\color[rgb]{0.10,0.7,0.10}#1\endgroup}}
\usepackage{fancyvrb,courier}
-\fvset{commandchars=\\\{\},baselinestretch=0.98,samepage=true,xleftmargin=2.5mm}
+\fvset{commandchars=\\\{\},baselinestretch=0.98,samepage=true,xleftmargin=2.5mm,fontsize=\small}
\usepackage{tikz}
\usepackage{forest}
@@ -53,7 +53,7 @@
\setbeamercolor*{block title}{fg=white,bg=FRIRed}
\setbeamercolor*{block body}{fg=black, bg=white}
- \begin{myblock}{Motivation and Research Questions}
+ \begin{myblock}{Motivation}
\input{motivation.tex}
\end{myblock}
\setbeamercolor*{block title}{fg=white,bg=TitleBG}
@@ -83,27 +83,22 @@
\setbeamercolor*{block title}{fg=white,bg=TitleBG}
\setbeamercolor*{block body}{fg=black, bg=white}
- \begin{myblock}{Learning Rules and Results}
+ \begin{myblock}{Rules and results}
\input{rules.tex}
\end{myblock}\vfill
\setbeamercolor*{block title}{fg=white,bg=FRIRed}
\begin{myblock}{Conclusions}
\begin{itemize}
- \item Abstract-syntax-tree (AST) patterns for representing program patterns.
- \item Patterns are extracted automatically and combined into n-rules(errors) and p-rules (approaches) with machine learning.
-
- \item Patterns are useful, because in our experiment ...
- \begin{itemize}
- \item classification accuracy of Random Forest was on average 17\% higher than default accuracy.
- \item n-rules explained over 70\% of incorrect submissions.
- \item p-rules explained 62\% of correct programs.
- \end{itemize}
- \item However ...
- \begin{itemize}
-
- \item In some domains, patterns were not informative (\textsf{ballistics} and \textsf{minimax}), therefore more sophisticated patterns are needed.
- \item To construct new patterns, a tool for vizualization of patterns is needed.
- \end{itemize}
+ \item AST patterns represent program features, while induced n-rules and p-rules encode errors and solution strategies.
+
+ \item Patterns are useful, because in our experiment ...\\
+ ~~... classification accuracy was on average 17\% higher using patterns;\\
+ ~~... n-rules explained over 70\% of incorrect submissions;\\
+ ~~... p-rules explained 62\% of correct submissions.
+
+ \item However ...\\
+ ~~... patterns were not always informative -- new kinds of patterns are needed;\\
+ ~~... a visualization tool could support discovery of new kinds of useful patterns.
\end{itemize}
\end{myblock}\vfill
}
diff --git a/aied2018/presentation/motivation.tex b/aied2018/presentation/motivation.tex
index 69db435..6d6b95f 100644
--- a/aied2018/presentation/motivation.tex
+++ b/aied2018/presentation/motivation.tex
@@ -1,27 +1,31 @@
- What is wrong with the following Python program that prints all divisors?
- \begin{columns}
- \begin{column}{0.50\textwidth}
-\begin{Verbatim}
+What is wrong with the following program that prints all divisors?
+
+\vspace{0.5cm}
+
+\begin{Verbatim}[xleftmargin=6mm]
\textbf{def} divisors(n):
- \textbf{for} d \textbf{in} range(1, \red{n}):
+ \textbf{for} d \textbf{in} range(1,\red{n}):
\textbf{if} n % d == 0:
- \textbf{print}(d)
+ print(d)
\end{Verbatim}
+
+\vspace{0.5cm}
- \end{column}
- \begin{column} {0.50\textwidth}
- Answer: \texttt{range(1,n)} generates values up to \texttt{n-1}, so \texttt{n} is not printed. Instead, \texttt{range(1,n+1)} is better.
- \end{column}
- \end{columns}
-
- \vspace{2cm}
- Teachers can often spot such erroneous patterns without test cases. Can we represent their knowledge and use it to:
- \begin{itemize}
- \item generate automatic feedback in tutoring systems or
- \item find programs with similar approach / error?
- \end{itemize}
- Two research questions arise:
- \begin{itemize}
- \item RQ1: How can we encode patterns in programs?
- \item RQ2: How can we automatically extract relevant patterns from student solutions?
- \end{itemize} \ No newline at end of file
+Answer: \texttt{range(1,n)} only generates values up to \texttt{n-1}, so \texttt{n} is not printed.
+Instead, \texttt{range(1,n+1)} should be used.
+
+\vspace{1cm}
+
+An experienced teacher can spot such errors easily, without having to run the program. Can we discover that knowledge automatically?
+More specifically:
+
+\vspace{1cm}
+
+\begin{itemize}
+\item How do we encode distinctive program features?
+\item Can we discern which features are relevant to some error or solution?
+\end{itemize}
+
+\vspace{1cm}
+
+We address these questions by using abstract-syntax-tree (AST) patterns to encode program features, and machine-learned rules to discover common errors and solution strategies for beginner Python programs.
diff --git a/aied2018/presentation/patterns.tex b/aied2018/presentation/patterns.tex
index 8c701e9..cdb6aad 100644
--- a/aied2018/presentation/patterns.tex
+++ b/aied2018/presentation/patterns.tex
@@ -42,7 +42,7 @@ Such patterns can represent a program’s control structure, relationships betwe
\textbf{\red{def}} divisors(\textbf{\blue{n}}):
\textbf{\red{for}} d \textbf{in} range(1,\textbf{\blue{n}}):
\textbf{\red{if}} n % d == 0:
- \textbf{print}(d)
+ print(d)
\end{Verbatim}
\end{textblock}
diff --git a/aied2018/presentation/rules.tex b/aied2018/presentation/rules.tex
index f2a31c7..b974530 100644
--- a/aied2018/presentation/rules.tex
+++ b/aied2018/presentation/rules.tex
@@ -1,8 +1,14 @@
\textbf{The dataset}
\begin{columns}
+\begin{column}{0.50\textwidth}
+ \begin{itemize}
+ \item $S_i$: submitted programs
+ \item $P_i$: patterns (binary features)
+ \item each submission is classified either as $correct$ or $incorrect$ based on test cases
+ \end{itemize}
+\end{column}
\begin{column}{0.40\textwidth}
-
\begin{tabular}{l|rrrr|l}
& $P_1$ & $P_2$ & $P_3$ & $\ldots$ & class \\
\hline
@@ -12,98 +18,121 @@ $S_3$ & 1 & 1 & 0 & $\ldots$ & $incorrect$ \\
$\vdotswithin{S_4}$ & & $\vdotswithin{1}$ & & & $\vdotswithin{correct}$ \\
\end{tabular}
\end{column}
-\begin{column}{0.60\textwidth}
- \begin{itemize}
- \item Each submission ($S_1, S_2, S_3, \ldots$) becomes a learning instance.
- \item Each constructed pattern ($P_1, P_2, P_3, \ldots$) is a binary feature.
- \item Based on test results each submission is classified either as $correct$ or $incorrect$
- \end{itemize}
-\end{column}
\end{columns}
-\vspace{2cm}
+\vspace{1cm}
+
+\textbf{Characterizing errors and solutions}
+\vspace{0.5cm}
+Induce rules to predict whether a program is correct (\emph{p-rules}) or not (\emph{n-rules}) based on patterns that appear in it.
+
+\vspace{1cm}
\begin{columns}
- \begin{column}{0.01\textwidth}
- \end{column}
- \begin{column}{0.59\textwidth}
- \textbf{Characterizing typical approaches and errors with rule learning}
- \begin{itemize}
- \item \emph{n-rules} describe buggy patterns: \\IF $condition$ THEN $incorrect$.
- \item \emph{p-rules} describe necessary patterns for programs to be correct: \\IF $condition$ THEN $correct$.
- \end{itemize}
- \vspace{0.5cm}
- \textbf{Example: Greatest Absolutist}
- \begin{itemize}
- \item 155 submissions (57 correct, 98 incorrect)
- \item 8298 patterns, 15 n-rules and 6 p-rules
- \end{itemize}
- \underline{\smash{A solution:}}
- \begin{Verbatim}
+ \begin{column}{0.42\textwidth}
+ \textbf{\emph{n-rules}} describe errors: \\IF $P_1 \land \ldots \land P_k$ THEN $incorrect$
+ \end{column}
+ \begin{column}{0.50\textwidth}
+ \textbf{\emph{p-rules}} describe alternative solutions: \\IF $P_1 \land \ldots \land P_k$ THEN $correct$
+ \end{column}
+\end{columns}
+
+\vspace{1.5cm}
+
+\begin{columns}[T]
+ \begin{column}{0.58\textwidth}
+\textbf{Example: Greatest Absolutist}
+
+\vspace{0.5cm}
+Find element with the largest absolue value.
+
+\vspace{0.5cm}
+\begin{Verbatim}
\textbf{def} max_abs(l):
vmax = l[0]
\textbf{for} v \textbf{in} l:
\textbf{if} abs(v) > abs(vmax):
vmax = v
\textbf{return} vmax
- \end{Verbatim}
- \vspace{0.5cm}
- \underline{\smash{Two sample learned n-rules:}}
- \begin{itemize}
- \item \textsf{P64 ⇒ incorrect } (covers 22)
- \item \textsf{P2 ∧ P70 ⇒ incorrect} (covers 17)
- \end{itemize}
-
- \end{column}
- \begin{column}{0.40\textwidth}
- \fbox{
- \begin{minipage}[t]{0.94\textwidth}
- \vspace{0.5cm}
- \textbf{How useful are patterns?}
- \begin{itemize}
- \item Compare accuracies of Random Forest and Majority Classifier.
- \item Three types of exercises (basic, loops, functions)
- \end{itemize}
- \vspace{0.5cm}
- \begin{center}
- \begin{tabular}{l|rr}
- \textbf{Problem} & Maj & RF \\
- \hline
- \textsf{F2C}& 0.579 & 0.933 \\
- \textsf{ballistics}& 0.761 & 0.802 \\
- \textsf{average}& 0.614 & 0.830 \\
- \hline
- \textsf{buy\_five}& 0.613 & 0.828 \\
- \textsf{competition}& 0.703 & 0.847 \\
- \textsf{top\_shop}& 0.721 & 0.758 \\
- \textsf{minimax}& 0.650 & 0.644 \\
- \textsf{ch\_account}& 0.521 & 0.744 \\
- \textsf{con\_anon}& 0.688 & 0.800 \\
- \hline
- \textsf{greatest}& 0.585 & 0.859 \\
- \textsf{greatest\_abs}& 0.632 & 0.845 \\
- \textsf{greatest\_neg}& 0.636 & 0.815 \\
- \hline
- Average & 0.642 & 0.809 \\
- \end{tabular}
- \end{center}
- \end{minipage}}
- \end{column}
-\end{columns}
-\vspace{1.5cm}
-\underline{\smash{Vizualizations of rules / patterns}}
+\end{Verbatim}
+
+\vspace{0.5cm}
+We collected 155 submissions for this problem. Using extracted patterns our approach induced 15~n-rules and 6~p-rules.
+
+\vspace{1cm}
+\underline{\smash{Example n-rules}}
+
+\vspace{0.5cm}
\begin{itemize}
- \item \textsf{P64} (blue left) matches functions returning variable compared in the \textsf{if} clause.
- \item \textsf{P2} (red right) matches functions that return the variable used in an assignment statement within a for-if block; \textsf{P70} (blue) matches the call to \textsf{abs} in an assignment statement nested within a for loop and an if clause.
+ \item \textsf{\green{$P_G$} ⇒ incorrect } (covers 22)
+ \item \textsf{$\red{P_R} ∧ \blue{P_B}$ ⇒ incorrect} (covers 17)
\end{itemize}
+
+\end{column}
+
+\begin{column}{0.35\textwidth}
+ \textbf{Results}
+
+ \vspace{0.5cm}
+
+ \fbox{
+ \begin{minipage}[t]{0.99\textwidth}
+ \begin{center}
+ \begin{tabular}{l|rr}
+ \textbf{Problem} & Maj & RF \\
+ \hline
+ \textsf{F2C}& 0.579 & 0.933 \\
+ \textsf{ballistics}& 0.761 & 0.802 \\
+ \textsf{average}& 0.614 & 0.830 \\
+ \hline
+ \textsf{buy\_five}& 0.613 & 0.828 \\
+ \textsf{competition}& 0.703 & 0.847 \\
+ \textsf{top\_shop}& 0.721 & 0.758 \\
+ \textsf{minimax}& 0.650 & 0.644 \\
+ \textsf{ch\_account}& 0.521 & 0.744 \\
+ \textsf{con\_anon}& 0.688 & 0.800 \\
+ \hline
+ \textsf{greatest}& 0.585 & 0.859 \\
+ \textsf{greatest\_abs}& 0.632 & 0.845 \\
+ \textsf{greatest\_neg}& 0.636 & 0.815 \\
+ \hline
+ Average & 0.642 & 0.809 \\
+ \end{tabular}
+ \end{center}
+ \end{minipage}}
+\end{column}
+\end{columns}
+
+\vspace{2cm}
+\underline{\smash{Vizualizing rules / patterns}}
+
+\begin{columns}
+\begin{column}{0.4\textwidth}
\begin{Verbatim}
-\textbf{def} max_abs(l): \textbf{def} max_abs(l):
- vmax = 0 vmax = None
- \textbf{for} i \textbf{in} range(len(l)): \textbf{for} v \textbf{in} l:
- \textbf{if} \blue{vmax} < abs(l[i]): \textbf{if} vmax==None or vmax<v:
- vmax = l[i] \red{vmax} = \blue{abs}(v)
- \textbf{return} \blue{vmax} \textbf{return} \red{vmax}
+\textbf{def} max_abs(l):
+ vmax = 0
+ \textbf{for} i \textbf{in} range(len(l)):
+ \textbf{if} \textbf{\green{vmax}} < abs(l[i]):
+ vmax = l[i]
+ \textbf{return} \textbf{\green{vmax}}
\end{Verbatim}
+\end{column}
+\begin{column}{0.55\textwidth}
+\begin{Verbatim}
+\textbf{def} max_abs(l):
+ vmax = None
+ \textbf{for} v \textbf{in} l:
+ \textbf{if} vmax==None \textbf{or} vmax<v:
+ \textbf{\red{vmax}} = \textbf{\blue{abs}}(v)
+ \textbf{return} \textbf{\red{vmax}}
+\end{Verbatim}
+\end{column}
+\end{columns}
+
+\vspace{1.5cm}
+\textbf{Evaluation.}
+We evaluated this approach on exercises covering introduction to Python, loops and functions,
+by comparing the classification accuracy of a random-forest classifier based on patterns to the majority classifier.
%\begin{Verbatim}
%\textbf{def} max_abs(l): \textbf{def} max_abs(l):