summaryrefslogtreecommitdiff
path: root/aied2018/presentation/rules.tex
diff options
context:
space:
mode:
Diffstat (limited to 'aied2018/presentation/rules.tex')
-rw-r--r--aied2018/presentation/rules.tex187
1 files changed, 108 insertions, 79 deletions
diff --git a/aied2018/presentation/rules.tex b/aied2018/presentation/rules.tex
index f2a31c7..b974530 100644
--- a/aied2018/presentation/rules.tex
+++ b/aied2018/presentation/rules.tex
@@ -1,8 +1,14 @@
\textbf{The dataset}
\begin{columns}
+\begin{column}{0.50\textwidth}
+ \begin{itemize}
+ \item $S_i$: submitted programs
+ \item $P_i$: patterns (binary features)
+ \item each submission is classified either as $correct$ or $incorrect$ based on test cases
+ \end{itemize}
+\end{column}
\begin{column}{0.40\textwidth}
-
\begin{tabular}{l|rrrr|l}
& $P_1$ & $P_2$ & $P_3$ & $\ldots$ & class \\
\hline
@@ -12,98 +18,121 @@ $S_3$ & 1 & 1 & 0 & $\ldots$ & $incorrect$ \\
$\vdotswithin{S_4}$ & & $\vdotswithin{1}$ & & & $\vdotswithin{correct}$ \\
\end{tabular}
\end{column}
-\begin{column}{0.60\textwidth}
- \begin{itemize}
- \item Each submission ($S_1, S_2, S_3, \ldots$) becomes a learning instance.
- \item Each constructed pattern ($P_1, P_2, P_3, \ldots$) is a binary feature.
- \item Based on test results each submission is classified either as $correct$ or $incorrect$
- \end{itemize}
-\end{column}
\end{columns}
-\vspace{2cm}
+\vspace{1cm}
+
+\textbf{Characterizing errors and solutions}
+\vspace{0.5cm}
+Induce rules to predict whether a program is correct (\emph{p-rules}) or not (\emph{n-rules}) based on patterns that appear in it.
+
+\vspace{1cm}
\begin{columns}
- \begin{column}{0.01\textwidth}
- \end{column}
- \begin{column}{0.59\textwidth}
- \textbf{Characterizing typical approaches and errors with rule learning}
- \begin{itemize}
- \item \emph{n-rules} describe buggy patterns: \\IF $condition$ THEN $incorrect$.
- \item \emph{p-rules} describe necessary patterns for programs to be correct: \\IF $condition$ THEN $correct$.
- \end{itemize}
- \vspace{0.5cm}
- \textbf{Example: Greatest Absolutist}
- \begin{itemize}
- \item 155 submissions (57 correct, 98 incorrect)
- \item 8298 patterns, 15 n-rules and 6 p-rules
- \end{itemize}
- \underline{\smash{A solution:}}
- \begin{Verbatim}
+ \begin{column}{0.42\textwidth}
+ \textbf{\emph{n-rules}} describe errors: \\IF $P_1 \land \ldots \land P_k$ THEN $incorrect$
+ \end{column}
+ \begin{column}{0.50\textwidth}
+ \textbf{\emph{p-rules}} describe alternative solutions: \\IF $P_1 \land \ldots \land P_k$ THEN $correct$
+ \end{column}
+\end{columns}
+
+\vspace{1.5cm}
+
+\begin{columns}[T]
+ \begin{column}{0.58\textwidth}
+\textbf{Example: Greatest Absolutist}
+
+\vspace{0.5cm}
+Find element with the largest absolue value.
+
+\vspace{0.5cm}
+\begin{Verbatim}
\textbf{def} max_abs(l):
vmax = l[0]
\textbf{for} v \textbf{in} l:
\textbf{if} abs(v) > abs(vmax):
vmax = v
\textbf{return} vmax
- \end{Verbatim}
- \vspace{0.5cm}
- \underline{\smash{Two sample learned n-rules:}}
- \begin{itemize}
- \item \textsf{P64 ⇒ incorrect } (covers 22)
- \item \textsf{P2 ∧ P70 ⇒ incorrect} (covers 17)
- \end{itemize}
-
- \end{column}
- \begin{column}{0.40\textwidth}
- \fbox{
- \begin{minipage}[t]{0.94\textwidth}
- \vspace{0.5cm}
- \textbf{How useful are patterns?}
- \begin{itemize}
- \item Compare accuracies of Random Forest and Majority Classifier.
- \item Three types of exercises (basic, loops, functions)
- \end{itemize}
- \vspace{0.5cm}
- \begin{center}
- \begin{tabular}{l|rr}
- \textbf{Problem} & Maj & RF \\
- \hline
- \textsf{F2C}& 0.579 & 0.933 \\
- \textsf{ballistics}& 0.761 & 0.802 \\
- \textsf{average}& 0.614 & 0.830 \\
- \hline
- \textsf{buy\_five}& 0.613 & 0.828 \\
- \textsf{competition}& 0.703 & 0.847 \\
- \textsf{top\_shop}& 0.721 & 0.758 \\
- \textsf{minimax}& 0.650 & 0.644 \\
- \textsf{ch\_account}& 0.521 & 0.744 \\
- \textsf{con\_anon}& 0.688 & 0.800 \\
- \hline
- \textsf{greatest}& 0.585 & 0.859 \\
- \textsf{greatest\_abs}& 0.632 & 0.845 \\
- \textsf{greatest\_neg}& 0.636 & 0.815 \\
- \hline
- Average & 0.642 & 0.809 \\
- \end{tabular}
- \end{center}
- \end{minipage}}
- \end{column}
-\end{columns}
-\vspace{1.5cm}
-\underline{\smash{Vizualizations of rules / patterns}}
+\end{Verbatim}
+
+\vspace{0.5cm}
+We collected 155 submissions for this problem. Using extracted patterns our approach induced 15~n-rules and 6~p-rules.
+
+\vspace{1cm}
+\underline{\smash{Example n-rules}}
+
+\vspace{0.5cm}
\begin{itemize}
- \item \textsf{P64} (blue left) matches functions returning variable compared in the \textsf{if} clause.
- \item \textsf{P2} (red right) matches functions that return the variable used in an assignment statement within a for-if block; \textsf{P70} (blue) matches the call to \textsf{abs} in an assignment statement nested within a for loop and an if clause.
+ \item \textsf{\green{$P_G$} ⇒ incorrect } (covers 22)
+ \item \textsf{$\red{P_R} ∧ \blue{P_B}$ ⇒ incorrect} (covers 17)
\end{itemize}
+
+\end{column}
+
+\begin{column}{0.35\textwidth}
+ \textbf{Results}
+
+ \vspace{0.5cm}
+
+ \fbox{
+ \begin{minipage}[t]{0.99\textwidth}
+ \begin{center}
+ \begin{tabular}{l|rr}
+ \textbf{Problem} & Maj & RF \\
+ \hline
+ \textsf{F2C}& 0.579 & 0.933 \\
+ \textsf{ballistics}& 0.761 & 0.802 \\
+ \textsf{average}& 0.614 & 0.830 \\
+ \hline
+ \textsf{buy\_five}& 0.613 & 0.828 \\
+ \textsf{competition}& 0.703 & 0.847 \\
+ \textsf{top\_shop}& 0.721 & 0.758 \\
+ \textsf{minimax}& 0.650 & 0.644 \\
+ \textsf{ch\_account}& 0.521 & 0.744 \\
+ \textsf{con\_anon}& 0.688 & 0.800 \\
+ \hline
+ \textsf{greatest}& 0.585 & 0.859 \\
+ \textsf{greatest\_abs}& 0.632 & 0.845 \\
+ \textsf{greatest\_neg}& 0.636 & 0.815 \\
+ \hline
+ Average & 0.642 & 0.809 \\
+ \end{tabular}
+ \end{center}
+ \end{minipage}}
+\end{column}
+\end{columns}
+
+\vspace{2cm}
+\underline{\smash{Vizualizing rules / patterns}}
+
+\begin{columns}
+\begin{column}{0.4\textwidth}
\begin{Verbatim}
-\textbf{def} max_abs(l): \textbf{def} max_abs(l):
- vmax = 0 vmax = None
- \textbf{for} i \textbf{in} range(len(l)): \textbf{for} v \textbf{in} l:
- \textbf{if} \blue{vmax} < abs(l[i]): \textbf{if} vmax==None or vmax<v:
- vmax = l[i] \red{vmax} = \blue{abs}(v)
- \textbf{return} \blue{vmax} \textbf{return} \red{vmax}
+\textbf{def} max_abs(l):
+ vmax = 0
+ \textbf{for} i \textbf{in} range(len(l)):
+ \textbf{if} \textbf{\green{vmax}} < abs(l[i]):
+ vmax = l[i]
+ \textbf{return} \textbf{\green{vmax}}
\end{Verbatim}
+\end{column}
+\begin{column}{0.55\textwidth}
+\begin{Verbatim}
+\textbf{def} max_abs(l):
+ vmax = None
+ \textbf{for} v \textbf{in} l:
+ \textbf{if} vmax==None \textbf{or} vmax<v:
+ \textbf{\red{vmax}} = \textbf{\blue{abs}}(v)
+ \textbf{return} \textbf{\red{vmax}}
+\end{Verbatim}
+\end{column}
+\end{columns}
+
+\vspace{1.5cm}
+\textbf{Evaluation.}
+We evaluated this approach on exercises covering introduction to Python, loops and functions,
+by comparing the classification accuracy of a random-forest classifier based on patterns to the majority classifier.
%\begin{Verbatim}
%\textbf{def} max_abs(l): \textbf{def} max_abs(l):