summaryrefslogtreecommitdiff
path: root/aied2018/presentation/rules.tex
diff options
context:
space:
mode:
Diffstat (limited to 'aied2018/presentation/rules.tex')
-rw-r--r--aied2018/presentation/rules.tex125
1 files changed, 97 insertions, 28 deletions
diff --git a/aied2018/presentation/rules.tex b/aied2018/presentation/rules.tex
index fd53f9b..040ad72 100644
--- a/aied2018/presentation/rules.tex
+++ b/aied2018/presentation/rules.tex
@@ -1,34 +1,103 @@
+\textbf{The dataset}
+
+\begin{columns}
+\begin{column}{0.40\textwidth}
+
+\begin{tabular}{l|rrrr|l}
+ & $P_1$ & $P_2$ & $P_3$ & $\ldots$ & class \\
+ \hline
+$S_1$ & 0 & 1 & 1 & $\ldots$ & $correct$ \\
+$S_2$ & 1 & 0 & 0 & $\ldots$ & $correct$ \\
+$S_3$ & 1 & 1 & 0 & $\ldots$ & $incorrect$ \\
+$\vdotswithin{S_4}$ & & $\vdotswithin{1}$ & & & $\vdotswithin{correct}$ \\
+\end{tabular}
+\end{column}
+\begin{column}{0.60\textwidth}
+ \begin{itemize}
+ \item Each submission ($S_1, S_2, S_3, \ldots$) becomes a learning instance.
+ \item Each constructed pattern ($P_1, P_2, P_3, \ldots$) is a binary feature.
+ \item Based on test results each submission is classified either as $correct$ or $incorrect$
+ \end{itemize}
+\end{column}
+\end{columns}
+\vspace{1cm}
+
+
+\begin{columns}
+ \begin{column}{0.01\textwidth}
+ \end{column}
+ \begin{column}{0.59\textwidth}
+ \textbf{Characterizing typical approaches and errors with rule learning}
+ \begin{itemize}
+ \item \emph{n-rules} describe buggy patterns: \\IF $condition$ THEN $incorrect$.
+ \item \emph{p-rules} describe necessary patterns for programs to be correct: \\IF $condition$ THEN $correct$.
+ \end{itemize}
+ \vspace{0.5cm}
+ \textbf{Example:} Implement a function that returns the element with the largest absolute value.
+ \begin{itemize}
+ \item 155 submissions (57 correct, 98 incorrect)
+ \item 8298 patterns, 15 n-rules and 6 p-rules.
+ \end{itemize}
+ \textbf{Correct solution:}
+ \begin{Verbatim}
+\textbf{def} max_abs(l):
+ vmax = l[0]
+ \textbf{for} v \textbf{in} l:
+ \textbf{if} abs(v) > abs(vmax):
+ vmax = v
+ \textbf{return} vmax
+ \end{Verbatim}
+ \vspace{0.5cm}
+ \textbf{Two sample n-rules:}
\begin{itemize}
- \item classification accuracy of each rule must exceed 90\%, because we deem a 10\% false-positive error as acceptable;
- \item each term in the condition of a rule must be significant according to the likelihood test, meaning that each pattern in the condition part is indeed relevant (we set the significance threshold to p=0.05);
- \item a condition can have at most 3 patterns; and
- \item each rule must cover at least 5 distinct programs -- to avoid learning redundant rules representing the same error.
- \end{itemize}
+ \item \textsf{P64 ⇒ incorrect } (covers 22)
+ \item \textsf{P2 ∧ P70 ⇒ incorrect} (covers 17)
+ \end{itemize}
-\begin{table}[t]
- \caption{Solving statistics, classification accuracy, and coverage of rules for several introductory Python problems. The second column shows the number of users attempting the problem. Columns 3 and 4 show the number of all / correct submissions. The next two columns show the classification accuracy for the majority and random-forest classifiers. The last three columns show percentages of covered examples: columns $n_p$ and $n$ give covered incorrect programs (n-rules with presence of patterns and all n-rules), and column $p$ gives the percentage of correct programs covered by p-rules.}
- \centering
- \begin{tabular}{l|c|cc|cc|ccc}
- & & \multicolumn{2}{c|}{\textbf{Submissions}} & \multicolumn{2}{c|}{\textbf{CA}} & \multicolumn{3}{c}{\textbf{Coverage}} \\
- \textbf{Problem} & \textbf{Users} & Total & Correct & Maj & RF & $n_p$ & $n$ & $p$ \\
+ \end{column}
+ \begin{column}{0.40\textwidth}
+ \fbox{
+ \begin{minipage}[t]{0.94\textwidth}
+ \textbf{How useful are patterns?}
+ \begin{itemize}
+ \item Compare accuracies of Random Forest and Majority Classifier.
+ \item Three types of exercises (basic, loops, functions)
+ \end{itemize}
+ \vspace{1cm}
+ \begin{center}
+ \begin{tabular}{l|rr}
+ \textbf{Problem} & Maj & RF \\
\hline
- \textsf{fahrenheit\_to\_celsius}& 521 & 1177 & 495 & 0.579 & 0.933 & 0.708 & 0.935 & 0.867 \\
- %\textsf{pythagorean\_theorem}& 349 & 669 & 317 & 0.499 & 0.809 \\
- \textsf{ballistics}& 248 & 873 & 209 & 0.761 & 0.802 & 0.551 & 0.666 & 0.478 \\
- \textsf{average}& 209 & 482 & 186 & 0.614 & 0.830 & 0.230 & 0.338 & 0.618 \\
+ \textsf{F2C}& 0.579 & 0.933 \\
+ \textsf{ballistics}& 0.761 & 0.802 \\
+ \textsf{average}& 0.614 & 0.830 \\
\hline
- \textsf{buy\_five}& 294 & 476 & 292 & 0.613 & 0.828 & 0.234 & 0.489 & 0.719 \\
- \textsf{competition}& 227 & 327 & 230 & 0.703 & 0.847 & 0.361 & 0.515 & 0.896 \\
- \textsf{top\_shop}& 142 & 476 & 133 & 0.721 & 0.758 & 0.399 & 0.802 & 0.444 \\
- \textsf{minimax}& 74 & 163 & 57 & 0.650 & 0.644 & 0.462 & 0.745 & 0.298 \\
- \textsf{checking\_account}& 132 & 234 & 112 & 0.521 & 0.744 & 0.143 & 0.491 & 0.115\\
- \textsf{consumers\_anon}& 65 & 170 & 53 & 0.688 & 0.800 & 0.376 & 0.880 & 0.623 \\
+ \textsf{buy\_five}& 0.613 & 0.828 \\
+ \textsf{competition}& 0.703 & 0.847 \\
+ \textsf{top\_shop}& 0.721 & 0.758 \\
+ \textsf{minimax}& 0.650 & 0.644 \\
+ \textsf{ch\_account}& 0.521 & 0.744 \\
+ \textsf{con\_anon}& 0.688 & 0.800 \\
\hline
- \textsf{greatest}& 70 & 142 & 83 & 0.585 & 0.859 & 0.492 & 0.746 & 0.880\\
- \textsf{greatest\_abs}& 58 & 155 & 57 & 0.632 & 0.845 & 0.612 & 0.878 & 0.789 \\
- \textsf{greatest\_neg}& 62 & 195 & 71 & 0.636 & 0.815 & 0.621 & 0.960 & 0.718 \\
+ \textsf{greatest}& 0.585 & 0.859 \\
+ \textsf{greatest\_abs}& 0.632 & 0.845 \\
+ \textsf{greatest\_neg}& 0.636 & 0.815 \\
\hline
- Total / average & 2102 & 4811 & 1978 & 0.642 & 0.809 & 0.432 & 0.704 & 0.620 \\
- \end{tabular}
- \label{tab:stats}
-\end{table} \ No newline at end of file
+ Average & 0.642 & 0.809 \\
+ \end{tabular}
+ \end{center}
+ \end{minipage}}
+ \end{column}
+\end{columns}
+\vspace{1.0cm}
+\textbf{Vizualizations of patterns}; left program contains pattern \textsf{P64}, right program contains pattern \textsf{P2} (red) and \textsf{P70} (blue) that matches the call to \textsf{abs} in an assignment statement nested within a for loop and an if clause.
+\begin{Verbatim}
+\textbf{def} max_abs(l): \textbf{def} max_abs(l):
+ vmax = 0 vmax = None
+ \textbf{for} i \textbf{in} range(len(l)): \textbf{for} v \textbf{in} l:
+ \textbf{if} \blue{vmax} < abs(l[i]): \textbf{if} vmax==None or vmax<v:
+ vmax = l[i] \red{vmax} = \blue{abs}(v)
+ \textbf{return} \blue{vmax} \textbf{return} \red{vmax}
+\end{Verbatim}
+
+