From 64b0ff920ea3248d467c5eb082a213204c2d01c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Mo=C5=BEina?= Date: Tue, 19 Jun 2018 20:52:28 +0200 Subject: Representation of rules. --- aied2018/presentation/aied_poster.tex | 59 +++++----------- aied2018/presentation/motivation.tex | 13 ++-- aied2018/presentation/rules.tex | 125 ++++++++++++++++++++++++++-------- 3 files changed, 122 insertions(+), 75 deletions(-) diff --git a/aied2018/presentation/aied_poster.tex b/aied2018/presentation/aied_poster.tex index 359dcb2..287cb5d 100644 --- a/aied2018/presentation/aied_poster.tex +++ b/aied2018/presentation/aied_poster.tex @@ -1,4 +1,4 @@ -\documentclass[final, professionalfont]{beamer} +\documentclass[final]{beamer} \usepackage[orientation=portrait, size=a0, scale=1.4]{beamerposter} \mode{\usetheme{AILAB}} @@ -10,23 +10,23 @@ \newunicodechar{→}{\ensuremath{\rightarrow}} \newunicodechar{⋯}{\ensuremath{\cdots}} -\usepackage{bold-extra} -\usepackage{bm} -\usepackage{hyperref} -\usepackage[normalem]{ulem} - \usepackage{color} \newcommand\red[1]{{\begingroup\color[rgb]{0.8,0.15,0.15}#1\endgroup}} \newcommand\blue[1]{{\begingroup\color[rgb]{0.15,0.15,0.8}#1\endgroup}} \newcommand\green[1]{{\begingroup\color[rgb]{0.15,0.8,0.15}#1\endgroup}} -\usepackage{fancyvrb} +\usepackage{fancyvrb,courier} \fvset{commandchars=\\\{\},baselinestretch=0.98,samepage=true,xleftmargin=2.5mm} +\newcommand{\blockline}{\par\noindent\hspace{-0.03\textwidth}% + {\rule{1.03\textwidth}{3pt}}\par\nobreak} + \usepackage{tikz} \usepackage{forest} \usetikzlibrary{arrows.meta,calc} +\usepackage{mathtools} + \newcommand\code[1]{\texttt{#1}} \newcommand\pattern[1]{\textsf{#1}} @@ -36,36 +36,12 @@ \title{Syntax-based analysis of programming concepts in Python} \author{Martin Možina \& Timotej Lazar} \institute{University of Ljubljana, Faculty of Computer and Information Science, Slovenia} -\def\myemail{martin.mozina@fri.uni-lj.si} +\def\myemail{martin.mozina@fri.uni-lj.si,timotej.lazar@fri.uni-lj.si} \def\mywebpage{http://www.ailab.si/aied2018} \titlegraphic{img/FRI_logo_eng_zaNogo.png} \begin{document} -%\begin{myverbbox}{\VerbFahren} -%F = float(input("Fahrenheit: ")) -%C = 5 / 9 * (F - 32) -%print("Celsius: ", C) -%\end{myverbbox} - -%\begin{myverbbox}{\VerbR1} -%P20 ⇒ incorrect [208, 1] -%\end{myverbbox} - -%\begin{myverbbox}{\VerbP20} -% (Module (body (Assign (value (Call (func (Name (id int) (ctx Load)))))))) -%\end{myverbbox} - -%\begin{myverbbox}{\VerbR2} -% P5 ∧ P35 ⇒ incorrect [72, 0] -%\end{myverbbox} - -%\begin{myverbbox}{\VerbProgram} -%g2 = input() -%g1 = \blue{\underline{int}}(g2) -%print(((g1-32)*(5/9))) -%\end{myverbbox} - \begin{frame}[fragile] @@ -73,12 +49,13 @@ \begin{column}{0.50\textwidth} \begin{beamercolorbox}[center]{postercolumn} - \begin{minipage}[t][\columnheight]{.90\textwidth} % tweaks the width, makes a new \textwidth + \begin{minipage}[t][\columnheight]{.95\textwidth} % tweaks the width, makes a new \textwidth %\parbox[t][\columnheight]{\textwidth}{ % must be some better way to set the the height, width and textwidth simultaneously - + \setbeamercolor*{block title}{fg=white,bg=FRIRed} \setbeamercolor*{block body}{fg=black, bg=white} \begin{myblock}{Motivation and Research Questions} + \input{motivation.tex} \end{myblock} \setbeamercolor*{block title}{fg=white,bg=TitleBG} @@ -98,7 +75,7 @@ \begin{column}{0.50\textwidth} \begin{beamercolorbox}[center]{postercolumn} - \begin{minipage}{.90\textwidth} % tweaks the width, makes a new \textwidth + \begin{minipage}{.95\textwidth} % tweaks the width, makes a new \textwidth \parbox[t][\columnheight]{\textwidth}{ % must be some better way to set the the height, width and textwidth simultaneously \setbeamercolor*{block title}{fg=white,bg=abstract} \setbeamercolor*{block body}{fg=black, bg=tlg} @@ -111,7 +88,7 @@ \end{abstractblock}\vfill \setbeamercolor*{block title}{fg=white,bg=TitleBG} \setbeamercolor*{block body}{fg=black, bg=white} - \begin{myblock}{Learning rules and results} + \begin{myblock}{Learning Rules and Results} \input{rules.tex} \end{myblock}\vfill \setbeamercolor*{block title}{fg=white,bg=FRIRed} @@ -119,15 +96,15 @@ \begin{itemize} \item Abstract-syntax-tree (AST) patterns for representing program patterns. \item Patterns are extracted automatically and combined into n-rules(errors) and p-rules (approaches) with machine learning. - \item Patterns are useful, because ... + \item Patterns are useful, because in our experiment ... \begin{itemize} - \item They increase accuracy by 17\% overall. - \item n-rules explain over 70\% of incorrect submissions. - \item p-rules explain 62\% of correct programs. + \item classification accuracy of Random Forest was 17\% overall higher than default accuracy. + \item n-rules explained over 70\% of incorrect submissions. + \item p-rules explained 62\% of correct programs. \end{itemize} \item However ... \begin{itemize} - \item In some domains, patterns are not informative (\textsf{ballistics} and \textsf{minimax}). + \item In some domains, patterns were not informative (\textsf{ballistics} and \textsf{minimax}), therefore more sophisticated patterns are needed. \item To construct new patterns, a tool for vizualization of patterns is needed. \end{itemize} \end{itemize} diff --git a/aied2018/presentation/motivation.tex b/aied2018/presentation/motivation.tex index 785bd6e..69db435 100644 --- a/aied2018/presentation/motivation.tex +++ b/aied2018/presentation/motivation.tex @@ -1,12 +1,13 @@ - What is wrong with the following program that prints all divisors? + What is wrong with the following Python program that prints all divisors? \begin{columns} \begin{column}{0.50\textwidth} \begin{Verbatim} - \textbf{def} divisors(n): - \textbf{for} d \textbf{in} range(1, \red{n}): - \textbf{if} n % d == 0: - \textbf{print}(d) -\end{Verbatim} +\textbf{def} divisors(n): + \textbf{for} d \textbf{in} range(1, \red{n}): + \textbf{if} n % d == 0: + \textbf{print}(d) +\end{Verbatim} + \end{column} \begin{column} {0.50\textwidth} Answer: \texttt{range(1,n)} generates values up to \texttt{n-1}, so \texttt{n} is not printed. Instead, \texttt{range(1,n+1)} is better. diff --git a/aied2018/presentation/rules.tex b/aied2018/presentation/rules.tex index fd53f9b..040ad72 100644 --- a/aied2018/presentation/rules.tex +++ b/aied2018/presentation/rules.tex @@ -1,34 +1,103 @@ +\textbf{The dataset} + +\begin{columns} +\begin{column}{0.40\textwidth} + +\begin{tabular}{l|rrrr|l} + & $P_1$ & $P_2$ & $P_3$ & $\ldots$ & class \\ + \hline +$S_1$ & 0 & 1 & 1 & $\ldots$ & $correct$ \\ +$S_2$ & 1 & 0 & 0 & $\ldots$ & $correct$ \\ +$S_3$ & 1 & 1 & 0 & $\ldots$ & $incorrect$ \\ +$\vdotswithin{S_4}$ & & $\vdotswithin{1}$ & & & $\vdotswithin{correct}$ \\ +\end{tabular} +\end{column} +\begin{column}{0.60\textwidth} + \begin{itemize} + \item Each submission ($S_1, S_2, S_3, \ldots$) becomes a learning instance. + \item Each constructed pattern ($P_1, P_2, P_3, \ldots$) is a binary feature. + \item Based on test results each submission is classified either as $correct$ or $incorrect$ + \end{itemize} +\end{column} +\end{columns} +\vspace{1cm} + + +\begin{columns} + \begin{column}{0.01\textwidth} + \end{column} + \begin{column}{0.59\textwidth} + \textbf{Characterizing typical approaches and errors with rule learning} + \begin{itemize} + \item \emph{n-rules} describe buggy patterns: \\IF $condition$ THEN $incorrect$. + \item \emph{p-rules} describe necessary patterns for programs to be correct: \\IF $condition$ THEN $correct$. + \end{itemize} + \vspace{0.5cm} + \textbf{Example:} Implement a function that returns the element with the largest absolute value. + \begin{itemize} + \item 155 submissions (57 correct, 98 incorrect) + \item 8298 patterns, 15 n-rules and 6 p-rules. + \end{itemize} + \textbf{Correct solution:} + \begin{Verbatim} +\textbf{def} max_abs(l): + vmax = l[0] + \textbf{for} v \textbf{in} l: + \textbf{if} abs(v) > abs(vmax): + vmax = v + \textbf{return} vmax + \end{Verbatim} + \vspace{0.5cm} + \textbf{Two sample n-rules:} \begin{itemize} - \item classification accuracy of each rule must exceed 90\%, because we deem a 10\% false-positive error as acceptable; - \item each term in the condition of a rule must be significant according to the likelihood test, meaning that each pattern in the condition part is indeed relevant (we set the significance threshold to p=0.05); - \item a condition can have at most 3 patterns; and - \item each rule must cover at least 5 distinct programs -- to avoid learning redundant rules representing the same error. - \end{itemize} + \item \textsf{P64 ⇒ incorrect } (covers 22) + \item \textsf{P2 ∧ P70 ⇒ incorrect} (covers 17) + \end{itemize} -\begin{table}[t] - \caption{Solving statistics, classification accuracy, and coverage of rules for several introductory Python problems. The second column shows the number of users attempting the problem. Columns 3 and 4 show the number of all / correct submissions. The next two columns show the classification accuracy for the majority and random-forest classifiers. The last three columns show percentages of covered examples: columns $n_p$ and $n$ give covered incorrect programs (n-rules with presence of patterns and all n-rules), and column $p$ gives the percentage of correct programs covered by p-rules.} - \centering - \begin{tabular}{l|c|cc|cc|ccc} - & & \multicolumn{2}{c|}{\textbf{Submissions}} & \multicolumn{2}{c|}{\textbf{CA}} & \multicolumn{3}{c}{\textbf{Coverage}} \\ - \textbf{Problem} & \textbf{Users} & Total & Correct & Maj & RF & $n_p$ & $n$ & $p$ \\ + \end{column} + \begin{column}{0.40\textwidth} + \fbox{ + \begin{minipage}[t]{0.94\textwidth} + \textbf{How useful are patterns?} + \begin{itemize} + \item Compare accuracies of Random Forest and Majority Classifier. + \item Three types of exercises (basic, loops, functions) + \end{itemize} + \vspace{1cm} + \begin{center} + \begin{tabular}{l|rr} + \textbf{Problem} & Maj & RF \\ \hline - \textsf{fahrenheit\_to\_celsius}& 521 & 1177 & 495 & 0.579 & 0.933 & 0.708 & 0.935 & 0.867 \\ - %\textsf{pythagorean\_theorem}& 349 & 669 & 317 & 0.499 & 0.809 \\ - \textsf{ballistics}& 248 & 873 & 209 & 0.761 & 0.802 & 0.551 & 0.666 & 0.478 \\ - \textsf{average}& 209 & 482 & 186 & 0.614 & 0.830 & 0.230 & 0.338 & 0.618 \\ + \textsf{F2C}& 0.579 & 0.933 \\ + \textsf{ballistics}& 0.761 & 0.802 \\ + \textsf{average}& 0.614 & 0.830 \\ \hline - \textsf{buy\_five}& 294 & 476 & 292 & 0.613 & 0.828 & 0.234 & 0.489 & 0.719 \\ - \textsf{competition}& 227 & 327 & 230 & 0.703 & 0.847 & 0.361 & 0.515 & 0.896 \\ - \textsf{top\_shop}& 142 & 476 & 133 & 0.721 & 0.758 & 0.399 & 0.802 & 0.444 \\ - \textsf{minimax}& 74 & 163 & 57 & 0.650 & 0.644 & 0.462 & 0.745 & 0.298 \\ - \textsf{checking\_account}& 132 & 234 & 112 & 0.521 & 0.744 & 0.143 & 0.491 & 0.115\\ - \textsf{consumers\_anon}& 65 & 170 & 53 & 0.688 & 0.800 & 0.376 & 0.880 & 0.623 \\ + \textsf{buy\_five}& 0.613 & 0.828 \\ + \textsf{competition}& 0.703 & 0.847 \\ + \textsf{top\_shop}& 0.721 & 0.758 \\ + \textsf{minimax}& 0.650 & 0.644 \\ + \textsf{ch\_account}& 0.521 & 0.744 \\ + \textsf{con\_anon}& 0.688 & 0.800 \\ \hline - \textsf{greatest}& 70 & 142 & 83 & 0.585 & 0.859 & 0.492 & 0.746 & 0.880\\ - \textsf{greatest\_abs}& 58 & 155 & 57 & 0.632 & 0.845 & 0.612 & 0.878 & 0.789 \\ - \textsf{greatest\_neg}& 62 & 195 & 71 & 0.636 & 0.815 & 0.621 & 0.960 & 0.718 \\ + \textsf{greatest}& 0.585 & 0.859 \\ + \textsf{greatest\_abs}& 0.632 & 0.845 \\ + \textsf{greatest\_neg}& 0.636 & 0.815 \\ \hline - Total / average & 2102 & 4811 & 1978 & 0.642 & 0.809 & 0.432 & 0.704 & 0.620 \\ - \end{tabular} - \label{tab:stats} -\end{table} \ No newline at end of file + Average & 0.642 & 0.809 \\ + \end{tabular} + \end{center} + \end{minipage}} + \end{column} +\end{columns} +\vspace{1.0cm} +\textbf{Vizualizations of patterns}; left program contains pattern \textsf{P64}, right program contains pattern \textsf{P2} (red) and \textsf{P70} (blue) that matches the call to \textsf{abs} in an assignment statement nested within a for loop and an if clause. +\begin{Verbatim} +\textbf{def} max_abs(l): \textbf{def} max_abs(l): + vmax = 0 vmax = None + \textbf{for} i \textbf{in} range(len(l)): \textbf{for} v \textbf{in} l: + \textbf{if} \blue{vmax} < abs(l[i]): \textbf{if} vmax==None or vmax