diff options
Diffstat (limited to 'aied2018')
-rw-r--r-- | aied2018/presentation/aied_poster.tex | 25 | ||||
-rw-r--r-- | aied2018/presentation/patterns.tex | 156 | ||||
-rw-r--r-- | aied2018/presentation/rules.tex | 2 |
3 files changed, 96 insertions, 87 deletions
diff --git a/aied2018/presentation/aied_poster.tex b/aied2018/presentation/aied_poster.tex index 287cb5d..ad9850c 100644 --- a/aied2018/presentation/aied_poster.tex +++ b/aied2018/presentation/aied_poster.tex @@ -18,9 +18,6 @@ \usepackage{fancyvrb,courier} \fvset{commandchars=\\\{\},baselinestretch=0.98,samepage=true,xleftmargin=2.5mm} -\newcommand{\blockline}{\par\noindent\hspace{-0.03\textwidth}% - {\rule{1.03\textwidth}{3pt}}\par\nobreak} - \usepackage{tikz} \usepackage{forest} \usetikzlibrary{arrows.meta,calc} @@ -36,13 +33,13 @@ \title{Syntax-based analysis of programming concepts in Python} \author{Martin Možina \& Timotej Lazar} \institute{University of Ljubljana, Faculty of Computer and Information Science, Slovenia} -\def\myemail{martin.mozina@fri.uni-lj.si,timotej.lazar@fri.uni-lj.si} -\def\mywebpage{http://www.ailab.si/aied2018} -\titlegraphic{img/FRI_logo_eng_zaNogo.png} +\def\myemail{$\lbrace$martin.mozina,timotej.lazar$\rbrace$@fri.uni-lj.si} +\def\mywebpage{https://ailab.si/ast-patterns}\titlegraphic{img/FRI_logo_eng_zaNogo.png} \begin{document} + \begin{frame}[fragile] \begin{columns} @@ -51,23 +48,17 @@ \begin{beamercolorbox}[center]{postercolumn} \begin{minipage}[t][\columnheight]{.95\textwidth} % tweaks the width, makes a new \textwidth %\parbox[t][\columnheight]{\textwidth}{ % must be some better way to set the the height, width and textwidth simultaneously - + \setbeamercolor*{block title}{fg=white,bg=FRIRed} \setbeamercolor*{block body}{fg=black, bg=white} \begin{myblock}{Motivation and Research Questions} - - \input{motivation.tex} + \input{motivation.tex} \end{myblock} \setbeamercolor*{block title}{fg=white,bg=TitleBG} - \begin{myblock}{AST Patterns} + \begin{myblock}{AST patterns} \input{patterns.tex} \end{myblock} %} - \setbeamercolor*{block title}{fg=white,bg=TitleBG} - \begin{myblock}{Constructing patterns} - \input{constructing.tex} - \end{myblock} - \end{minipage} \end{beamercolorbox} @@ -75,6 +66,7 @@ \begin{column}{0.50\textwidth} \begin{beamercolorbox}[center]{postercolumn} + \begin{minipage}{.95\textwidth} % tweaks the width, makes a new \textwidth \parbox[t][\columnheight]{\textwidth}{ % must be some better way to set the the height, width and textwidth simultaneously \setbeamercolor*{block title}{fg=white,bg=abstract} @@ -88,6 +80,7 @@ \end{abstractblock}\vfill \setbeamercolor*{block title}{fg=white,bg=TitleBG} \setbeamercolor*{block body}{fg=black, bg=white} + \begin{myblock}{Learning Rules and Results} \input{rules.tex} \end{myblock}\vfill @@ -96,6 +89,7 @@ \begin{itemize} \item Abstract-syntax-tree (AST) patterns for representing program patterns. \item Patterns are extracted automatically and combined into n-rules(errors) and p-rules (approaches) with machine learning. + \item Patterns are useful, because in our experiment ... \begin{itemize} \item classification accuracy of Random Forest was 17\% overall higher than default accuracy. @@ -104,6 +98,7 @@ \end{itemize} \item However ... \begin{itemize} + \item In some domains, patterns were not informative (\textsf{ballistics} and \textsf{minimax}), therefore more sophisticated patterns are needed. \item To construct new patterns, a tool for vizualization of patterns is needed. \end{itemize} diff --git a/aied2018/presentation/patterns.tex b/aied2018/presentation/patterns.tex index b966eba..b581157 100644 --- a/aied2018/presentation/patterns.tex +++ b/aied2018/presentation/patterns.tex @@ -1,90 +1,104 @@ - \begin{figure}[tbp] +AST patterns encode program structure using tree-regular expressions. +For instance, the pattern +\textbf{(a (b (\textasciicircum{} d . e \$)) c)} +matches the tree + +\begin{figure}[tbp] \centering +\vspace{0.8cm} \begin{forest} - for tree={ - font=\sf, - edge=darkgray, - l sep=0, - l=0.9cm, - } - [a,name=a - [f] - [b,name=b - [d,name=d] [g,name=g] [e,name=e]] - [c,name=c [j] [k]] - [h] - ] - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (a) edge[transform canvas={xshift=0.8mm,yshift=-0.2mm}] (b); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (a) edge[transform canvas={xshift=-0.9mm,yshift=-0.3mm}] (c); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (b) edge[transform canvas={xshift=-1.1mm,yshift=0.2mm}] (d); - \draw[opacity=0] (b) -- node[anchor=east,thick,blue,opacity=1,font=\large] {\texttt{\textasciicircum}} (d); - \path[draw,thick,relative,blue,transform canvas={xshift=-0.7mm}] (b) -- ($ (b)!0.6!(g) $); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (b) edge[transform canvas={xshift=1.1mm,yshift=0.2mm}] (e); - \draw[opacity=0] (b) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=0mm,yshift=1mm}] {\texttt{\$}} (e); +for tree={ + font=\sf, + edge={line width=2pt}, + s sep=1.5cm, +} +[a, name=a,draw,rectangle,blue,thick + [f] + [b, name=b,draw,rectangle,blue,thick + [d, name=d,draw,rectangle,blue,thick] + [g,name=g] + [e,name=e,draw,rectangle,blue,thick]] + [c, name=c,draw,rectangle,blue,thick [j] [k]] + [h] +] +\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (a) edge[line width=2pt,transform canvas={xshift=1mm,yshift=-2mm}] (b); +\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (a) edge[line width=2pt,transform canvas={xshift=-0.9mm,yshift=-2mm}] (c); +\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (b) edge[line width=2pt,transform canvas={xshift=-1.4mm,yshift=1mm}] (d); +\draw[opacity=0] (b) -- node[anchor=east,thick,blue,opacity=1,transform canvas={xshift=3mm,yshift=1mm}] {\texttt{\textasciicircum}} (d); +\path[draw,thick,relative,blue,transform canvas={xshift=-1.5mm,yshift=0mm},line width=2pt] (b) -- ($ (b)!0.6!(g) $); +\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (b) edge[line width=2pt,transform canvas={xshift=1.5mm,yshift=1mm}] (e); +\draw[opacity=0] (b) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=-3mm,yshift=5mm}] {\texttt{\$}} (e); \end{forest} - \caption{A tree matching a pattern (blue arrows besides the edges). In the pattern, each arrow $x→y$ means that node $x$ has a child $y$. A shorter line without an arrowhead (e.g. \pattern{b}$\boldsymbol{-}$ \pattern{g}) indicates a wildcard, where the child can be any node. Anchors \texttt{\textbf{\textasciicircum}} and \texttt{\$} mean that the pattern will match only the first or last child.} +\vspace{0.4cm} \label{fig:tre-example} \end{figure} -With TREs we encode interesting patterns in a program while disregarding irrelevant parts. Take for example the following, nearly correct Python function that prints the divisors of its argument $n$: + +%A tree matching a pattern (blue arrows besides the edges). In the pattern, each arrow $x→y$ means that node $x$ has a child $y$. A shorter line without an arrowhead (e.g. \pattern{b}$\boldsymbol{-}$ \pattern{g}) indicates a wildcard, where the child can be any node. Anchors \texttt{\textbf{\textasciicircum}} and \texttt{\$} mean that the pattern will match only the first or last child. + +Such patterns can represent a program’s control structure, relationships between variables, etc. The red pattern below matches nested \code{for} and \code{if} statements, while the blue pattern relates two occurrences of the variable~\code{n}. \begin{figure}[htb] \centering +\vspace{0.8cm} \begin{forest} - for tree={ - font=\sf, - edge=darkgray, - l sep=0, - l=0.9cm, - } - [Function, name=def, draw,rectangle,red,dashed - [name, name=name1 [divisors, name=divisors, font=\bfseries\sffamily]] - [args, name=args1 - [Var, name=args1-1, draw,rectangle,blue [n, font=\bfseries\sffamily, l=0.7cm]]] - [body, name=body1, before computing xy={s=2cm} - [For, name=for, l=1.2cm, draw,rectangle,red,dashed - [target - [Var, l=0.9cm [d, font=\bfseries\sffamily, l=0.7cm]]] - [iter, name=iter - [Call, name=call, l=0.9cm - [func, name=func, l=0.8cm [range, name=range, font=\bfseries\sffamily, l=0.7cm]] - [args, name=args2, l=0.8cm - [Num, name=args2-1, l=1cm [1, font=\bfseries\sffamily, l=0.7cm]] - [Var, name=args2-2, l=1cm,draw,rectangle,blue [n, font=\bfseries\sffamily, l=0.7cm]]]]] - [body, name=body2 - [If, name=if, draw,rectangle,red,dashed - [test, l=0.7cm [Compare, l=0.8cm - [BinOp - [Var [n, font=\bfseries\sffamily, l=0.6cm]] - [\%, font=\bfseries\sffamily] - [Var [d, font=\bfseries\sffamily, l=0.6cm]]] - [{==}, font=\bfseries\sffamily, l=0.7cm] - [Num [0, font=\bfseries\sffamily, l=0.7cm]]]] - [body, l=0.7cm [Call, l=0.8cm +for tree={ + font=\sf, + edge={line width=2pt}, +} +[Function, name=def, draw,rectangle,red,line width=2pt + [name, name=name1 [divisors, name=divisors, font=\bfseries\sffamily]] + [args, name=args1 + [Var, name=args1-1, draw,rectangle,blue,line width=2pt [n, font=\bfseries\sffamily, l=0.7cm]]] + [body, name=body1, before computing xy={s=10cm} + [For, name=for, l=5cm, draw,rectangle,red,line width=2pt + [target + [Var, l=0.9cm [d, font=\bfseries\sffamily, l=0.7cm]]] + [iter, name=iter + [Call, name=call, l=0.9cm + [func, name=func, l=0.8cm [range, name=range, font=\bfseries\sffamily, l=0.7cm]] + [args, name=args2, l=0.8cm + [Num, name=args2-1, l=1cm [1, font=\bfseries\sffamily, l=0.7cm]] + [Var, name=args2-2, l=1cm,draw,rectangle,blue,line width=2pt [n, font=\bfseries\sffamily, l=0.7cm]]]]] + [body, name=body2 + [If, name=if, draw,rectangle,red,line width=2pt + [test, l=0.7cm [Compare, l=0.8cm + [BinOp + [Var [n, font=\bfseries\sffamily, l=0.6cm]] + [\%, font=\bfseries\sffamily] + [Var [d, font=\bfseries\sffamily, l=0.6cm]]] + [{==}, font=\bfseries\sffamily, l=0.7cm] + [Num [0, font=\bfseries\sffamily, l=0.7cm]]]] + [body, l=0.7cm [Call, l=0.8cm [func [print, font=\bfseries\sffamily, l=0.7cm]] [args [Var, l=0.7cm [d, font=\bfseries\sffamily, l=0.6cm]]]]]]]]]] % first pattern - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red,dashed] (def) edge[transform canvas={xshift=1.1mm,yshift=0.2mm}] (body1); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red,dashed] (body1) edge[transform canvas={xshift=0.8mm}] (for); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red,dashed] (for) edge[transform canvas={xshift=1.1mm,yshift=0.5mm}] (body2); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red,dashed] (body2) edge[transform canvas={xshift=0.8mm}] (if); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (def) edge[line width=2pt,transform canvas={xshift=1.1mm,yshift=1.5mm}] (body1); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (body1) edge[line width=2pt,transform canvas={xshift=2mm}] (for); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (for) edge[line width=2pt,transform canvas={xshift=1.1mm,yshift=1.5mm}] (body2); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (body2) edge[line width=2pt,transform canvas={xshift=2mm}] (if); % second pattern - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (def) edge[transform canvas={xshift=-1.1mm,yshift=0.2mm}] (name1); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (def) edge[transform canvas={xshift=-0.8mm}] (args1); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (args1) edge[transform canvas={xshift=-0.8mm}] (args1-1); - \draw[opacity=0] (args1) -- node[anchor=east,thick,blue,opacity=1,font=\large] {\texttt{\textasciicircum}} (args1-1); - \draw[opacity=0] (args1) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=-0.5mm,yshift=0.6mm}] {\texttt{\$}} (args1-1); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (def) edge[transform canvas={xshift=-1mm,yshift=-0.3mm}] (body1); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (body1) edge[transform canvas={xshift=-0.8mm}] (for); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (for) edge[transform canvas={xshift=1mm,yshift=-0.5mm}] (iter); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (iter) edge[transform canvas={xshift=0.8mm}] (call); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (call) edge[transform canvas={xshift=-1.2mm}] (func); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (call) edge[transform canvas={xshift=1.2mm}] (args2); - \path[draw,thick,relative,blue,transform canvas={xshift=-0.7mm}] (args2) -- ($ (args2)!0.55!(args2-1) $); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (def) edge[line width=2pt,transform canvas={xshift=-3mm,yshift=0mm}] (name1); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (def) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (args1); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (args1) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (args1-1); + \draw[opacity=0] (args1) -- node[anchor=east,thick,blue,opacity=1,transform canvas={xshift=1.5mm,yshift=-1mm}] {\texttt{\textasciicircum}} (args1-1); + \draw[opacity=0] (args1) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=-2mm,yshift=0.6mm}] {\texttt{\$}} (args1-1); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (def) edge[line width=2pt,transform canvas={xshift=-2mm,yshift=-1mm}] (body1); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (body1) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (for); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (for) edge[line width=2pt,transform canvas={xshift=-2mm,yshift=1mm}] (iter); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (iter) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (call); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (call) edge[line width=2pt,transform canvas={xshift=-2mm,yshift=1mm}] (func); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (call) edge[line width=2pt,transform canvas={xshift=2.4mm,yshift=0.5mm}] (args2); + \path[draw,thick,relative,blue,transform canvas={xshift=-2mm},line width=2pt] (args2) -- ($ (args2)!0.6!(args2-1) $); \draw[opacity=0] (args2) -- node[anchor=east,thick,blue,opacity=1,font=\large,transform canvas={xshift=0.6mm,yshift=0.4mm}] {\texttt{\textasciicircum}} (args2-1); - \path[-{Latex[length=1.5mm,width=1mm]},thick,relative,blue] (args2) edge[transform canvas={xshift=1mm}] (args2-2); + \path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (args2) edge[line width=2pt,transform canvas={xshift=2.2mm}] (args2-2); \draw[opacity=0] (args2) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=0mm,yshift=1mm}] {\texttt{\$}} (args2-2); \end{forest} - \caption{The AST for the \code{divisors} program with two patterns. Leaf nodes (in bold) correspond to terminals in the program, i.e. names and values. Dashed red arrows represent the pattern describing the control structure of the program. Solid blue arrows encode the incorrect second argument to the \code{range} function.} +\vspace{0.4cm} \label{fig:patterns-example} \end{figure} +\textbf{Constructing patterns.} We select interesting subsets of nodes (shown in boxes above). For each nodeset, a pattern is constructed by walking the tree from selected nodes to the root and including nodes on those paths. + +\vspace{0.5cm} +Patterns are constructed automatically; we only need to specify the “interesting” kinds of nodesets. +To specify a pattern more precisely, additional nodes adjacent to the path -- such as function name -- may be included. diff --git a/aied2018/presentation/rules.tex b/aied2018/presentation/rules.tex index 040ad72..994e3a0 100644 --- a/aied2018/presentation/rules.tex +++ b/aied2018/presentation/rules.tex @@ -48,7 +48,7 @@ $\vdotswithin{S_4}$ & & $\vdotswithin{1}$ & & & $\vdotswithin{correct}$ \\ \textbf{return} vmax \end{Verbatim} \vspace{0.5cm} - \textbf{Two sample n-rules:} + \textbf{Two sample learned n-rules:} \begin{itemize} \item \textsf{P64 ⇒ incorrect } (covers 22) \item \textsf{P2 ∧ P70 ⇒ incorrect} (covers 17) |