summaryrefslogtreecommitdiff
path: root/aied2018/presentation/patterns.tex
blob: cdb6aad34f582e93d37a45065b3f833d35b31524 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
AST patterns encode program structure using tree-regular expressions.
For instance, the pattern
\textbf{(a (b (\textasciicircum{} d . e \$)) c)}
matches the tree

\begin{figure}[tbp]
	\centering
\vspace{0.8cm}
	\begin{forest}
for tree={
  font=\sf,
  edge={line width=2pt},
  s sep=1.5cm,
}
[a, name=a,draw,rectangle,blue,thick
  [f]
  [b, name=b,draw,rectangle,blue,thick
    [d, name=d,draw,rectangle,blue,thick]
    [g,name=g]
    [e,name=e,draw,rectangle,blue,thick]]
  [c, name=c,draw,rectangle,blue,thick [j] [k]]
  [h]
]
\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (a) edge[line width=2pt,transform canvas={xshift=1mm,yshift=-2mm}] (b);
\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (a) edge[line width=2pt,transform canvas={xshift=-0.9mm,yshift=-2mm}] (c);
\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (b) edge[line width=2pt,transform canvas={xshift=-1.4mm,yshift=1mm}] (d);
\draw[opacity=0] (b) -- node[anchor=east,thick,blue,opacity=1,transform canvas={xshift=3mm,yshift=1mm}] {\texttt{\textasciicircum}} (d);
\path[draw,thick,relative,blue,transform canvas={xshift=-1.5mm,yshift=0mm},line width=2pt] (b) -- ($ (b)!0.6!(g) $);
\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (b) edge[line width=2pt,transform canvas={xshift=1.5mm,yshift=1mm}] (e);
\draw[opacity=0] (b) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=-3mm,yshift=5mm}] {\texttt{\$}} (e);
	\end{forest}
\vspace{0.4cm}
	\label{fig:tre-example}
\end{figure}

%A tree matching a pattern (blue arrows besides the edges). In the pattern, each arrow $x→y$ means that node $x$ has a child $y$. A shorter line without an arrowhead (e.g. \pattern{b}$\boldsymbol{-}$ \pattern{g}) indicates a wildcard, where the child can be any node. Anchors \texttt{\textbf{\textasciicircum}} and \texttt{\$} mean that the pattern will match only the first or last child.

Such patterns can represent a program’s control structure, relationships between variables, etc. The red pattern below matches nested \code{for} and \code{if} statements, while the blue pattern relates two occurrences of the variable~\code{n}.

\begin{textblock}{0}(4,0.5)
\begin{Verbatim}
\textbf{\red{def}} divisors(\textbf{\blue{n}}):
  \textbf{\red{for}} d \textbf{in} range(1,\textbf{\blue{n}}):
    \textbf{\red{if}} n % d == 0:
      print(d)
\end{Verbatim}
\end{textblock}

\begin{figure}[htb]
	\centering
\vspace{0.8cm}
	\begin{forest}
for tree={
  font=\sf,
  edge={line width=2pt},
}
[Function, name=def, draw,rectangle,red,line width=2pt
  [name, name=name1 [divisors, name=divisors, font=\bfseries\sffamily]]
  [args, name=args1
    [Var, name=args1-1, draw,rectangle,blue,line width=2pt [n, font=\bfseries\sffamily, l=0.7cm]]]
  [body, name=body1, before computing xy={s=6cm}
    [For, name=for, l=7cm, draw,rectangle,red,line width=2pt
      [target
        [Var, l=0.9cm [d, font=\bfseries\sffamily, l=0.7cm]]]
      [iter, name=iter
        [Call, name=call, l=0.9cm
          [func, name=func, l=0.8cm [range, name=range, font=\bfseries\sffamily, l=0.7cm]]
          [args, name=args2, l=0.8cm
            [Num, name=args2-1, l=1cm [1, font=\bfseries\sffamily, l=0.7cm]]
            [Var, name=args2-2, l=1cm,draw,rectangle,blue,line width=2pt [n, font=\bfseries\sffamily, l=0.7cm]]]]]
      [body, name=body2
        [If, name=if, draw,rectangle,red,line width=2pt
          [test, l=0.7cm [Compare, l=0.8cm
            [BinOp
              [Var [n, font=\bfseries\sffamily, l=0.6cm]]
              [\%, font=\bfseries\sffamily]
              [Var [d, font=\bfseries\sffamily, l=0.6cm]]]
            [{==}, font=\bfseries\sffamily, l=0.7cm]
            [Num [0, font=\bfseries\sffamily, l=0.7cm]]]]
        [body, l=0.7cm [Call, l=0.8cm
		[func [print, font=\bfseries\sffamily, l=0.7cm]]
		[args [Var, l=0.7cm [d, font=\bfseries\sffamily, l=0.6cm]]]]]]]]]]
		% first pattern
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (def) edge[line width=2pt,transform canvas={xshift=3.3mm,yshift=0mm}] (body1);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (body1) edge[line width=2pt,transform canvas={xshift=2mm}] (for);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (for) edge[line width=2pt,transform canvas={xshift=1.1mm,yshift=1.5mm}] (body2);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,red] (body2) edge[line width=2pt,transform canvas={xshift=2mm}] (if);
		% second pattern
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (def) edge[line width=2pt,transform canvas={xshift=-3mm,yshift=0mm}] (name1);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (def) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (args1);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (args1) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (args1-1);
		\draw[opacity=0] (args1) -- node[anchor=east,thick,blue,opacity=1,transform canvas={xshift=1.5mm,yshift=-1mm}] {\texttt{\textasciicircum}} (args1-1);
		\draw[opacity=0] (args1) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=-2mm,yshift=0.6mm}] {\texttt{\$}} (args1-1);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (def) edge[line width=2pt,transform canvas={xshift=-2mm,yshift=-1mm}] (body1);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (body1) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (for);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (for) edge[line width=2pt,transform canvas={xshift=-2mm,yshift=1mm}] (iter);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (iter) edge[line width=2pt,transform canvas={xshift=-1.7mm}] (call);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (call) edge[line width=2pt,transform canvas={xshift=-2mm,yshift=1mm}] (func);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (call) edge[line width=2pt,transform canvas={xshift=2.4mm,yshift=0.5mm}] (args2);
		\path[draw,thick,relative,blue,transform canvas={xshift=-2mm},line width=2pt] (args2) -- ($ (args2)!0.6!(args2-1) $);
		\draw[opacity=0] (args2) -- node[anchor=east,thick,blue,opacity=1,font=\large,transform canvas={xshift=0.6mm,yshift=0.4mm}] {\texttt{\textasciicircum}} (args2-1);
		\path[-{Latex[length=5mm,width=2mm]},thick,relative,blue] (args2) edge[line width=2pt,transform canvas={xshift=2.2mm}] (args2-2);
		\draw[opacity=0] (args2) -- node[anchor=west,thick,blue,opacity=1,font=\scriptsize,transform canvas={xshift=0mm,yshift=1mm}] {\texttt{\$}} (args2-2);
	\end{forest}
\vspace{0.4cm}
	\label{fig:patterns-example}
\end{figure}

\textbf{Constructing patterns.} We select interesting subsets of nodes (shown in boxes above). For each nodeset, a pattern is constructed by walking the tree from selected nodes to the root and including nodes on those paths.

\vspace{0.5cm}
Patterns are constructed automatically; we only need to specify the “interesting” kinds of nodesets.
To specify a pattern more precisely, additional nodes adjacent to the path -- such as function name -- may be included.