From c868100877b4dafcb1251b600bfb85c82532d57c Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Sat, 1 Apr 2017 20:32:46 +0200 Subject: =?UTF-8?q?Rename=20=E2=80=9Caied2017=E2=80=9D=20directory=20to=20?= =?UTF-8?q?=E2=80=9Cpaper=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- aied2017/aied2017.bib | 328 ---------------------------------------------- aied2017/aied2017.tex | 63 --------- aied2017/background.tex | 25 ---- aied2017/conclusion.tex | 19 --- aied2017/evaluation.tex | 71 ---------- aied2017/introduction.tex | 27 ---- aied2017/method.tex | 82 ------------ aied2017/outline.org | 81 ------------ aied2017/patterns.tex | 191 --------------------------- paper/aied2017.bib | 328 ++++++++++++++++++++++++++++++++++++++++++++++ paper/aied2017.tex | 63 +++++++++ paper/background.tex | 25 ++++ paper/conclusion.tex | 19 +++ paper/evaluation.tex | 71 ++++++++++ paper/introduction.tex | 27 ++++ paper/method.tex | 82 ++++++++++++ paper/outline.org | 81 ++++++++++++ paper/patterns.tex | 191 +++++++++++++++++++++++++++ 18 files changed, 887 insertions(+), 887 deletions(-) delete mode 100644 aied2017/aied2017.bib delete mode 100644 aied2017/aied2017.tex delete mode 100644 aied2017/background.tex delete mode 100644 aied2017/conclusion.tex delete mode 100644 aied2017/evaluation.tex delete mode 100644 aied2017/introduction.tex delete mode 100644 aied2017/method.tex delete mode 100644 aied2017/outline.org delete mode 100644 aied2017/patterns.tex create mode 100644 paper/aied2017.bib create mode 100644 paper/aied2017.tex create mode 100644 paper/background.tex create mode 100644 paper/conclusion.tex create mode 100644 paper/evaluation.tex create mode 100644 paper/introduction.tex create mode 100644 paper/method.tex create mode 100644 paper/outline.org create mode 100644 paper/patterns.tex diff --git a/aied2017/aied2017.bib b/aied2017/aied2017.bib deleted file mode 100644 index af09d8a..0000000 --- a/aied2017/aied2017.bib +++ /dev/null @@ -1,328 +0,0 @@ -@inproceedings{bagrak2004trx, - title={{trx}: Regular-tree expressions, now in Scheme}, - author={Bagrak, Ilya and Shivers, Olin}, - booktitle={Proceedings of the Fifth Workshop on Scheme and Functional Programming}, - pages={21--32}, - year={2004} -} - -@article{demsar2013orange, - author = {Janez Demšar and Tomaž Curk and Aleš Erjavec and Črt Gorup and Tomaž Hočevar and Mitar Milutinovič and Martin Možina and Matija Polajnar and Marko Toplak and Anže Starič and Miha Štajdohar and Lan Umek and Lan Žagar and Jure Žbontar and Marinka Žitnik and Blaž Zupan}, - title = {Orange: Data Mining Toolbox in Python}, - journal = {Journal of Machine Learning Research}, - year = {2013}, - volume = {14}, - pages = {2349-2353}, - url = {http://jmlr.org/papers/v14/demsar13a.html} -} - -@inproceedings{folsom-kovarik2010plan, - title={Plan ahead: Pricing {ITS} learner models}, - author={Folsom-Kovarik, Jeremiah T and Schatz, Sae and Nicholson, Denise}, - booktitle={Proc. 
19th Behavior Representation in Modeling \& Simulation Conference}, - pages={47--54}, - year={2010} -} - -@article{fossati2015data, - title={Data driven automatic feedback generation in the {iList} intelligent tutoring system}, - author={Fossati, Davide and Di Eugenio, Barbara and Ohlsson, Stellan and Brown, Christopher and Chen, Lin}, - journal={Technology, Instruction, Cognition and Learning}, - volume={10}, - number={1}, - pages={5--26}, - year={2015} -} - -@article{gerdes2016ask-elle, - title={Ask-Elle: an adaptable programming tutor for Haskell giving automated feedback}, - author={Gerdes, Alex and Heeren, Bastiaan and Jeuring, Johan and van Binsbergen, L Thomas}, - journal={International Journal of Artificial Intelligence in Education}, - pages={1--36}, - year={2016}, - publisher={Springer} -} - -@article{glassman2015overcode, - title={{OverCode}: Visualizing variation in student solutions to programming problems at scale}, - author={Glassman, Elena L and Scott, Jeremy and Singh, Rishabh and Guo, Philip J and Miller, Robert C}, - journal={ACM Transactions on Computer-Human Interaction (TOCHI)}, - volume={22}, - number={2}, - pages={7}, - year={2015}, - publisher={ACM} -} - -@inproceedings{holland2009j-latte, - title={{J-LATTE}: a Constraint-based Tutor for {Java}}, - author={Holland, Jay and Mitrovic, Antonija and Martin, Brent}, - booktitle={Proc. 17th Int'l Conf. Computers in Education (ICCE 2009)}, - pages={142--146}, - year={2009} -} - -@article{hong2004guided, - title={Guided programming and automated error analysis in an intelligent {Prolog} tutor}, - author={Hong, Jun}, - journal={International Journal of Human-Computer Studies}, - volume={61}, - number={4}, - pages={505--534}, - year={2004}, - COMMENTpublisher={Elsevier}, -} - -@inproceedings{jin2012program, - title={Program representation for automatic hint generation for a data-driven novice programming tutor}, - author={Jin, Wei and Barnes, Tiffany and Stamper, John and Eagle, Michael John and Johnson, Matthew W and Lehmann, Lorrie}, - booktitle={Proc. 11th Int'l Conf. 
Intelligent Tutoring Systems (ITS 12)}, - pages={304--309}, - year={2012}, -} - -@article{johnson1990understanding, - title={Understanding and debugging novice programs}, - author={Johnson, W Lewis}, - journal={Artificial Intelligence}, - volume={42}, - number={1}, - pages={51--97}, - year={1990}, - publisher={Elsevier} -} - -@article{kamiya2002ccfinder, - title={{CCFinder}: a multilinguistic token-based code clone detection system for large scale source code}, - author={Kamiya, Toshihiro and Kusumoto, Shinji and Inoue, Katsuro}, - journal={IEEE Transactions on Software Engineering}, - volume={28}, - number={7}, - pages={654--670}, - year={2002}, - publisher={IEEE} -} - -@inproceedings{keuning2016towards, - title={Towards a Systematic Review of Automated Feedback Generation for Programming Exercises}, - author={Keuning, Hieke and Jeuring, Johan and Heeren, Bastiaan}, - booktitle={Proceedings of the 2016 ACM Conference on Innovation and Technology in Computer Science Education}, - pages={41--46}, - year={2016}, - organization={ACM} -} - -@article{koedinger2013new, - title={New potentials for data-driven intelligent tutoring system development and optimization}, - author={Koedinger, Kenneth R and Brunskill, Emma and Baker, Ryan S J d and McLaughlin, Elizabeth A and Stamper, John}, - journal={AI Magazine}, - volume={34}, - number={3}, - pages={27--41}, - year={2013} -} - -@article{le2013operationalizing, - title={Operationalizing the continuum between well-defined and ill-defined problems for educational technology}, - author={Le, Nguyen-Thinh and Loll, Frank and Pinkwart, Niels}, - journal={IEEE Transactions on Learning Technologies}, - volume={6}, - number={3}, - pages={258--270}, - year={2013}, - publisher={IEEE} -} - -@article{le2016classification, - title={A Classification of Adaptive Feedback in Educational Systems for Programming}, - author={Le, Nguyen-Thinh}, - journal={Systems}, - volume={4}, - number={2}, - pages={22}, - year={2016}, - publisher={Multidisciplinary Digital Publishing Institute} -} - -@article{le2009using, - title={Using Weighted Constraints to Diagnose Errors in Logic Programming -- The Case of an Ill-defined Domain}, - author={Le, Nguyen-Thinh and Menzel, Wolfgang}, - journal={International Journal of Artificial Intelligence in Education}, - volume={19}, - number={4}, - pages={381--400}, - year={2009}, - publisher={IOS Press} -} - -@inproceedings{levy2006tregex, - title={Tregex and Tsurgeon: tools for querying and manipulating tree data structures}, - author={Levy, Roger and Andrew, Galen}, - year={2006}, - booktitle={5th International Conference on Language Resources and Evaluation (LREC 2006)} -} - -@inproceedings{li2016measuring, - title={Measuring code behavioral similarity for programming and software engineering education}, - author={Li, Sihan and Xiao, Xusheng and Bassett, Blake and Xie, Tao and Tillmann, Nikolai}, - booktitle={Proceedings of the 38th International Conference on Software Engineering Companion}, - pages={501--510}, - year={2016}, - organization={ACM} -} - -@article{mitrovic2012fifteen, - title={Fifteen years of constraint-based tutors: what we have achieved and where we are going}, - author={Mitrovic, Antonija}, - journal={User Modeling and User-Adapted Interaction}, - volume={22}, - number={1-2}, - pages={39--72}, - year={2012}, - COMMENTpublisher={Springer}, -} - -@inproceedings{nguyen2014codewebs, - title={Codewebs: scalable homework search for massive open online programming courses}, - author={Nguyen, Andy and Piech, Christopher and 
Huang, Jonathan and Guibas, Leonidas}, - booktitle={Proc. 23rd Int'l World Wide Web Conf. (WWW 14)}, - pages={491--502}, - year={2014}, - COMMENTorganization={ACM}, -} - -@incollection{ohlsson1994constraint-based, - title={Constraint-based student modeling}, - author={Ohlsson, Stellan}, - booktitle={Student modelling: the key to individualized knowledge-based instruction}, - pages={167--189}, - year={1994}, - publisher={Springer} -} - -@inproceedings{piech2015autonomously, - title={Autonomously generating hints by inferring problem solving policies}, - author={Piech, Chris and Sahami, Mehran and Huang, Jonathan and Guibas, Leonidas}, - booktitle={Proc. 2nd ACM Conference on Learning @ Scale (L@S 2015)}, - pages={195--204}, - year={2015}, - COMMENTorganization={ACM}, -} - -@inproceedings{price2016generating, - author = {Thomas W. Price and Yihuan Dong and Tiffany Barnes}, - title = {Generating Data-driven Hints for Open-ended Programming}, - booktitle = {Proceedings of the 9th International Conference on Educational Data Mining, {EDM} 2016, Raleigh, North Carolina, USA, June 29 - July 2, 2016}, - pages = {191--198}, - year = {2016}, -} - -@article{rivers2015data-driven, - title={Data-Driven Hint Generation in Vast Solution Spaces: a Self-Improving {Python} Programming Tutor}, - author={Rivers, Kelly and Koedinger, Kenneth R}, - journal={International Journal of Artificial Intelligence in Education}, - pages={1--28}, - year={2015}, - doi={10.1007/s40593-015-0070-z}, - url={http://dx.doi.org/10.1007/s40593-015-0070-z}, -} - -@inproceedings{schleimer2003winnowing, - title={Winnowing: local algorithms for document fingerprinting}, - author={Schleimer, Saul and Wilkerson, Daniel S and Aiken, Alex}, - booktitle={Proceedings of the 2003 ACM SIGMOD international conference on Management of data}, - pages={76--85}, - year={2003}, - organization={ACM} -} - -@article{singh2013automated, - title={Automated feedback generation for introductory programming assignments}, - author={Singh, Rishabh and Gulwani, Sumit and Solar-Lezama, Armando}, - journal={ACM SIGPLAN Notices}, - volume={48}, - number={6}, - pages={15--26}, - year={2013}, - publisher={ACM} -} - -@article{sison2000multistrategy, - title={Multistrategy discovery and detection of novice programmer errors}, - author={Sison, Raymund C and Numao, Masayuki and Shimura, Masamichi}, - journal={Machine Learning}, - volume={38}, - number={1-2}, - pages={157--180}, - year={2000}, - publisher={Springer} -} - -@inproceedings{stamper2008hint, - title={The Hint Factory: Automatic generation of contextualized help for existing computer aided instruction}, - author={Stamper, John and Barnes, Tiffany and Lehmann, Lorrie and Croy, Marvin}, - booktitle={Proceedings of the 9th International Conference on Intelligent Tutoring Systems Young Researchers Track}, - pages={71--78}, - year={2008} -} - -@article{stamper2013experimental, - title={Experimental evaluation of automatic hint generation for a logic tutor}, - author={Stamper, John and Eagle, Michael and Barnes, Tiffany and Croy, Marvin}, - journal={International Journal of Artificial Intelligence in Education}, - volume={22}, - number={1-2}, - pages={3--17}, - year={2013}, - publisher={IOS Press} -} - -@inproceedings{suarez2008automatic, - title={Automatic Construction of a Bug Library for Object-Oriented Novice Java Programmer Errors}, - author={Suarez, Merlin and Sison, Raymund}, - booktitle={Proceedings of the 9th International Conference on Intelligent Tutoring Systems}, - pages={184}, - year={2008}, - 
organization={Springer Science \& Business Media} -} - -@article{vanlehn2006behavior, - title={The behavior of tutoring systems}, - author={{VanLehn}, Kurt}, - journal={International Journal of Artificial Intelligence in Education}, - volume={16}, - number={3}, - pages={227--265}, - year={2006}, - publisher={IOS Press} -} - -@article{xu2003transformation-based, - title={Transformation-based diagnosis of student programs for programming tutoring systems}, - author={Xu, Songwen and San Chee, Yam}, - journal={IEEE Transactions on Software Engineering}, - volume={29}, - number={4}, - pages={360--384}, - year={2003}, - publisher={IEEE} -} - -@article{zhang1989simple, - title={Simple fast algorithms for the editing distance between trees and related problems}, - author={Zhang, Kaizhong and Shasha, Dennis}, - journal={SIAM journal on computing}, - volume={18}, - number={6}, - pages={1245--1262}, - year={1989}, - publisher={SIAM} -} - -@inproceedings{cn21991, - title={Rule Induction with {CN2}: Some Recent Improvements}, - author={Peter Clark and Robin Boswell}, - booktitle={Proceedings of the Fifth European Conference on Machine Learning}, - pages={151--163}, - year={1991} -} diff --git a/aied2017/aied2017.tex b/aied2017/aied2017.tex deleted file mode 100644 index 3faaaf8..0000000 --- a/aied2017/aied2017.tex +++ /dev/null @@ -1,63 +0,0 @@ -\documentclass{llncs} - -\usepackage[utf8]{inputenc} - -% reclaim some plain-text sanity -\usepackage{newunicodechar} -\newunicodechar{∧}{\ensuremath{\land}} -\newunicodechar{⇒}{\ensuremath{\Rightarrow}} -\newunicodechar{⋯}{\ensuremath{\cdots}} - -\usepackage{fancyvrb} -\fvset{commandchars=\\\{\},baselinestretch=0.98,samepage=true,xleftmargin=2.5mm} -% WAT — I don’t even… -\makeatletter -\begingroup -\catcode`\`=\active -\gdef\FV@fontfamily@sf{% - \def\FV@FontScanPrep{\FV@MakeActive\`}% - \def\FV@FontFamily{\sffamily\edef`{{\string`}}}} -\endgroup -\makeatother - -\usepackage{forest} -\usetikzlibrary{arrows.meta} - -\usepackage{hyperref} - -\newcommand\code[1]{\texttt{#1}} -\newcommand\red[1]{{\begingroup\color[rgb]{0.8,0.15,0.15}#1\endgroup}} -\newcommand\green[1]{{\begingroup\color[rgb]{0.2,0.7,0.2}#1\endgroup}} -\newcommand\hl[1]{\textbf{#1}} - -\begin{document} - -\title{Automatic extraction of AST patterns \\ for debugging student programs} -\author{Timotej Lazar, Martin Možina, Ivan Bratko} -\institute{University of Ljubljana, Faculty of Computer and Information Science, Slovenia} -\maketitle - -\begin{abstract} -% motivation -When implementing a programming tutor, it is often difficult to manually consider all possible errors encountered by students. An alternative is to automatically learn a bug library of erroneous patterns from students’ programs. -% learning -We propose using abstract-syntax-tree (AST) patterns as features for learning rules to distinguish between correct and incorrect programs. These rules can be used for debugging student programs: rules for incorrect programs (buggy rules) contain patterns indicating mistakes, whereas rules for correct programs cover subsets of submissions sharing the same solution strategy. -% generating hints -To generate hints, we first check all buggy rules and point out incorrect patterns. If no buggy rule matches, rules for correct programs are used to recognize the student’s intent and suggest patterns that still need to be implemented. -% evaluation -We evaluated our approach on past student programming data for a number of Prolog problems. 
For 31 out of 44 problems, the induced rules correctly classified over 85\% of programs based only on their structural features. For approximately 73\% of incorrect submissions, we were able to generate hints that were implemented by the student in some subsequent submission. -\\\\ -\textbf{Keywords:} Programming tutors · Error diagnosis · Hint generation · Abstract syntax tree · Syntactic features -\end{abstract} - -\input{introduction} -\input{background} -\input{patterns} -\input{method} -\input{evaluation} -\input{conclusion} - -\bibliographystyle{splncs} -\bibliography{aied2017} - -\end{document} diff --git a/aied2017/background.tex b/aied2017/background.tex deleted file mode 100644 index 9457b35..0000000 --- a/aied2017/background.tex +++ /dev/null @@ -1,25 +0,0 @@ -\section{Background} - -% solution strategies -Several programming tutors generate hints from differences between the student’s program and a predefined set of possible solutions. The possible solution strategies for each problem can be given as a set of programs, or specified in a domain-specific language. Both Johnson’s Pascal tutor~\cite{johnson1990understanding} and Hong’s Prolog tutor~\cite{hong2004guided} perform hierarchical goal decomposition based on predefined programming \emph{plans} or \emph{techniques} to determine the student’s intent. Gerdes et al. use a small set of annotated model programs to derive solution strategies, which function in a similar way~\cite{gerdes2016ask-elle}. - -% canonicalization -Rivers and Koedinger compare students’ programs directly to a database of previous correct submissions~\cite{rivers2015data-driven}. They reduce program variability using equivalence-preserving transformations, such as inlining functions and reordering binary expressions. Hints are generated by suggesting a minimal correct step leading from the current submission to the closest correct program. - -% unit tests -Another option is to compare program behavior. Nguyen et al. classify programs according to results on a preselected set of test inputs~\cite{nguyen2014codewebs}. Li et al. generate test cases to distinguish between programs by selecting inputs that exercise different code paths in the program~\cite{li2016measuring}. Such tutors can point out pertinent failing test cases, which can be very helpful. However, to do any sort of conceptual analysis of student programs, we need to define some language for describing invariant properties of programs. - -% constraints -\emph{Constraints}~\cite{mitrovic2012fifteen} encode domain principles using if-then rules with relevance and satisfaction conditions, e.g. “if a function has a non-void return type, then it must have a return statement”~\cite{holland2009j-latte}. If a program violates a constraint, the tutor displays a predefined message. Le’s Prolog tutor improves constraint-based diagnosis by assigning weights to different types of constraints~\cite{le2009using}. - -% data-driven tutors -Jin et al. use \emph{linkage graphs} to describe data dependencies between the program’s statements~\cite{jin2012program}; we use AST patterns in a similar way. Nguyen et al. analyzed submissions in a large machine-learning course to learn a vocabulary of \emph{code phrases}: subtrees of submissions’ abstract syntax trees that perform the same function in a given context~\cite{nguyen2014codewebs}. By swapping parts between different programs, they built up a search library of functionally equivalent AST subtrees within a given context. 
- -% tree-regular expressions -The idea for AST patterns comes from Tregex -- \emph{tree regular expressions}, mainly used in the field of natural-language processing~\cite{levy2006tregex}. Tregex patterns can encode complex relations between nodes, but can become unwieldy; in this paper we use a simpler s-expression syntax. Another language for describing tree patterns using s-expressions is \emph{trx}, which additionally supports choice, repetition and other operators~\cite{bagrak2004trx}. - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "aied2017" -%%% End: \ No newline at end of file diff --git a/aied2017/conclusion.tex b/aied2017/conclusion.tex deleted file mode 100644 index 331424f..0000000 --- a/aied2017/conclusion.tex +++ /dev/null @@ -1,19 +0,0 @@ -\section{Conclusion} - -AST patterns can be used to describe the structure of a program’s AST. By encoding only relations between selected nodes, each pattern can match many programs. AST patterns thus function as “regular expressions for trees”. - -We presented a method for automatically extracting AST patterns from student programs. Our patterns encode relations between data objects in a program, with each pattern connecting either two instances of the same variable, a variable and a value, or two values. We consider such patterns as the atomic syntactic relations in a program, and use them as machine-learning features. - -We explained how patterns can be used as features to induce rules for classifying correct and incorrect programs. Since the goal of our research is to generate hints, we adapted the CN2 algorithm to produce rules useful for that purpose. We induce rules in two passes: we first learn the rules for incorrect programs, and then use the programs not covered by any such rule to learn the rules for correct programs. - -Evaluation shows that our patterns are useful for classifying Prolog programs. It is likely, however, that other programming languages will require different kinds of patterns. In Python, for example, variables can take on different values and appear in many places. This will likely require patterns relating more than two instances of a variable, or multiple variables. - -We showed how to generate hints based on rules by highlighting buggy patterns or pointing out what patterns are missing. Evaluation on past student data shows that useful hints can be provided for many incorrect submissions this way. The quality of feedback could be improved by annotating rules with explanations in natural language. Furthermore, since patterns and rules are easily interpretable, they can also help when manually authoring tutoring systems, by indicating the common errors and the typical solution strategies for each problem. - -In the future we will attempt to improve rule accuracy for certain problems, such as \code{union}. This will likely necessitate new kinds of patterns, for example to handle the cut operator. Adapting our methods to handle Python programs will give us some insight into what kinds of patterns could be useful in different situations. Finally, we will implement hint generation in an online programming tutor CodeQ, and evaluate the effect of automatic feedback on students’ problem-solving. 
- - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "aied2017" -%%% End: \ No newline at end of file diff --git a/aied2017/evaluation.tex b/aied2017/evaluation.tex deleted file mode 100644 index 32cb03c..0000000 --- a/aied2017/evaluation.tex +++ /dev/null @@ -1,71 +0,0 @@ -\section{Evaluation} -We evaluated our approach on 44 programming assignments. We preselected 70\% of students -whose submissions were used as learning data for rule learning. The submissions from -the remaining 30\% of students were used as testing data to evaluate classification accuracy of learned rules, -and to retrospectively evaluate quality of given hints. - -\begin{table}[t] -\caption{Results on five selected domains and averaged results over 44 domains. -Columns 2, 3, and 4 contain classification accuracies of our rule learning method, majority classifier, -and random forest, respectively. Columns 5 and 6 report the number of all generated buggy hints -and the number of hints that were actually implemented by students. The following three columns -contain the number of all generated intent hints (All), -the number of implemented hints (Imp) and the number of implemented alternative hints (Alt). -The numbers in the last column are student submission where hints could not be generated. -The bottom two rows give aggregated results (total and average) over all 44 domains.} -\centering - \begin{tabular}{|l|rrr|rr|rrr|r|} - \hline - Problem& \multicolumn{3}{c|}{CA} & \multicolumn{2}{c|}{Buggy hints} & \multicolumn{3}{c|}{Intent hints} & No hint \\ - \hline - & Rules & Maj & RF & All & Imp & All & Imp & Alt & \\ - \hline - sister & 0.988 & 0.719 & 0.983 & 128 & 128 & 127 & 84 & 26 & 34 \\ - del & 0.948 & 0.645 & 0.974 & 136 & 136 & 39 & 25 & 10 & 15 \\ - sum & 0.945 & 0.511 & 0.956 & 59 & 53 & 24 & 22 & 1 & 6 \\ - is\_sorted & 0.765 & 0.765 & 0.831 & 119 & 119 & 0 & 0 & 0 & 93 \\ - union & 0.785 & 0.783 & 0.813 & 106 & 106 & 182 & 66 & 7 & 6 \\ - ...& & & & & & & & & \\ - \hline - Total & & & & 3613 & 3508 & 2057 & 1160 & 244 & 1045 \\ - \hline - Average & 0.857 & 0.663 & 0.908 & 79.73 & 77.34 & 46.75 & 26.36 & 5.55 & 23.75 \\ - \hline - \end{tabular} -\label{table:eval} -\end{table} - -Table~\ref{table:eval} contains results on five selected problems (each representing a group of problems from one lab session), and averaged results over all 44 problems.\footnote{We report only a subset of results due to space restrictions. Full results and source code can be found at \url{https://ailab.si/ast-patterns/}. } The second, third, and fourth columns provide classification accuracies (CA) of the rule-based, majority, and random-forest classifiers on testing data. The majority classifier and the random forests method, -which had the best overall performance, serve as references for bad and good CA on particular data sets. - -For example, our rules correctly classified 99\% of testing instances for the \code{sister} problem, -the accuracy of the majority classifier was 66\%, and random forests achieved 98\%. CA of rules is also high for problems \code{del} and \code{sum}. It is lower, however, for \code{is\_sorted} and \code{union}, suggesting that the proposed set of AST patterns is insufficient for certain problems. Indeed, after analyzing the problem \code{is\_sorted}, -we observed that our patterns do not cover predicates with a single empty-list (\code{[]}) argument, which occurs as the base case in this problem. 
For this reason, the rule learning -algorithm failed to learn any C-rules and therefore all programs were classified as incorrect. In the case of \code{union}, many solutions use the cut (\code{!}) operator, which -is also ignored by our pattern generation algorithm. - -We evaluated the quality of hints on incorrect submissions from those student traces -that resulted in a correct program. In the case of the \code{sister} data set, there were 289 such incorrect submissions out of 403 in total. - -The columns captioned “Buggy hints” in Table~\ref{table:eval} contain the evaluation of hints generated from rules -for incorrect programs (I-rules). For each generated buggy hint we checked whether -it was implemented by the student in the final submission. The column “All” is -the number of all generated buggy hints, while the column “Imp” is the number of -implemented hints. The results show high relevance of generated buggy hints, as 97\% (3508 out of 3613) of them were implemented in the final solution; in other words, the buggy pattern was removed. - -The intent hints are generated when the algorithm fails to find any buggy hints. The column “All” contains the number of generated intent hints, “Imp” the number of implemented main intent hints, and “Alt” is the number -of implemented alternative hints. Notice that the percentage of implemented intent hints is significantly lower -than for buggy hints: in the case of problem \code{sister} 84 out of 127 (66\%) hints were implemented, whereas in the case of problem \code{union} only 66 out of 182 (36\%) hints were implemented. On average, 56\% of main intent hints were implemented. - -The last column shows the number of submissions where no hints could be generated. This value is relatively high -for the \code{is\_sorted} problem, because the algorithm could not learn any C-rules and thus no intent hints were generated. - -To sum up, buggy hints seem to be good and reliable, since they are almost always implemented when presented, even when we tested them on past data -- the decisions of students were not influenced by these hints. The percentage of implemented intent hints is, on average, lower (56\%), which is still not a bad result, given that it is difficult to determine the programmer’s intent. In 12\% (244 out of 2057) of generated intent hints, students implemented an alternative hint that was identified by our algorithm. Overall we were able to generate hints for 84.5\% of incorrect submissions. Of those hints, 86\% were implemented (73\% of all incorrect submissions). - -Last but not least, high classification accuracies for many problems imply that it is possible to determine the correctness of a program simply by checking for the presence of a small number of patterns. Our hypothesis is that there exist some crucial patterns in programs that students have difficulties with. When they figure out these patterns, implementing the rest of the program is usually straightforward. - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "aied2017" -%%% End: \ No newline at end of file diff --git a/aied2017/introduction.tex b/aied2017/introduction.tex deleted file mode 100644 index 13dc9ec..0000000 --- a/aied2017/introduction.tex +++ /dev/null @@ -1,27 +0,0 @@ -\section{Introduction} - -% why automatic feedback -Programming education is becoming increasingly accessible with massive online courses.
Since thousands of students can attend such courses, it is impossible for teachers to individually evaluate each participant’s work. On the other hand, in-time feedback that directly addresses students’ mistakes can aid the learning process. Providing feedback automatically could thus greatly enhance these courses. - -% ITS background -Traditional programming tutors use manually constructed domain models to generate feedback. Model-tracing tutors simulate the problem-solving \emph{process}: how students program. This is challenging because there are no well-defined steps when writing a program. Many tutors instead only analyze individual programs submitted by students, and disregard how a program evolved. They often use models coded in terms of constraints or bug libraries~\cite{keuning2016towards}. - -% data-driven domain modeling -Developing a domain model requires significant knowledge-engineering effort~\cite{folsom-kovarik2010plan}. This is particularly true for programming tutors, where most problems have several alternative solutions with many possible implementations~\cite{le2013operationalizing}. Data-driven tutors reduce the necessary effort by mining educational data -- often from online courses -- to learn common errors and generate feedback~\cite{rivers2015data-driven,nguyen2014codewebs,jin2012program}. - -% problem statement -This paper addresses the problem of finding useful features to support data mining in programming tutors. Features should be robust against irrelevant variations in program code (such as renaming a variable) and relatable to the knowledge components of the target skill (programming), so as to support hint generation. - -% our approach: patterns + rules -We describe features with \emph{abstract-syntax-tree patterns} that encode relations between nodes in a program’s abstract syntax tree. We use patterns that describe a path between pairs of leaf nodes referring to variables or values. By omitting some nodes on these paths, patterns can match different programs containing the same relation. We then induce rules to predict program correctness from AST patterns, allowing us to generate hints based on missing or incorrect patterns. - -% evaluation -We evaluated our approach on existing Prolog programs submitted by students during past lab sessions of a second-year university course. For 73\% of incorrect submissions we are able to suggest potentially useful patterns -- those that the students had actually implemented in the final, correct programs. - -% contributions -The main contributions presented in this paper are: AST patterns as features for machine learning, a rule-based model for predicting program correctness, and hint generation from incorrect or missing patterns in student programs. - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "aied2017" -%%% End: diff --git a/aied2017/method.tex b/aied2017/method.tex deleted file mode 100644 index 5e27229..0000000 --- a/aied2017/method.tex +++ /dev/null @@ -1,82 +0,0 @@ -\section{Method} - -This section explains the three main components of our approach: automatically extracting patterns from student submissions, learning classification rules for correct and incorrect programs, and using those rules to generate hints. - -\subsection{Extracting patterns} -\label{sec:extracting-patterns} - -We construct patterns by connecting pairs of leaf nodes in a program’s AST. 
For this paper we always select a pair of nodes from the same clause: either two nodes referring to the same variable (like the examples in Fig.~\ref{fig:sister}), or a value (such as the empty list \code{[]} or the number \code{0}) and another variable or value in the same \textsf{compound} or \textsf{binop} (like the blue dotted pattern in Fig.~\ref{fig:sum}). For example, in the clause (the second occurrence of each variable -- \code{A}, \code{B} and \code{C} -- is marked with~’ for disambiguation) - -\begin{Verbatim} -a(A,\textsf{ }B):- - b(A\textsf{'},\textsf{ }C), - B\textsf{'} is C\textsf{'}\textsf{ }+\textsf{ }1. -\end{Verbatim} - -\noindent -we select the following pairs of nodes: \{\code{A},\,\code{A\textsf{'}}\}, \{\code{B},\,\code{B\textsf{'}}\}, \{\code{C},\,\code{C\textsf{'}}\}, \{\code{B\textsf{'}},\,\code{1}\} and \{\code{C\textsf{'}},\,\code{1}\}. - -For each selected pair of leaf nodes $(a,b)$ we build a pattern by walking the AST in depth-first order, and recording nodes that lie on the paths to $a$ and $b$. We omit \textsf{and} nodes, as explained in the previous section. We also include certain nodes that do not lie on a path to any selected leaf. Specifically, we include the functor or operator of all \textsf{compound}, \textsf{binop} or \textsf{unop} nodes containing $a$ or $b$. - -Patterns constructed in this way form the set of features for rule learning. To keep this set at a reasonable size, we only use patterns that have occurred at least five times in submitted programs. - -\subsection{Learning rules} - -We represent students’ programs in the feature space of AST patterns described above. Each pattern corresponds to one binary feature with value $true$ when the pattern is present and $false$ when it is absent. We classify programs as correct or incorrect based on predefined unit tests for each problem, and use these labels for machine learning. - -Since programs can be validated with appropriate unit tests, our goal is not classifying new submissions, but rather to discover patterns associated with program correctness. This approach to machine learning is called \emph{descriptive induction} -- the automatic discovery of patterns describing regularities in data. We use rule learning for this task, because conditions of rules can be easily translated to hints. - -Before explaining the algorithm, let us discuss the reasons why a program can be incorrect. Our experience indicates that bugs in student programs can often be described by 1) some incorrect or \emph{buggy} pattern, which needs to be removed, or 2) some missing relation (pattern) that should be included before the program can be correct. We shall now explain how both types of errors can be identified with rules. - -To discover buggy patterns, the algorithm first learns rules that describe incorrect programs (I-rules). We use a variant of the CN2 algorithm~\cite{cn21991} implemented within the Orange data-mining toolbox~\cite{demsar2013orange}. Since we use rules to generate hints, and since hints should not be presented to students unless they are likely to be correct, we impose additional constraints on the rule learner: -\begin{enumerate} - \item The classification accuracy of each learned rule must exceed a certain threshold (90\% in our experiments). - \item Each conjunct in a condition must be significant with respect to the likelihood-ratio test (in our experiments significance threshold was set to $p=0.05$). 
- \item A conjunct can only specify the presence of a pattern: we allow only feature-value pairs with $true$ as the value. -\end{enumerate} - -\noindent The first two constraints are needed to induce good rules with significant patterns, while the third ensures that rules mention only the presence (and not the absence) of patterns as reasons for a program to be incorrect. This is important, since conditions of I-rules ought to contain patterns symptomatic of incorrect programs. - -With respect to the second type of error, we could try the same approach and learn rules using the above algorithm for the class of correct programs (C-rules). If we had accurate rules for correct programs, their conditions would define sufficient combinations of patterns that render a program correct. -It turns out that it is difficult to learn accurate rules for correct programs without specifying absent patterns. If specifying the absence of patterns were allowed in rule conditions, learning would result in too many C-rules. - -A possible way to solve this problem is to remove programs that are covered by rules for the incorrect class. This way all known buggy patterns are removed from the data and will therefore not be included in C-rules. However, removing incorrect programs also removes the need for relevant patterns. For example, if all incorrect programs were removed, the single C-rule “$\mathsf{true} ⇒ \mathsf{correct}$” would suffice, which cannot be used to generate hints. We achieved the best results by learning from both data sets, using the original data set (with all programs) to learn rules, and the reduced data set to test whether a rule achieves the required classification accuracy (90\%). - -Even though our main interest is the discovery of patterns, we can still use induced rules to classify new programs, for example to evaluate the quality of rules. The classification procedure has three steps: 1) if an I-rule covers the program, classify it as incorrect; 2) else if a C-rule covers the program, classify it as correct; 3) otherwise, if no rule covers the program, classify it as incorrect. - -\subsection{Generating hints} - -Once we have induced the rules for a given problem, we can use them to provide hints based on buggy or missing patterns. To generate a hint for an incorrect program, each rule is considered in turn. We consider two types of feedback: \emph{buggy} hints based on I-rules, and \emph{intent} hints based on C-rules. - -First, all I-rules are checked to find any known incorrect patterns in the program. To find the most likely incorrect patterns, the rules are considered in the order of decreasing quality. If all patterns in the rule “$p_1 ∧ ⋯ ∧ p_k ⇒ \mathsf{incorrect}$” match, we highlight the relevant leaf nodes. As an aside, we found that most I-rules are based on the presence of a single pattern. For the incorrect \code{sum} program from the previous section, our method produces the following highlight - -\begin{Verbatim} -sum([],0). % \textit{base case:} the empty list sums to zero -sum([H|T],\red{\underline{Sum}}):- % \textit{recursive case:} - sum(T,\red{\underline{Sum}}), % sum the tail and - Sum is Sum + H. % add first element (\textit{bug:} reused variable) -\end{Verbatim} - -\noindent -based on the rule “$p ⇒ \mathsf{incorrect}$”, where $p$ corresponds to the solid red pattern in Fig.~\ref{fig:sum}. This rule covers 36 incorrect programs, and one correct program using an unusual solution strategy.
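In code, this first step amounts to scanning the I-rules in order of decreasing quality and reporting the first rule whose patterns all occur in the submission. The following Python fragment is only an illustrative sketch; the rule representation and the \code{matches} helper are assumptions made for the example rather than the interface of our implementation.

\begin{Verbatim}
def buggy_hints(program_ast, i_rules, matches):
    # i_rules: list of (patterns, quality) pairs, one per induced I-rule.
    # matches(ast, pattern): test whether a pattern occurs in the program.
    # Consider rules by decreasing quality and report the first rule
    # whose patterns are all present in the submission.
    for patterns, quality in sorted(i_rules, key=lambda r: r[1], reverse=True):
        if all(matches(program_ast, p) for p in patterns):
            return list(patterns)
    return []   # no I-rule covers the program; intent hints are tried next
\end{Verbatim}

The returned patterns determine which leaf nodes are highlighted, as in the \code{sum} example above.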
- -If no I-rule matches the program, we use C-rules to determine the student’s intent. C-rules group patterns that together indicate a high likelihood that the program is correct. Each C-rule thus defines a particular “solution strategy” in terms of AST patterns. We reason that alerting the student to a missing pattern could help them complete the program without revealing the whole solution. - -When generating a hint from C-rules, we consider all \emph{partially matching} rules “$p_1 ∧ ⋯ ∧ p_k ⇒ \mathsf{correct}$”, where the student’s program matches some (but not all) patterns $p_i$. For each such rule we store the number of matching patterns, and the set of missing patterns. We then return the most common missing pattern among the rules with most matching patterns. - -For example, if we find the following missing pattern for an incorrect program implementing the \code{sister} predicate: - -\begin{Verbatim}[fontfamily=sf] -(clause (head (compound (functor ‘\code{sister}’) (args var))) (binop var ‘\code{\textbackslash{}=}’))\textrm{,} -\end{Verbatim} - -\noindent -we could display a message to the student saying “comparison between \code{X} and some other value is missing”, or “your program is missing the goal \code{X} \code{\textbackslash{}=} \code{?}”. - -This method can find more than one missing pattern for a given partial program. In such cases we return the most commonly occurring pattern as the main hint, and other candidate patterns as alternative hints. We use main and alternative intent hints to establish the upper and lower bounds when evaluating automatic hints. - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "aied2017" -%%% End: \ No newline at end of file diff --git a/aied2017/outline.org b/aied2017/outline.org deleted file mode 100644 index a70c1a3..0000000 --- a/aied2017/outline.org +++ /dev/null @@ -1,81 +0,0 @@ -* Introduction (1) :Timotej: -** Generics -ITSs are useful, but expensive to create. Internet increases the reach of ITSs (think MOOC) and allows us to gather a lot of data. The challenge is to extract useful information from this data to improve feedback to students. - - koedinger2013new - -** Programming ITS -A particularly challenging domain: - - no meaningful “solutions steps” - - no easily discernible and independent “program features” - -** Problem -Find good features to describe student programs that: - - are automatically learnable - - can serve as a basis for generating hints - -** Contributions -A pattern language for describing program features. -Rules for classifying correct/incorrect Prolog programs. - -* Background (2) :Timotej: -Model-tracing is not well-suited for programming tutors. - - Hint Factory (Barnes & Stamper) for programming (Jin, Rivers) - - student traces are basically insane anyway, so learning from them is not a good idea (Piech 2015) -Instead we focus on analyzing individual submissions - CBM (maybe just /like/ CBM?). 
- - why: human tutors are usually able to comment without referring to previous program versions - -Analyzing program based on formal descriptions (usually defined manually): - - plans (Johnson) - - techniques (Brna/Huang/Le) - - strategies (Gerdes) -Comparing programs directly / after canonicalization (Rivers 2015) - - not easy to determine which differences are crucial - -Defining “programming attributes/features”: - - AST subtrees (Nguyen 2014) - - linkage graphs (Jin 2012) - - n-grams or other purely textual attributes - - mostly used for plagiarism (MOSS) / code clone detection - -Tgrep / trx for structural patterns in NLP - use the same for describing code! - -* Dataset (3) :Timotej: -Solution traces -** Motivating example: ancestor/2 - - show and explain correct / buggy program - - examples of patterns & rules - - buggy rules ≈ error classes - - correct rules ≈ program strategies or “plans” → determine intent - - how correct / buggy rules can be used to automatically determine intent and generate feedback - - using rules to assist authoring tutors - -** Patterns -Programming is all about manipulating data (variables/literals). -Patterns encode (only) relations between data nodes in an AST, and are more granular than subtrees. -Patterns abstract away some information -Algorithm for extracting patterns. - - which patterns are considered - -* Method (2) :Martin: -Rule learning. - - first learn correct, then buggy rules -Automatic hint generation. - -* Evaluation (3) :Martin:Timotej: -Example: describe patterns/rules for ancestor/2 (and/or another domain). - - example trace -CA/AUC for rules/RF/…. - Using e.g. RF with patterns for attributes achieves a high classification accuracy for correct/incorrect programs. - Rules are less accurate, but can be used for hints (how can we improve this)? -Simulated hints: for how many submissions would we suggest a hint that was ultimately implemented in the final solution? - Hints from buggy rules are almost always implemented; correct rules are less certain. - -* Conclusions / future work (1) - - syntactic variations (e.g. using = instead of implicit matching) - - combine with canonicalization - - ABML for rule learning - - integrate into a tutor (CodeQ / CloudCoder) to test in situ - -* Misc -Problems: - - predicates with similar clauses (e.g. union/3) diff --git a/aied2017/patterns.tex b/aied2017/patterns.tex deleted file mode 100644 index e1aa767..0000000 --- a/aied2017/patterns.tex +++ /dev/null @@ -1,191 +0,0 @@ -\section{AST patterns} - -In this section we describe AST patterns through examples, while Sect.~\ref{sec:extracting-patterns} explains how patterns are extracted from student programs. Consider the following Prolog program implementing the relation \code{sister(X,Y)}\footnote{Binary relations like this one should be read as “\code{X} is a sister/parent/… of \code{Y}”.}: - -\begin{Verbatim} -sister(X,Y):- % X is Y’s sister when: - parent(P,X), - parent(P,Y), % X and Y share a common parent P, - female(X), % X is female, and - X \textbackslash{}= Y. % X and Y are not the same person. -\end{Verbatim} - -Figure~\ref{fig:sister} shows the program’s AST with two patterns. The pattern drawn with blue dotted arrows encodes the fact that the first argument to the \code{sister} predicate also appears in the call to \code{female}. In other words, this pattern states that \code{X} must be female to be a sister. 
We write this pattern as the s-expression - -\begin{Verbatim}[fontfamily=sf] -(clause (head (compound (functor ‘\code{sister}’) (args var))) - (compound (functor ‘\code{female}’) (args var))) -\end{Verbatim} - -\begin{figure}[htbp] - \centering - \begin{forest} - for tree={ - font=\sf, - edge=gray, - s sep=0.05cm, - } -[text - [clause,name=top - [head,name=h - [compound,name=hc [functor,name=hf [sister,name=hfl]] - [args,name=ha [var,name=hav [X,name=havl,draw,rectangle,thick,blue,dotted,line width=0.4mm]] [args [var [Y]]]]]] - [and - [compound,name=g1 [functor,name=g1f [parent,name=g1fl]] - [args,name=g1a [var,name=g1av [P,name=g1avl,draw,rectangle,thick,red]] [args [var [X]]]]] - [and - [compound,name=g2 [functor,name=g2f [parent,name=g2fl]] - [args,name=g2a [var,name=g2av [P,name=g2avl,draw,rectangle,thick,red]] [args [var [Y]]]]] - [and - [compound,name=g3 [functor,name=g3f [female,name=g3fl]] - [args,name=g3a [var,name=g3av [X,name=g3avl,draw,rectangle,thick,blue,dotted,line width=0.4mm]]]] - [binop [var [X]] [\textbackslash{}{=}] [var [Y]]]]]]]] -% first pattern -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (top) edge[out=-10,in=-170] (g1); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1) edge[transform canvas={xshift=-1.5mm}] (g1f); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1f) edge[transform canvas={xshift=-1.1mm}] (g1fl); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1) edge[transform canvas={xshift=1.5mm}] (g1a); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1a) edge[transform canvas={xshift=-1.2mm}] (g1av); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (top) edge[out=-10,in=-170,transform canvas={xshift=-2mm}] (g2); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2) edge[transform canvas={xshift=-1.5mm}] (g2f); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2f) edge[transform canvas={xshift=-1.1mm}] (g2fl); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2) edge[transform canvas={xshift=1.5mm}] (g2a); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2a) edge[transform canvas={xshift=-1.2mm}] (g2av); -% second pattern -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (top) edge[transform canvas={xshift=-0.8mm,yshift=0.8mm}] (h); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (h) edge[transform canvas={xshift=-1mm}] (hc); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (hc) edge[transform canvas={xshift=-1.5mm}] (hf); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (hf) edge[transform canvas={xshift=-1.1mm}] (hfl); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (hc) edge[transform canvas={xshift=1.5mm}] (ha); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (ha) edge[transform canvas={xshift=-1.2mm}] (hav); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (top) edge[out=-5,in=160] (g3); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3) edge[transform canvas={xshift=-1.5mm}] (g3f); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3f) edge[transform canvas={xshift=-1.1mm}] (g3fl); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3) edge[transform canvas={xshift=1.5mm}] (g3a); -\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3a) edge[transform canvas={xshift=-1.2mm}] (g3av); - \end{forest} - \caption{The AST for 
the \code{sister} program, showing two patterns and the leaf nodes inducing them. Solid red arrows equate the first arguments in the two calls to \code{parent}. Dotted blue arrows encode the necessary condition that \code{X} must be female to be a sister.} - \label{fig:sister} -\end{figure} - -Every pattern used in this paper has the same basic structure, and describes paths from a \textsf{clause} node to one or two leaf nodes containing variables or values. All patterns in Figs.~\ref{fig:sister} and~\ref{fig:sum} are induced from such node pairs. For each leaf we also include some local context, for example the predicate name (e.g. \texttt{parent}). - -We regard these patterns as the smallest units of meaning in Prolog programs: each pattern encodes some interaction between two parts of the program. Including more than two leaf nodes in a pattern could make it difficult to pinpoint the exact error when generating hints. Each pattern contains at most two \textsf{var} nodes, so we require they both refer to the same variable -- relating two nodes with different variables would not tell us much about the program. This allows us to omit variable names from patterns. - -We handle syntactic variations in programs by omitting certain nodes from patterns. For example, by not including \textsf{and} nodes, the above pattern can match a clause regardless of the presence (or order) of other goals in its body (i.e., with any arrangement of \textsf{and} nodes in the AST). Order \emph{is} important for the nodes specified in the pattern; this is explained below. - -The second pattern in Fig.~\ref{fig:sister}, drawn with solid red arrows, encodes the fact that the two calls to \code{parent} share the first argument. In other words, \code{X}~and~\code{Y} must have the same parent~\code{P}. - -\begin{Verbatim}[fontfamily=sf] -(clause (compound (functor ‘\code{parent}’) (args var)) - (compound (functor ‘\code{parent}’) (args var))) -\end{Verbatim} - -Patterns describe relations between nodes in a program’s AST. Specifically, the pattern ($a$ $b$ $c$) means that the nodes $b$ and $c$ are descended from $a$, and that $b$ precedes $c$ in a depth-first tree walk. In general, an AST matches the pattern (\textsf{name} $p_1$ … $p_k$) if it contains a node $n$ labeled \textsf{name}; the subtree rooted at $n$ must also contain, in depth-first order, distinct nodes $n_1$ to $n_k$ matching subpatterns $p_1$ to $p_k$. The above pattern, for example, matches only the last of the following programs (the first program is missing one call to \code{parent}, and the second has different variables in positions encoded by the pattern): - -\begin{Verbatim} -\textit{\red{% nonmatching}} \textit{\red{% nonmatching}} \textit{\green{% matching}} -sister(X,Y):- sister(X,Y):- sister(X,Y):- - female(X), female(X), parent(A,X), - parent(P,X), parent(A,X), female(X), - X \textbackslash{}= Y. parent(B,Y), parent(A,Y), - X \textbackslash{}= Y. X \textbackslash{}= Y. -\end{Verbatim} - -A relation between any two objects in a program is insufficient to reason about the program’s behavior on the whole. In the tutoring context, however, there are patterns that strongly indicate the presence of certain bugs. Take for example the following incorrect program to sum a list: - -\begin{Verbatim} -sum([],0). % \textit{base case:} the empty list sums to zero -sum([H|T],Sum):- % \textit{recursive case:} - sum(T,Sum), % sum the tail and - Sum is Sum + H. 
% add first element (\textit{bug:} reused variable) -\end{Verbatim} - -This error is fairly common with Prolog novices: the variable \code{Sum} is used to represent both the sum of the whole list (line 2), and the sum of only the tail elements (line 3). The last line fails since Prolog cannot unify \code{Sum} with a (generally) different value of \code{Sum\,+\,H}. The program’s AST is displayed in Fig.~\ref{fig:sum}. - -\begin{figure}[htbp] - \centering - \begin{forest} -for tree={ - font=\sf, - edge=gray, - s sep=0.05cm, -} -[text - [clause,name=c1 - [head,name=c1h - [compound,name=c1hc [functor,name=c1hf [sum,name=c1hfv]] - [args,name=c1ha1 [\code{[\,]},name=c1ha1v,draw,rectangle,thick,dotted,line width=0.4mm,blue] - [args,name=c1ha2 [\code{0},name=c1ha2v,draw,rectangle,thick,dotted,line width=0.4mm,blue]]]]]] - [clause,name=c2 - [head,name=c2h - [compound,name=c2hc [functor,name=c2hf [sum,name=c2hfv]] - [args,name=c2ha1 [list [var [H]] [var [T]]] [args,name=c2ha2 [var,name=c2ha2v [Sum,draw,rectangle,thick,red]]]]]] - [and - [compound,name=c2c [functor,name=c2cf [sum,name=c2cfv]] - [args,name=c2ca1 [var [T]] [args,name=c2ca2 [var,name=c2ca2v [Sum,draw,rectangle,thick,red]]]]] - [binop,name=c2b [var,name=c2bv [Sum,draw,rectangle,thick,dashed,orange]] [is,name=c2bo] - [binop,name=c2bb [var,name=c2bbv [Sum,draw,rectangle,thick,dashed,orange]] [+,name=c2bbo] [var [H]]]]]]] -% first pattern -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1) edge[transform canvas={xshift=-1mm}] (c1h); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1h) edge[transform canvas={xshift=-1mm}] (c1hc); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1hc) edge[transform canvas={xshift=-1.2mm}] (c1hf); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1hf) edge[transform canvas={xshift=-1mm}] (c1hfv); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1hc) edge[transform canvas={xshift=1.2mm}] (c1ha1); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1ha1) edge[transform canvas={xshift=-1.2mm}] (c1ha1v); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1ha1) edge[transform canvas={xshift=1.2mm}] (c1ha2); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1ha2) edge[transform canvas={xshift=1mm}] (c1ha2v); -% second pattern -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2) edge[transform canvas={xshift=-0.8mm,yshift=0.8mm}] (c2h); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2h) edge[transform canvas={xshift=-1mm}] (c2hc); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2hc) edge[transform canvas={xshift=-1.2mm}] (c2hf); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2hf) edge[transform canvas={xshift=-0.8mm,yshift=0.8mm}] (c2hfv); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2hc) edge[transform canvas={xshift=1.2mm}] (c2ha1); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ha1) edge[transform canvas={xshift=1.2mm}] (c2ha2); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ha2) edge[transform canvas={xshift=1mm}] (c2ha2v); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2) edge[out=-15,in=-170] (c2c); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2c) edge[transform canvas={xshift=-1.2mm}] 
(c2cf); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2cf) edge[transform canvas={xshift=-1mm}] (c2cfv); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2c) edge[transform canvas={xshift=1.2mm}] (c2ca1); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ca1) edge[transform canvas={xshift=1.2mm}] (c2ca2); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ca2) edge[transform canvas={xshift=1mm}] (c2ca2v); -% third pattern -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2) edge[out=20,in=150] (c2b); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2b) edge[transform canvas={xshift=-1.1mm}] (c2bv); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2b) edge[transform canvas={xshift=0.8mm}] (c2bo); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2b) edge[transform canvas={xshift=1mm}] (c2bb); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2bb) edge[transform canvas={xshift=-1mm}] (c2bbv); -\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2bb) edge[transform canvas={xshift=0.9mm}] (c2bbo); - \end{forest} - \caption{The AST for the buggy \texttt{sum} program. Dotted arrows relate the correct values in the base case. Solid and dashed arrows denote two patterns describing incorrect reuse of the \code{Sum} variable in the recursive case.} - \label{fig:sum} -\end{figure} - -Various patterns capture this mistake. Solid red arrows in Fig.~\ref{fig:sum} show one example -- \code{Sum} returned by the predicate should not be the same as the \code{Sum} from the recursive call: - -\begin{Verbatim}[fontfamily=sf] -(clause (head (compound (functor ‘\code{sum}’) (args (args var)))) - (compound (functor ‘\code{sum}’) (args (args var)))) -\end{Verbatim} - -\noindent -The second pattern, drawn with dashed orange arrows in the figure, indicates the likely error in the arithmetic expression: - -\begin{Verbatim}[fontfamily=sf] -(clause (binop var ‘\code{is}’ (binop var ‘\code{+}’))) -\end{Verbatim} - -The leftmost pattern in Fig.~\ref{fig:sum}, drawn with dotted blue arrows, describes the correct relation between the two constants in the base-case rule: - -\begin{Verbatim}[fontfamily=sf] -(clause (head (compound (functor ‘\code{sum}’) (args \code{[]} (args \code{0}))))) -\end{Verbatim} - -\noindent -We include such patterns in our feature set to cover the base-case clauses in recursive programs, which often include no variables. 
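The matching relation described above is straightforward to implement. The following Python fragment is only an illustrative sketch, assuming that AST nodes are represented as \code{(label, children)} pairs and patterns as nested tuples of labels; it is not the implementation used in our experiments.

\begin{Verbatim}
def nodes_dfs(node):
    # Yield the nodes of the subtree rooted at "node" in depth-first order.
    yield node
    for child in node[1]:
        yield from nodes_dfs(child)

def match(node, pattern):
    # Does the subtree rooted at "node" match "pattern"?
    if isinstance(pattern, str):             # leaf pattern: compare labels
        return node[0] == pattern
    name, subpatterns = pattern[0], pattern[1:]
    if node[0] != name:
        return False
    # Subpatterns must match distinct descendants in depth-first order;
    # taking the earliest admissible node for each subpattern suffices.
    descendants = list(nodes_dfs(node))[1:]
    i = 0
    for sub in subpatterns:
        while i < len(descendants) and not match(descendants[i], sub):
            i += 1
        if i == len(descendants):
            return False
        i += 1
    return True

def program_matches(ast, pattern):
    # A program matches a pattern if any node of its AST does.
    return any(match(node, pattern) for node in nodes_dfs(ast))
\end{Verbatim}

A program is then represented by the set of patterns it matches, which is exactly the feature space used for rule learning in the next section.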
- - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "aied2017" -%%% End: \ No newline at end of file diff --git a/paper/aied2017.bib b/paper/aied2017.bib new file mode 100644 index 0000000..af09d8a --- /dev/null +++ b/paper/aied2017.bib @@ -0,0 +1,328 @@ +@inproceedings{bagrak2004trx, + title={{trx}: Regular-tree expressions, now in Scheme}, + author={Bagrak, Ilya and Shivers, Olin}, + booktitle={Proceedings of the Fifth Workshop on Scheme and Functional Programming}, + pages={21--32}, + year={2004} +} + +@article{demsar2013orange, + author = {Janez Demšar and Tomaž Curk and Aleš Erjavec and Črt Gorup and Tomaž Hočevar and Mitar Milutinovič and Martin Možina and Matija Polajnar and Marko Toplak and Anže Starič and Miha Štajdohar and Lan Umek and Lan Žagar and Jure Žbontar and Marinka Žitnik and Blaž Zupan}, + title = {Orange: Data Mining Toolbox in Python}, + journal = {Journal of Machine Learning Research}, + year = {2013}, + volume = {14}, + pages = {2349-2353}, + url = {http://jmlr.org/papers/v14/demsar13a.html} +} + +@inproceedings{folsom-kovarik2010plan, + title={Plan ahead: Pricing {ITS} learner models}, + author={Folsom-Kovarik, Jeremiah T and Schatz, Sae and Nicholson, Denise}, + booktitle={Proc. 19th Behavior Representation in Modeling \& Simulation Conference}, + pages={47--54}, + year={2010} +} + +@article{fossati2015data, + title={Data driven automatic feedback generation in the {iList} intelligent tutoring system}, + author={Fossati, Davide and Di Eugenio, Barbara and Ohlsson, Stellan and Brown, Christopher and Chen, Lin}, + journal={Technology, Instruction, Cognition and Learning}, + volume={10}, + number={1}, + pages={5--26}, + year={2015} +} + +@article{gerdes2016ask-elle, + title={Ask-Elle: an adaptable programming tutor for Haskell giving automated feedback}, + author={Gerdes, Alex and Heeren, Bastiaan and Jeuring, Johan and van Binsbergen, L Thomas}, + journal={International Journal of Artificial Intelligence in Education}, + pages={1--36}, + year={2016}, + publisher={Springer} +} + +@article{glassman2015overcode, + title={{OverCode}: Visualizing variation in student solutions to programming problems at scale}, + author={Glassman, Elena L and Scott, Jeremy and Singh, Rishabh and Guo, Philip J and Miller, Robert C}, + journal={ACM Transactions on Computer-Human Interaction (TOCHI)}, + volume={22}, + number={2}, + pages={7}, + year={2015}, + publisher={ACM} +} + +@inproceedings{holland2009j-latte, + title={{J-LATTE}: a Constraint-based Tutor for {Java}}, + author={Holland, Jay and Mitrovic, Antonija and Martin, Brent}, + booktitle={Proc. 17th Int'l Conf. Computers in Education (ICCE 2009)}, + pages={142--146}, + year={2009} +} + +@article{hong2004guided, + title={Guided programming and automated error analysis in an intelligent {Prolog} tutor}, + author={Hong, Jun}, + journal={International Journal of Human-Computer Studies}, + volume={61}, + number={4}, + pages={505--534}, + year={2004}, + COMMENTpublisher={Elsevier}, +} + +@inproceedings{jin2012program, + title={Program representation for automatic hint generation for a data-driven novice programming tutor}, + author={Jin, Wei and Barnes, Tiffany and Stamper, John and Eagle, Michael John and Johnson, Matthew W and Lehmann, Lorrie}, + booktitle={Proc. 11th Int'l Conf. 
Intelligent Tutoring Systems (ITS 12)}, + pages={304--309}, + year={2012}, +} + +@article{johnson1990understanding, + title={Understanding and debugging novice programs}, + author={Johnson, W Lewis}, + journal={Artificial Intelligence}, + volume={42}, + number={1}, + pages={51--97}, + year={1990}, + publisher={Elsevier} +} + +@article{kamiya2002ccfinder, + title={{CCFinder}: a multilinguistic token-based code clone detection system for large scale source code}, + author={Kamiya, Toshihiro and Kusumoto, Shinji and Inoue, Katsuro}, + journal={IEEE Transactions on Software Engineering}, + volume={28}, + number={7}, + pages={654--670}, + year={2002}, + publisher={IEEE} +} + +@inproceedings{keuning2016towards, + title={Towards a Systematic Review of Automated Feedback Generation for Programming Exercises}, + author={Keuning, Hieke and Jeuring, Johan and Heeren, Bastiaan}, + booktitle={Proceedings of the 2016 ACM Conference on Innovation and Technology in Computer Science Education}, + pages={41--46}, + year={2016}, + organization={ACM} +} + +@article{koedinger2013new, + title={New potentials for data-driven intelligent tutoring system development and optimization}, + author={Koedinger, Kenneth R and Brunskill, Emma and Baker, Ryan S J d and McLaughlin, Elizabeth A and Stamper, John}, + journal={AI Magazine}, + volume={34}, + number={3}, + pages={27--41}, + year={2013} +} + +@article{le2013operationalizing, + title={Operationalizing the continuum between well-defined and ill-defined problems for educational technology}, + author={Le, Nguyen-Thinh and Loll, Frank and Pinkwart, Niels}, + journal={IEEE Transactions on Learning Technologies}, + volume={6}, + number={3}, + pages={258--270}, + year={2013}, + publisher={IEEE} +} + +@article{le2016classification, + title={A Classification of Adaptive Feedback in Educational Systems for Programming}, + author={Le, Nguyen-Thinh}, + journal={Systems}, + volume={4}, + number={2}, + pages={22}, + year={2016}, + publisher={Multidisciplinary Digital Publishing Institute} +} + +@article{le2009using, + title={Using Weighted Constraints to Diagnose Errors in Logic Programming -- The Case of an Ill-defined Domain}, + author={Le, Nguyen-Thinh and Menzel, Wolfgang}, + journal={International Journal of Artificial Intelligence in Education}, + volume={19}, + number={4}, + pages={381--400}, + year={2009}, + publisher={IOS Press} +} + +@inproceedings{levy2006tregex, + title={Tregex and Tsurgeon: tools for querying and manipulating tree data structures}, + author={Levy, Roger and Andrew, Galen}, + year={2006}, + booktitle={5th International Conference on Language Resources and Evaluation (LREC 2006)} +} + +@inproceedings{li2016measuring, + title={Measuring code behavioral similarity for programming and software engineering education}, + author={Li, Sihan and Xiao, Xusheng and Bassett, Blake and Xie, Tao and Tillmann, Nikolai}, + booktitle={Proceedings of the 38th International Conference on Software Engineering Companion}, + pages={501--510}, + year={2016}, + organization={ACM} +} + +@article{mitrovic2012fifteen, + title={Fifteen years of constraint-based tutors: what we have achieved and where we are going}, + author={Mitrovic, Antonija}, + journal={User Modeling and User-Adapted Interaction}, + volume={22}, + number={1-2}, + pages={39--72}, + year={2012}, + COMMENTpublisher={Springer}, +} + +@inproceedings{nguyen2014codewebs, + title={Codewebs: scalable homework search for massive open online programming courses}, + author={Nguyen, Andy and Piech, Christopher and 
Huang, Jonathan and Guibas, Leonidas}, + booktitle={Proc. 23rd Int'l World Wide Web Conf. (WWW 14)}, + pages={491--502}, + year={2014}, + COMMENTorganization={ACM}, +} + +@incollection{ohlsson1994constraint-based, + title={Constraint-based student modeling}, + author={Ohlsson, Stellan}, + booktitle={Student modelling: the key to individualized knowledge-based instruction}, + pages={167--189}, + year={1994}, + publisher={Springer} +} + +@inproceedings{piech2015autonomously, + title={Autonomously generating hints by inferring problem solving policies}, + author={Piech, Chris and Sahami, Mehran and Huang, Jonathan and Guibas, Leonidas}, + booktitle={Proc. 2nd ACM Conference on Learning @ Scale (L@S 2015)}, + pages={195--204}, + year={2015}, + COMMENTorganization={ACM}, +} + +@inproceedings{price2016generating, + author = {Thomas W. Price and Yihuan Dong and Tiffany Barnes}, + title = {Generating Data-driven Hints for Open-ended Programming}, + booktitle = {Proceedings of the 9th International Conference on Educational Data Mining, {EDM} 2016, Raleigh, North Carolina, USA, June 29 - July 2, 2016}, + pages = {191--198}, + year = {2016}, +} + +@article{rivers2015data-driven, + title={Data-Driven Hint Generation in Vast Solution Spaces: a Self-Improving {Python} Programming Tutor}, + author={Rivers, Kelly and Koedinger, Kenneth R}, + journal={International Journal of Artificial Intelligence in Education}, + pages={1--28}, + year={2015}, + doi={10.1007/s40593-015-0070-z}, + url={http://dx.doi.org/10.1007/s40593-015-0070-z}, +} + +@inproceedings{schleimer2003winnowing, + title={Winnowing: local algorithms for document fingerprinting}, + author={Schleimer, Saul and Wilkerson, Daniel S and Aiken, Alex}, + booktitle={Proceedings of the 2003 ACM SIGMOD international conference on Management of data}, + pages={76--85}, + year={2003}, + organization={ACM} +} + +@article{singh2013automated, + title={Automated feedback generation for introductory programming assignments}, + author={Singh, Rishabh and Gulwani, Sumit and Solar-Lezama, Armando}, + journal={ACM SIGPLAN Notices}, + volume={48}, + number={6}, + pages={15--26}, + year={2013}, + publisher={ACM} +} + +@article{sison2000multistrategy, + title={Multistrategy discovery and detection of novice programmer errors}, + author={Sison, Raymund C and Numao, Masayuki and Shimura, Masamichi}, + journal={Machine Learning}, + volume={38}, + number={1-2}, + pages={157--180}, + year={2000}, + publisher={Springer} +} + +@inproceedings{stamper2008hint, + title={The Hint Factory: Automatic generation of contextualized help for existing computer aided instruction}, + author={Stamper, John and Barnes, Tiffany and Lehmann, Lorrie and Croy, Marvin}, + booktitle={Proceedings of the 9th International Conference on Intelligent Tutoring Systems Young Researchers Track}, + pages={71--78}, + year={2008} +} + +@article{stamper2013experimental, + title={Experimental evaluation of automatic hint generation for a logic tutor}, + author={Stamper, John and Eagle, Michael and Barnes, Tiffany and Croy, Marvin}, + journal={International Journal of Artificial Intelligence in Education}, + volume={22}, + number={1-2}, + pages={3--17}, + year={2013}, + publisher={IOS Press} +} + +@inproceedings{suarez2008automatic, + title={Automatic Construction of a Bug Library for Object-Oriented Novice Java Programmer Errors}, + author={Suarez, Merlin and Sison, Raymund}, + booktitle={Proceedings of the 9th International Conference on Intelligent Tutoring Systems}, + pages={184}, + year={2008}, + 
organization={Springer Science \& Business Media} +} + +@article{vanlehn2006behavior, + title={The behavior of tutoring systems}, + author={{VanLehn}, Kurt}, + journal={International Journal of Artificial Intelligence in Education}, + volume={16}, + number={3}, + pages={227--265}, + year={2006}, + publisher={IOS Press} +} + +@article{xu2003transformation-based, + title={Transformation-based diagnosis of student programs for programming tutoring systems}, + author={Xu, Songwen and San Chee, Yam}, + journal={IEEE Transactions on Software Engineering}, + volume={29}, + number={4}, + pages={360--384}, + year={2003}, + publisher={IEEE} +} + +@article{zhang1989simple, + title={Simple fast algorithms for the editing distance between trees and related problems}, + author={Zhang, Kaizhong and Shasha, Dennis}, + journal={SIAM journal on computing}, + volume={18}, + number={6}, + pages={1245--1262}, + year={1989}, + publisher={SIAM} +} + +@inproceedings{cn21991, + title={Rule Induction with {CN2}: Some Recent Improvements}, + author={Peter Clark and Robin Boswell}, + booktitle={Proceedings of the Fifth European Conference on Machine Learning}, + pages={151--163}, + year={1991} +} diff --git a/paper/aied2017.tex b/paper/aied2017.tex new file mode 100644 index 0000000..3faaaf8 --- /dev/null +++ b/paper/aied2017.tex @@ -0,0 +1,63 @@ +\documentclass{llncs} + +\usepackage[utf8]{inputenc} + +% reclaim some plain-text sanity +\usepackage{newunicodechar} +\newunicodechar{∧}{\ensuremath{\land}} +\newunicodechar{⇒}{\ensuremath{\Rightarrow}} +\newunicodechar{⋯}{\ensuremath{\cdots}} + +\usepackage{fancyvrb} +\fvset{commandchars=\\\{\},baselinestretch=0.98,samepage=true,xleftmargin=2.5mm} +% WAT — I don’t even… +\makeatletter +\begingroup +\catcode`\`=\active +\gdef\FV@fontfamily@sf{% + \def\FV@FontScanPrep{\FV@MakeActive\`}% + \def\FV@FontFamily{\sffamily\edef`{{\string`}}}} +\endgroup +\makeatother + +\usepackage{forest} +\usetikzlibrary{arrows.meta} + +\usepackage{hyperref} + +\newcommand\code[1]{\texttt{#1}} +\newcommand\red[1]{{\begingroup\color[rgb]{0.8,0.15,0.15}#1\endgroup}} +\newcommand\green[1]{{\begingroup\color[rgb]{0.2,0.7,0.2}#1\endgroup}} +\newcommand\hl[1]{\textbf{#1}} + +\begin{document} + +\title{Automatic extraction of AST patterns \\ for debugging student programs} +\author{Timotej Lazar, Martin Možina, Ivan Bratko} +\institute{University of Ljubljana, Faculty of Computer and Information Science, Slovenia} +\maketitle + +\begin{abstract} +% motivation +When implementing a programming tutor, it is often difficult to manually consider all possible errors encountered by students. An alternative is to automatically learn a bug library of erroneous patterns from students’ programs. +% learning +We propose using abstract-syntax-tree (AST) patterns as features for learning rules to distinguish between correct and incorrect programs. These rules can be used for debugging student programs: rules for incorrect programs (buggy rules) contain patterns indicating mistakes, whereas rules for correct programs cover subsets of submissions sharing the same solution strategy. +% generating hints +To generate hints, we first check all buggy rules and point out incorrect patterns. If no buggy rule matches, rules for correct programs are used to recognize the student’s intent and suggest patterns that still need to be implemented. +% evaluation +We evaluated our approach on past student programming data for a number of Prolog problems. 
For 31 out of 44 problems, the induced rules correctly classified over 85\% of programs based only on their structural features. For approximately 73\% of incorrect submissions, we were able to generate hints that were implemented by the student in some subsequent submission. +\\\\ +\textbf{Keywords:} Programming tutors · Error diagnosis · Hint generation · Abstract syntax tree · Syntactic features +\end{abstract} + +\input{introduction} +\input{background} +\input{patterns} +\input{method} +\input{evaluation} +\input{conclusion} + +\bibliographystyle{splncs} +\bibliography{aied2017} + +\end{document} diff --git a/paper/background.tex b/paper/background.tex new file mode 100644 index 0000000..9457b35 --- /dev/null +++ b/paper/background.tex @@ -0,0 +1,25 @@ +\section{Background} + +% solution strategies +Several programming tutors generate hints from differences between the student’s program and a predefined set of possible solutions. The possible solution strategies for each problem can be given as a set of programs, or specified in a domain-specific language. Both Johnson’s Pascal tutor~\cite{johnson1990understanding} and Hong’s Prolog tutor~\cite{hong2004guided} perform hierarchical goal decomposition based on predefined programming \emph{plans} or \emph{techniques} to determine the student’s intent. Gerdes et al. use a small set of annotated model programs to derive solution strategies, which function in a similar way~\cite{gerdes2016ask-elle}. + +% canonicalization +Rivers and Koedinger compare students’ programs directly to a database of previous correct submissions~\cite{rivers2015data-driven}. They reduce program variability using equivalence-preserving transformations, such as inlining functions and reordering binary expressions. Hints are generated by suggesting a minimal correct step leading from the current submission to the closest correct program. + +% unit tests +Another option is to compare program behavior. Nguyen et al. classify programs according to results on a preselected set of test inputs~\cite{nguyen2014codewebs}. Li et al. generate test cases to distinguish between programs by selecting inputs that exercise different code paths in the program~\cite{li2016measuring}. Such tutors can point out pertinent failing test cases, which can be very helpful. However, to do any sort of conceptual analysis of student programs, we need to define some language for describing invariant properties of programs. + +% constraints +\emph{Constraints}~\cite{mitrovic2012fifteen} encode domain principles using if-then rules with relevance and satisfaction conditions, e.g. “if a function has a non-void return type, then it must have a return statement”~\cite{holland2009j-latte}. If a program violates a constraint, the tutor displays a predefined message. Le’s Prolog tutor improves constraint-based diagnosis by assigning weights to different types of constraints~\cite{le2009using}. + +% data-driven tutors +Jin et al. use \emph{linkage graphs} to describe data dependencies between the program’s statements~\cite{jin2012program}; we use AST patterns in a similar way. Nguyen et al. analyzed submissions in a large machine-learning course to learn a vocabulary of \emph{code phrases}: subtrees of submissions’ abstract syntax trees that perform the same function in a given context~\cite{nguyen2014codewebs}. By swapping parts between different programs, they built up a search library of functionally equivalent AST subtrees within a given context. 
+ +% tree-regular expressions +The idea for AST patterns comes from Tregex -- \emph{tree regular expressions}, mainly used in the field of natural-language processing~\cite{levy2006tregex}. Tregex patterns can encode complex relations between nodes, but can become unwieldy; in this paper we use a simpler s-expression syntax. Another language for describing tree patterns using s-expressions is \emph{trx}, which additionally supports choice, repetition and other operators~\cite{bagrak2004trx}. + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "aied2017" +%%% End: \ No newline at end of file diff --git a/paper/conclusion.tex b/paper/conclusion.tex new file mode 100644 index 0000000..331424f --- /dev/null +++ b/paper/conclusion.tex @@ -0,0 +1,19 @@ +\section{Conclusion} + +AST patterns can be used to describe the structure of a program’s AST. By encoding only relations between selected nodes, each pattern can match many programs. AST patterns thus function as “regular expressions for trees”. + +We presented a method for automatically extracting AST patterns from student programs. Our patterns encode relations between data objects in a program, with each pattern connecting either two instances of the same variable, a variable and a value, or two values. We consider such patterns as the atomic syntactic relations in a program, and use them as machine-learning features. + +We explained how patterns can be used as features to induce rules for classifying correct and incorrect programs. Since the goal of our research is to generate hints, we adapted the CN2 algorithm to produce rules useful for that purpose. We induce rules in two passes: we first learn the rules for incorrect programs, and then use the programs not covered by any such rule to learn the rules for correct programs. + +Evaluation shows that our patterns are useful for classifying Prolog programs. It is likely, however, that other programming languages will require different kinds of patterns. In Python, for example, variables can take on different values and appear in many places. This will likely require patterns relating more than two instances of a variable, or multiple variables. + +We showed how to generate hints based on rules by highlighting buggy patterns or pointing out what patterns are missing. Evaluation on past student data shows that useful hints can be provided for many incorrect submissions this way. The quality of feedback could be improved by annotating rules with explanations in natural language. Furthermore, since patterns and rules are easily interpretable, they can also help when manually authoring tutoring systems, by indicating the common errors and the typical solution strategies for each problem. + +In the future we will attempt to improve rule accuracy for certain problems, such as \code{union}. This will likely necessitate new kinds of patterns, for example to handle the cut operator. Adapting our methods to handle Python programs will give us some insight into what kinds of patterns could be useful in different situations. Finally, we will implement hint generation in an online programming tutor CodeQ, and evaluate the effect of automatic feedback on students’ problem-solving. 
+ + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "aied2017" +%%% End: \ No newline at end of file diff --git a/paper/evaluation.tex b/paper/evaluation.tex new file mode 100644 index 0000000..32cb03c --- /dev/null +++ b/paper/evaluation.tex @@ -0,0 +1,71 @@ +\section{Evaluation} +We evaluated our approach on 44 programming assignments. We preselected 70\% of students +whose submissions were used as learning data for rule learning. The submissions from +the remaining 30\% of students were used as testing data to evaluate classification accuracy of learned rules, +and to retrospectively evaluate quality of given hints. + +\begin{table}[t] +\caption{Results on five selected domains and averaged results over 44 domains. +Columns 2, 3, and 4 contain classification accuracies of our rule learning method, majority classifier, +and random forest, respectively. Columns 5 and 6 report the number of all generated buggy hints +and the number of hints that were actually implemented by students. The following three columns +contain the number of all generated intent hints (All), +the number of implemented hints (Imp) and the number of implemented alternative hints (Alt). +The numbers in the last column are student submission where hints could not be generated. +The bottom two rows give aggregated results (total and average) over all 44 domains.} +\centering + \begin{tabular}{|l|rrr|rr|rrr|r|} + \hline + Problem& \multicolumn{3}{c|}{CA} & \multicolumn{2}{c|}{Buggy hints} & \multicolumn{3}{c|}{Intent hints} & No hint \\ + \hline + & Rules & Maj & RF & All & Imp & All & Imp & Alt & \\ + \hline + sister & 0.988 & 0.719 & 0.983 & 128 & 128 & 127 & 84 & 26 & 34 \\ + del & 0.948 & 0.645 & 0.974 & 136 & 136 & 39 & 25 & 10 & 15 \\ + sum & 0.945 & 0.511 & 0.956 & 59 & 53 & 24 & 22 & 1 & 6 \\ + is\_sorted & 0.765 & 0.765 & 0.831 & 119 & 119 & 0 & 0 & 0 & 93 \\ + union & 0.785 & 0.783 & 0.813 & 106 & 106 & 182 & 66 & 7 & 6 \\ + ...& & & & & & & & & \\ + \hline + Total & & & & 3613 & 3508 & 2057 & 1160 & 244 & 1045 \\ + \hline + Average & 0.857 & 0.663 & 0.908 & 79.73 & 77.34 & 46.75 & 26.36 & 5.55 & 23.75 \\ + \hline + \end{tabular} +\label{table:eval} +\end{table} + +Table~\ref{table:eval} contains results on five selected problems (each representing a group of problems from one lab session), and averaged results over all 44 problems.\footnote{We report only a subset of results due to space restrictions. Full results and source code can be found at \url{https://ailab.si/ast-patterns/}. } The second, third, and fourth columns provide classification accuracies (CA) of the rule-based, majority, and random-forest classifiers on testing data. The majority classifier and the random forests method, +which had the best overall performance, serve as references for bad and good CA on particular data sets. + +For example, our rules correctly classified 99\% of testing instances for the \code{sister} problem, +the accuracy of the majority classifier was 66\%, and random forests achieved 98\%. CA of rules is also high for problems \code{del} and \code{sum}. It is lower, however, for \code{is\_sorted} and \code{union}, suggesting that the proposed set of AST patterns is insufficient for certain problems. Indeed, after analyzing the problem \code{is\_sorted}, +we observed that our patterns do not cover predicates with a single empty-list (\code{[]}) argument, which occurs as the base case in this problem. 
For this reason, the rule learning
+algorithm failed to learn any C-rules and therefore all programs were classified as incorrect. In the case of \code{union}, many solutions use the cut (\code{!}) operator, which
+is also ignored by our pattern-generation algorithm.
+
+We evaluated the quality of hints on incorrect submissions from those student traces
+that resulted in a correct program. In the case of the \code{sister} data set, there were 289 such incorrect submissions out of 403 submissions in total.
+
+The columns captioned “Buggy hints” in Table~\ref{table:eval} contain the evaluation of hints generated from rules
+for incorrect programs (I-rules). For each generated buggy hint we checked whether
+it was implemented by the student in the final submission. The column “All” is
+the number of all generated buggy hints, while the column “Imp” is the number of
+implemented hints. The results show the high relevance of generated buggy hints, as 97\% (3508 out of 3613) of them were implemented in the final solution; in other words, the buggy pattern was removed.
+
+Intent hints are generated when the algorithm fails to find any buggy hints. The column “All” contains the number of generated intent hints, “Imp” the number of implemented main intent hints, and “Alt” the number
+of implemented alternative hints. Notice that the percentage of implemented intent hints is significantly lower
+than for buggy hints: in the case of the problem \code{sister}, 84 out of 127 (66\%) hints were implemented, whereas in the case of the problem \code{union} only 66 out of 182 (36\%) hints were implemented. On average, 56\% of main intent hints were implemented.
+
+The last column shows the number of submissions for which no hints could be generated. This value is relatively high
+for the \code{is\_sorted} problem, because the algorithm could not learn any C-rules and thus no intent hints were generated.
+
+To sum up, buggy hints appear to be reliable: they were almost always implemented, even though we evaluated them on past data -- the decisions of students were not influenced by these hints. The percentage of implemented intent hints is, on average, lower (56\%), which is still a reasonable result, given that it is difficult to determine the programmer’s intent. In 12\% (244 out of 2057) of generated intent hints, students implemented an alternative hint that was identified by our algorithm. Overall, we were able to generate hints for 84.5\% of incorrect submissions. Of those hints, 86\% were implemented (73\% of all incorrect submissions).
+
+Last but not least, the high classification accuracies in many problems imply that it is often possible to determine the correctness of a program simply by checking for the presence of a small number of patterns. Our hypothesis is that there exist some crucial patterns in programs that students have difficulties with. When they figure out these patterns, implementing the rest of the program is usually straightforward.
+
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "aied2017"
+%%% End:
\ No newline at end of file
diff --git a/paper/introduction.tex b/paper/introduction.tex
new file mode 100644
index 0000000..13dc9ec
--- /dev/null
+++ b/paper/introduction.tex
@@ -0,0 +1,27 @@
+\section{Introduction}
+
+% why automatic feedback
+Programming education is becoming increasingly accessible with massive online courses. Since thousands of students can attend such courses, it is impossible for teachers to individually evaluate each participant’s work. 
On the other hand, in-time feedback that directly addresses students’ mistakes can aid the learning process. Providing feedback automatically could thus greatly enhance these courses. + +% ITS background +Traditional programming tutors use manually constructed domain models to generate feedback. Model-tracing tutors simulate the problem-solving \emph{process}: how students program. This is challenging because there are no well-defined steps when writing a program. Many tutors instead only analyze individual programs submitted by students, and disregard how a program evolved. They often use models coded in terms of constraints or bug libraries~\cite{keuning2016towards}. + +% data-driven domain modeling +Developing a domain model requires significant knowledge-engineering effort~\cite{folsom-kovarik2010plan}. This is particularly true for programming tutors, where most problems have several alternative solutions with many possible implementations~\cite{le2013operationalizing}. Data-driven tutors reduce the necessary effort by mining educational data -- often from online courses -- to learn common errors and generate feedback~\cite{rivers2015data-driven,nguyen2014codewebs,jin2012program}. + +% problem statement +This paper addresses the problem of finding useful features to support data mining in programming tutors. Features should be robust against irrelevant variations in program code (such as renaming a variable) and relatable to the knowledge components of the target skill (programming), so as to support hint generation. + +% our approach: patterns + rules +We describe features with \emph{abstract-syntax-tree patterns} that encode relations between nodes in a program’s abstract syntax tree. We use patterns that describe a path between pairs of leaf nodes referring to variables or values. By omitting some nodes on these paths, patterns can match different programs containing the same relation. We then induce rules to predict program correctness from AST patterns, allowing us to generate hints based on missing or incorrect patterns. + +% evaluation +We evaluated our approach on existing Prolog programs submitted by students during past lab sessions of a second-year university course. For 73\% of incorrect submissions we are able to suggest potentially useful patterns -- those that the students had actually implemented in the final, correct programs. + +% contributions +The main contributions presented in this paper are: AST patterns as features for machine learning, a rule-based model for predicting program correctness, and hint generation from incorrect or missing patterns in student programs. + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "aied2017" +%%% End: diff --git a/paper/method.tex b/paper/method.tex new file mode 100644 index 0000000..5e27229 --- /dev/null +++ b/paper/method.tex @@ -0,0 +1,82 @@ +\section{Method} + +This section explains the three main components of our approach: automatically extracting patterns from student submissions, learning classification rules for correct and incorrect programs, and using those rules to generate hints. + +\subsection{Extracting patterns} +\label{sec:extracting-patterns} + +We construct patterns by connecting pairs of leaf nodes in a program’s AST. 
For this paper we always select a pair of nodes from the same clause: either two nodes referring to the same variable (like the examples in Fig.~\ref{fig:sister}), or a value (such as the empty list \code{[]} or the number \code{0}) and another variable or value in the same \textsf{compound} or \textsf{binop} (like the blue dotted pattern in Fig.~\ref{fig:sum}). For example, in the clause (the second occurrence of each variable -- \code{A}, \code{B} and \code{C} -- is marked with~’ for disambiguation) + +\begin{Verbatim} +a(A,\textsf{ }B):- + b(A\textsf{'},\textsf{ }C), + B\textsf{'} is C\textsf{'}\textsf{ }+\textsf{ }1. +\end{Verbatim} + +\noindent +we select the following pairs of nodes: \{\code{A},\,\code{A\textsf{'}}\}, \{\code{B},\,\code{B\textsf{'}}\}, \{\code{C},\,\code{C\textsf{'}}\}, \{\code{B\textsf{'}},\,\code{1}\} and \{\code{C\textsf{'}},\,\code{1}\}. + +For each selected pair of leaf nodes $(a,b)$ we build a pattern by walking the AST in depth-first order, and recording nodes that lie on the paths to $a$ and $b$. We omit \textsf{and} nodes, as explained in the previous section. We also include certain nodes that do not lie on a path to any selected leaf. Specifically, we include the functor or operator of all \textsf{compound}, \textsf{binop} or \textsf{unop} nodes containing $a$ or $b$. + +Patterns constructed in this way form the set of features for rule learning. To keep this set at a reasonable size, we only use patterns that have occurred at least five times in submitted programs. + +\subsection{Learning rules} + +We represent students’ programs in the feature space of AST patterns described above. Each pattern corresponds to one binary feature with value $true$ when the pattern is present and $false$ when it is absent. We classify programs as correct or incorrect based on predefined unit tests for each problem, and use these labels for machine learning. + +Since programs can be validated with appropriate unit tests, our goal is not classifying new submissions, but rather to discover patterns associated with program correctness. This approach to machine learning is called \emph{descriptive induction} -- the automatic discovery of patterns describing regularities in data. We use rule learning for this task, because conditions of rules can be easily translated to hints. + +Before explaining the algorithm, let us discuss the reasons why a program can be incorrect. Our experience indicates that bugs in student programs can often be described by 1) some incorrect or \emph{buggy} pattern, which needs to be removed, or 2) some missing relation (pattern) that should be included before the program can be correct. We shall now explain how both types of errors can be identified with rules. + +To discover buggy patterns, the algorithm first learns rules that describe incorrect programs (I-rules). We use a variant of the CN2 algorithm~\cite{cn21991} implemented within the Orange data-mining toolbox~\cite{demsar2013orange}. Since we use rules to generate hints, and since hints should not be presented to students unless they are likely to be correct, we impose additional constraints on the rule learner: +\begin{enumerate} + \item The classification accuracy of each learned rule must exceed a certain threshold (90\% in our experiments). + \item Each conjunct in a condition must be significant with respect to the likelihood-ratio test (in our experiments significance threshold was set to $p=0.05$). 
+ \item A conjunct can only specify the presence of a pattern: we allow only feature-value pairs with $true$ as the value.
+\end{enumerate}
+
+\noindent The first two constraints are needed to induce good rules with significant patterns, while the third ensures that rules mention only the presence (and not the absence) of patterns as reasons for a program to be incorrect. This is important, since the conditions of I-rules ought to contain patterns symptomatic of incorrect programs.
+
+With respect to the second type of error, we could try the same approach and use the above algorithm to learn rules for the class of correct programs (C-rules). If such rules were accurate, their conditions would define combinations of patterns sufficient to render a program correct.
+It turns out, however, that it is difficult to learn accurate rules for correct programs without specifying absent patterns. If the absence of patterns were allowed in rule conditions, learning would result in too many C-rules.
+
+A possible way to solve this problem is to remove the programs covered by rules for the incorrect class. This removes all known buggy patterns from the data, so they will not be included in C-rules. However, removing incorrect programs also removes the need for C-rules to include the patterns that distinguish correct programs from incorrect ones. For example, if all incorrect programs were removed, the single C-rule “$\mathsf{true} ⇒ \mathsf{correct}$” would suffice, which cannot be used to generate hints. We achieved the best results by learning from both data sets: we used the original data set (with all programs) to learn rules, and the reduced data set to test whether a rule achieves the required classification accuracy (90\%).
+
+Even though our main interest is the discovery of patterns, we can still use the induced rules to classify new programs, for example to evaluate the quality of the rules. The classification procedure has three steps: 1) if an I-rule covers the program, classify it as incorrect; 2) else if a C-rule covers the program, classify it as correct; 3) otherwise, if no rule covers the program, classify it as incorrect.
+
+\subsection{Generating hints}
+
+Once we have induced the rules for a given problem, we can use them to provide hints based on buggy or missing patterns. To generate a hint for an incorrect program, each rule is considered in turn. We consider two types of feedback: \emph{buggy} hints based on I-rules, and \emph{intent} hints based on C-rules.
+
+First, all I-rules are checked to find any known incorrect patterns in the program. To find the most likely incorrect patterns, the rules are considered in order of decreasing quality. If all patterns in the rule “$p_1 ∧ ⋯ ∧ p_k ⇒ \mathsf{incorrect}$” match, we highlight the relevant leaf nodes. As an aside, we found that most I-rules are based on the presence of a single pattern. For the incorrect \code{sum} program from the previous section, our method produces the following highlight
+
+\begin{Verbatim}
+sum([],0).            % \textit{base case:} the empty list sums to zero
+sum([H|T],\red{\underline{Sum}}):-   % \textit{recursive case:}
+    sum(T,\red{\underline{Sum}}),    % sum the tail and
+    Sum is Sum + H.   % add first element (\textit{bug:} reused variable)
+\end{Verbatim}
+
+\noindent
+based on the rule “$p ⇒ \mathsf{incorrect}$”, where $p$ corresponds to the solid red pattern in Fig.~\ref{fig:sum}. This rule covers 36 incorrect programs and one correct program that uses an unusual solution strategy. 
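+
+To make the procedure concrete, the following Prolog-style sketch outlines buggy-hint generation. It is only an illustration, not a description of our implementation: the facts \code{i\_rule(Quality, Patterns)} and the predicate \code{matches(Program, Pattern, Nodes)}, which is assumed to return the leaf nodes covered by a matching pattern, stand in for the induced I-rules and the pattern matcher.
+
+\begin{Verbatim}
+% illustrative sketch; i_rule/2 and matches/3 are assumed to be given
+buggy_hint(Program, Highlight) :-
+    findall(Q-Ps, i_rule(Q, Ps), Pairs),
+    keysort(Pairs, Ascending),
+    reverse(Ascending, ByQuality),              % best rules first
+    member(_-Patterns, ByQuality),
+    maplist(matches(Program), Patterns, Nodes), % every pattern must match
+    append(Nodes, Highlight),                   % leaf nodes to highlight
+    !.                                          % commit to this rule
+\end{Verbatim}
+
+\noindent
+The cut commits to the highest-quality I-rule whose patterns all match; if no I-rule matches, \code{buggy\_hint} fails and we fall back to intent hints, as described next.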
+ +If no I-rule matches the program, we use C-rules to determine the student’s intent. C-rules group patterns that together indicate a high likelihood that the program is correct. Each C-rule thus defines a particular “solution strategy” in terms of AST patterns. We reason that alerting the student to a missing pattern could help them complete the program without revealing the whole solution. + +When generating a hint from C-rules, we consider all \emph{partially matching} rules “$p_1 ∧ ⋯ ∧ p_k ⇒ \mathsf{correct}$”, where the student’s program matches some (but not all) patterns $p_i$. For each such rule we store the number of matching patterns, and the set of missing patterns. We then return the most common missing pattern among the rules with most matching patterns. + +For example, if we find the following missing pattern for an incorrect program implementing the \code{sister} predicate: + +\begin{Verbatim}[fontfamily=sf] +(clause (head (compound (functor ‘\code{sister}’) (args var))) (binop var ‘\code{\textbackslash{}=}’))\textrm{,} +\end{Verbatim} + +\noindent +we could display a message to the student saying “comparison between \code{X} and some other value is missing”, or “your program is missing the goal \code{X} \code{\textbackslash{}=} \code{?}”. + +This method can find more than one missing pattern for a given partial program. In such cases we return the most commonly occurring pattern as the main hint, and other candidate patterns as alternative hints. We use main and alternative intent hints to establish the upper and lower bounds when evaluating automatic hints. + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "aied2017" +%%% End: \ No newline at end of file diff --git a/paper/outline.org b/paper/outline.org new file mode 100644 index 0000000..a70c1a3 --- /dev/null +++ b/paper/outline.org @@ -0,0 +1,81 @@ +* Introduction (1) :Timotej: +** Generics +ITSs are useful, but expensive to create. Internet increases the reach of ITSs (think MOOC) and allows us to gather a lot of data. The challenge is to extract useful information from this data to improve feedback to students. + - koedinger2013new + +** Programming ITS +A particularly challenging domain: + - no meaningful “solutions steps” + - no easily discernible and independent “program features” + +** Problem +Find good features to describe student programs that: + - are automatically learnable + - can serve as a basis for generating hints + +** Contributions +A pattern language for describing program features. +Rules for classifying correct/incorrect Prolog programs. + +* Background (2) :Timotej: +Model-tracing is not well-suited for programming tutors. + - Hint Factory (Barnes & Stamper) for programming (Jin, Rivers) + - student traces are basically insane anyway, so learning from them is not a good idea (Piech 2015) +Instead we focus on analyzing individual submissions - CBM (maybe just /like/ CBM?). 
+ - why: human tutors are usually able to comment without referring to previous program versions + +Analyzing program based on formal descriptions (usually defined manually): + - plans (Johnson) + - techniques (Brna/Huang/Le) + - strategies (Gerdes) +Comparing programs directly / after canonicalization (Rivers 2015) + - not easy to determine which differences are crucial + +Defining “programming attributes/features”: + - AST subtrees (Nguyen 2014) + - linkage graphs (Jin 2012) + - n-grams or other purely textual attributes + - mostly used for plagiarism (MOSS) / code clone detection + +Tgrep / trx for structural patterns in NLP - use the same for describing code! + +* Dataset (3) :Timotej: +Solution traces +** Motivating example: ancestor/2 + - show and explain correct / buggy program + - examples of patterns & rules + - buggy rules ≈ error classes + - correct rules ≈ program strategies or “plans” → determine intent + - how correct / buggy rules can be used to automatically determine intent and generate feedback + - using rules to assist authoring tutors + +** Patterns +Programming is all about manipulating data (variables/literals). +Patterns encode (only) relations between data nodes in an AST, and are more granular than subtrees. +Patterns abstract away some information +Algorithm for extracting patterns. + - which patterns are considered + +* Method (2) :Martin: +Rule learning. + - first learn correct, then buggy rules +Automatic hint generation. + +* Evaluation (3) :Martin:Timotej: +Example: describe patterns/rules for ancestor/2 (and/or another domain). + - example trace +CA/AUC for rules/RF/…. + Using e.g. RF with patterns for attributes achieves a high classification accuracy for correct/incorrect programs. + Rules are less accurate, but can be used for hints (how can we improve this)? +Simulated hints: for how many submissions would we suggest a hint that was ultimately implemented in the final solution? + Hints from buggy rules are almost always implemented; correct rules are less certain. + +* Conclusions / future work (1) + - syntactic variations (e.g. using = instead of implicit matching) + - combine with canonicalization + - ABML for rule learning + - integrate into a tutor (CodeQ / CloudCoder) to test in situ + +* Misc +Problems: + - predicates with similar clauses (e.g. union/3) diff --git a/paper/patterns.tex b/paper/patterns.tex new file mode 100644 index 0000000..e1aa767 --- /dev/null +++ b/paper/patterns.tex @@ -0,0 +1,191 @@ +\section{AST patterns} + +In this section we describe AST patterns through examples, while Sect.~\ref{sec:extracting-patterns} explains how patterns are extracted from student programs. Consider the following Prolog program implementing the relation \code{sister(X,Y)}\footnote{Binary relations like this one should be read as “\code{X} is a sister/parent/… of \code{Y}”.}: + +\begin{Verbatim} +sister(X,Y):- % X is Y’s sister when: + parent(P,X), + parent(P,Y), % X and Y share a common parent P, + female(X), % X is female, and + X \textbackslash{}= Y. % X and Y are not the same person. +\end{Verbatim} + +Figure~\ref{fig:sister} shows the program’s AST with two patterns. The pattern drawn with blue dotted arrows encodes the fact that the first argument to the \code{sister} predicate also appears in the call to \code{female}. In other words, this pattern states that \code{X} must be female to be a sister. 
We write this pattern as the s-expression + +\begin{Verbatim}[fontfamily=sf] +(clause (head (compound (functor ‘\code{sister}’) (args var))) + (compound (functor ‘\code{female}’) (args var))) +\end{Verbatim} + +\begin{figure}[htbp] + \centering + \begin{forest} + for tree={ + font=\sf, + edge=gray, + s sep=0.05cm, + } +[text + [clause,name=top + [head,name=h + [compound,name=hc [functor,name=hf [sister,name=hfl]] + [args,name=ha [var,name=hav [X,name=havl,draw,rectangle,thick,blue,dotted,line width=0.4mm]] [args [var [Y]]]]]] + [and + [compound,name=g1 [functor,name=g1f [parent,name=g1fl]] + [args,name=g1a [var,name=g1av [P,name=g1avl,draw,rectangle,thick,red]] [args [var [X]]]]] + [and + [compound,name=g2 [functor,name=g2f [parent,name=g2fl]] + [args,name=g2a [var,name=g2av [P,name=g2avl,draw,rectangle,thick,red]] [args [var [Y]]]]] + [and + [compound,name=g3 [functor,name=g3f [female,name=g3fl]] + [args,name=g3a [var,name=g3av [X,name=g3avl,draw,rectangle,thick,blue,dotted,line width=0.4mm]]]] + [binop [var [X]] [\textbackslash{}{=}] [var [Y]]]]]]]] +% first pattern +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (top) edge[out=-10,in=-170] (g1); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1) edge[transform canvas={xshift=-1.5mm}] (g1f); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1f) edge[transform canvas={xshift=-1.1mm}] (g1fl); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1) edge[transform canvas={xshift=1.5mm}] (g1a); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g1a) edge[transform canvas={xshift=-1.2mm}] (g1av); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (top) edge[out=-10,in=-170,transform canvas={xshift=-2mm}] (g2); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2) edge[transform canvas={xshift=-1.5mm}] (g2f); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2f) edge[transform canvas={xshift=-1.1mm}] (g2fl); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2) edge[transform canvas={xshift=1.5mm}] (g2a); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (g2a) edge[transform canvas={xshift=-1.2mm}] (g2av); +% second pattern +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (top) edge[transform canvas={xshift=-0.8mm,yshift=0.8mm}] (h); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (h) edge[transform canvas={xshift=-1mm}] (hc); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (hc) edge[transform canvas={xshift=-1.5mm}] (hf); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (hf) edge[transform canvas={xshift=-1.1mm}] (hfl); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (hc) edge[transform canvas={xshift=1.5mm}] (ha); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (ha) edge[transform canvas={xshift=-1.2mm}] (hav); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (top) edge[out=-5,in=160] (g3); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3) edge[transform canvas={xshift=-1.5mm}] (g3f); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3f) edge[transform canvas={xshift=-1.1mm}] (g3fl); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3) edge[transform canvas={xshift=1.5mm}] (g3a); +\path[-{Latex[length=1.5mm,width=1mm]},thick,dotted,relative,blue] (g3a) edge[transform canvas={xshift=-1.2mm}] (g3av); + \end{forest} + \caption{The AST for 
the \code{sister} program, showing two patterns and the leaf nodes inducing them. Solid red arrows equate the first arguments in the two calls to \code{parent}. Dotted blue arrows encode the necessary condition that \code{X} must be female to be a sister.} + \label{fig:sister} +\end{figure} + +Every pattern used in this paper has the same basic structure, and describes paths from a \textsf{clause} node to one or two leaf nodes containing variables or values. All patterns in Figs.~\ref{fig:sister} and~\ref{fig:sum} are induced from such node pairs. For each leaf we also include some local context, for example the predicate name (e.g. \texttt{parent}). + +We regard these patterns as the smallest units of meaning in Prolog programs: each pattern encodes some interaction between two parts of the program. Including more than two leaf nodes in a pattern could make it difficult to pinpoint the exact error when generating hints. Each pattern contains at most two \textsf{var} nodes, so we require they both refer to the same variable -- relating two nodes with different variables would not tell us much about the program. This allows us to omit variable names from patterns. + +We handle syntactic variations in programs by omitting certain nodes from patterns. For example, by not including \textsf{and} nodes, the above pattern can match a clause regardless of the presence (or order) of other goals in its body (i.e., with any arrangement of \textsf{and} nodes in the AST). Order \emph{is} important for the nodes specified in the pattern; this is explained below. + +The second pattern in Fig.~\ref{fig:sister}, drawn with solid red arrows, encodes the fact that the two calls to \code{parent} share the first argument. In other words, \code{X}~and~\code{Y} must have the same parent~\code{P}. + +\begin{Verbatim}[fontfamily=sf] +(clause (compound (functor ‘\code{parent}’) (args var)) + (compound (functor ‘\code{parent}’) (args var))) +\end{Verbatim} + +Patterns describe relations between nodes in a program’s AST. Specifically, the pattern ($a$ $b$ $c$) means that the nodes $b$ and $c$ are descended from $a$, and that $b$ precedes $c$ in a depth-first tree walk. In general, an AST matches the pattern (\textsf{name} $p_1$ … $p_k$) if it contains a node $n$ labeled \textsf{name}; the subtree rooted at $n$ must also contain, in depth-first order, distinct nodes $n_1$ to $n_k$ matching subpatterns $p_1$ to $p_k$. The above pattern, for example, matches only the last of the following programs (the first program is missing one call to \code{parent}, and the second has different variables in positions encoded by the pattern): + +\begin{Verbatim} +\textit{\red{% nonmatching}} \textit{\red{% nonmatching}} \textit{\green{% matching}} +sister(X,Y):- sister(X,Y):- sister(X,Y):- + female(X), female(X), parent(A,X), + parent(P,X), parent(A,X), female(X), + X \textbackslash{}= Y. parent(B,Y), parent(A,Y), + X \textbackslash{}= Y. X \textbackslash{}= Y. +\end{Verbatim} + +A relation between any two objects in a program is insufficient to reason about the program’s behavior on the whole. In the tutoring context, however, there are patterns that strongly indicate the presence of certain bugs. Take for example the following incorrect program to sum a list: + +\begin{Verbatim} +sum([],0). % \textit{base case:} the empty list sums to zero +sum([H|T],Sum):- % \textit{recursive case:} + sum(T,Sum), % sum the tail and + Sum is Sum + H. 
% add first element (\textit{bug:} reused variable) +\end{Verbatim} + +This error is fairly common with Prolog novices: the variable \code{Sum} is used to represent both the sum of the whole list (line 2), and the sum of only the tail elements (line 3). The last line fails since Prolog cannot unify \code{Sum} with a (generally) different value of \code{Sum\,+\,H}. The program’s AST is displayed in Fig.~\ref{fig:sum}. + +\begin{figure}[htbp] + \centering + \begin{forest} +for tree={ + font=\sf, + edge=gray, + s sep=0.05cm, +} +[text + [clause,name=c1 + [head,name=c1h + [compound,name=c1hc [functor,name=c1hf [sum,name=c1hfv]] + [args,name=c1ha1 [\code{[\,]},name=c1ha1v,draw,rectangle,thick,dotted,line width=0.4mm,blue] + [args,name=c1ha2 [\code{0},name=c1ha2v,draw,rectangle,thick,dotted,line width=0.4mm,blue]]]]]] + [clause,name=c2 + [head,name=c2h + [compound,name=c2hc [functor,name=c2hf [sum,name=c2hfv]] + [args,name=c2ha1 [list [var [H]] [var [T]]] [args,name=c2ha2 [var,name=c2ha2v [Sum,draw,rectangle,thick,red]]]]]] + [and + [compound,name=c2c [functor,name=c2cf [sum,name=c2cfv]] + [args,name=c2ca1 [var [T]] [args,name=c2ca2 [var,name=c2ca2v [Sum,draw,rectangle,thick,red]]]]] + [binop,name=c2b [var,name=c2bv [Sum,draw,rectangle,thick,dashed,orange]] [is,name=c2bo] + [binop,name=c2bb [var,name=c2bbv [Sum,draw,rectangle,thick,dashed,orange]] [+,name=c2bbo] [var [H]]]]]]] +% first pattern +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1) edge[transform canvas={xshift=-1mm}] (c1h); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1h) edge[transform canvas={xshift=-1mm}] (c1hc); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1hc) edge[transform canvas={xshift=-1.2mm}] (c1hf); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1hf) edge[transform canvas={xshift=-1mm}] (c1hfv); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1hc) edge[transform canvas={xshift=1.2mm}] (c1ha1); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1ha1) edge[transform canvas={xshift=-1.2mm}] (c1ha1v); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1ha1) edge[transform canvas={xshift=1.2mm}] (c1ha2); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dotted,line width=0.4mm,blue] (c1ha2) edge[transform canvas={xshift=1mm}] (c1ha2v); +% second pattern +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2) edge[transform canvas={xshift=-0.8mm,yshift=0.8mm}] (c2h); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2h) edge[transform canvas={xshift=-1mm}] (c2hc); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2hc) edge[transform canvas={xshift=-1.2mm}] (c2hf); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2hf) edge[transform canvas={xshift=-0.8mm,yshift=0.8mm}] (c2hfv); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2hc) edge[transform canvas={xshift=1.2mm}] (c2ha1); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ha1) edge[transform canvas={xshift=1.2mm}] (c2ha2); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ha2) edge[transform canvas={xshift=1mm}] (c2ha2v); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2) edge[out=-15,in=-170] (c2c); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2c) edge[transform canvas={xshift=-1.2mm}] 
(c2cf); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2cf) edge[transform canvas={xshift=-1mm}] (c2cfv); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2c) edge[transform canvas={xshift=1.2mm}] (c2ca1); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ca1) edge[transform canvas={xshift=1.2mm}] (c2ca2); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,red] (c2ca2) edge[transform canvas={xshift=1mm}] (c2ca2v); +% third pattern +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2) edge[out=20,in=150] (c2b); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2b) edge[transform canvas={xshift=-1.1mm}] (c2bv); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2b) edge[transform canvas={xshift=0.8mm}] (c2bo); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2b) edge[transform canvas={xshift=1mm}] (c2bb); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2bb) edge[transform canvas={xshift=-1mm}] (c2bbv); +\path[-{Latex[length=1.5mm,width=1mm]},thick,relative,dashed,orange] (c2bb) edge[transform canvas={xshift=0.9mm}] (c2bbo); + \end{forest} + \caption{The AST for the buggy \texttt{sum} program. Dotted arrows relate the correct values in the base case. Solid and dashed arrows denote two patterns describing incorrect reuse of the \code{Sum} variable in the recursive case.} + \label{fig:sum} +\end{figure} + +Various patterns capture this mistake. Solid red arrows in Fig.~\ref{fig:sum} show one example -- \code{Sum} returned by the predicate should not be the same as the \code{Sum} from the recursive call: + +\begin{Verbatim}[fontfamily=sf] +(clause (head (compound (functor ‘\code{sum}’) (args (args var)))) + (compound (functor ‘\code{sum}’) (args (args var)))) +\end{Verbatim} + +\noindent +The second pattern, drawn with dashed orange arrows in the figure, indicates the likely error in the arithmetic expression: + +\begin{Verbatim}[fontfamily=sf] +(clause (binop var ‘\code{is}’ (binop var ‘\code{+}’))) +\end{Verbatim} + +The leftmost pattern in Fig.~\ref{fig:sum}, drawn with dotted blue arrows, describes the correct relation between the two constants in the base-case rule: + +\begin{Verbatim}[fontfamily=sf] +(clause (head (compound (functor ‘\code{sum}’) (args \code{[]} (args \code{0}))))) +\end{Verbatim} + +\noindent +We include such patterns in our feature set to cover the base-case clauses in recursive programs, which often include no variables. + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "aied2017" +%%% End: \ No newline at end of file -- cgit v1.2.1