summaryrefslogtreecommitdiff
path: root/monkey
diff options
context:
space:
mode:
authorTimotej Lazar <timotej.lazar@fri.uni-lj.si>2017-02-27 11:51:28 +0100
committerTimotej Lazar <timotej.lazar@fri.uni-lj.si>2017-02-27 11:51:28 +0100
commitecdc0b2a576cfddabc9b9763a32b28ae30fee3c0 (patch)
treef479c62949dd7851e66df5ee7f9211230b3cc5ad /monkey
parent488c40522f831d7ef84efdd07f895479b79391c1 (diff)
Implement pattern-based hints (see AIED2017)
Diffstat (limited to 'monkey')
-rw-r--r--monkey/patterns.py149
-rw-r--r--monkey/rules.py131
2 files changed, 280 insertions, 0 deletions
diff --git a/monkey/patterns.py b/monkey/patterns.py
new file mode 100644
index 0000000..7dfa96f
--- /dev/null
+++ b/monkey/patterns.py
@@ -0,0 +1,149 @@
+# CodeQ: an online programming tutor.
+# Copyright (C) 2016,2017 UL FRI
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import collections
+from itertools import chain, combinations, product
+
+from nltk import Tree
+
+from prolog.util import parse as prolog_parse
+
+# construct pattern to match the structure of nodes given by [include],
+# supports variables and literals
+def pattern(node, include):
+ if not isinstance(node, Tree):
+ return None
+
+ label = node.label()
+ if any(n is node for n in include):
+ if label == 'literal':
+ return '"{}"'.format(node[0].val)
+ if label == 'variable':
+ return '{}'.format(label)
+ return None
+ if label == 'functor':
+ return '({} "{}")'.format(label, node[0].val)
+
+ subpats = [pattern(child, include) for child in node]
+ pat = None
+ if any(subpats):
+ if label == 'and':
+ if subpats[1]:
+ pat = subpats[1]
+ if subpats[0]:
+ if pat:
+ pat = subpats[0] + ' ' + pat
+ else:
+ pat = subpats[0]
+ elif label == 'args':
+ pat = label
+ for i, subpat in enumerate(subpats):
+ if subpat:
+ pat += ' {}'.format(subpat)
+ pat = '(' + pat + ')'
+ elif label == 'unop':
+ pat = '(' + label + ' ' + node[0].val + ' ' + subpats[1] + ')'
+ elif label == 'binop':
+ pat = label
+ if subpats[0]:
+ pat += ' {}'.format(subpats[0])
+ pat += ' "{}"'.format(node[1].val)
+ if subpats[2]:
+ pat += ' {}'.format(subpats[2])
+ pat = '(' + pat + ')'
+ elif label == 'clause':
+ pat = label
+ for i, subpat in enumerate(subpats):
+ if subpat:
+ pat += ' {}'.format(subpats[i])
+ return '(' + pat + ')'
+ elif label == 'compound':
+ if len(subpats) > 1 and subpats[1]:
+ pat = label
+ for i, subpat in enumerate(subpats):
+ pat += ' {}'.format(subpat)
+ pat = '(' + pat + ')'
+ else:
+ return None
+ elif label == 'head':
+ pat = label
+ pat += ' {}'.format(subpats[0])
+ pat = '(' + pat + ')'
+ elif label == 'list':
+ pat = 'list '
+ if subpats[0]:
+ pat += '(h {})'.format(subpats[0])
+ if subpats[0] and subpats[1]:
+ pat += ' '
+ if subpats[1]:
+ pat += '(t {})'.format(subpats[1])
+ pat = '(' + pat + ')'
+ if not pat:
+ for s in subpats:
+ if s:
+ pat = s
+ break
+ return pat
+
+def get_patterns(tree):
+ if isinstance(tree, str):
+ tree = prolog_parse(tree)
+ if tree is None:
+ return
+
+ # get patterns separately for each clause
+ for clause in tree:
+ # collect variable nodes in this clause
+ variables = collections.defaultdict(list)
+ for node in clause.subtrees():
+ if isinstance(node, Tree) and node.label() == 'variable':
+ name = node[0].val
+ variables[name].append(node)
+
+ # yield patterns for singleton variables
+ for var, nodes in variables.items():
+ if len(nodes) == 1:
+ pat = pattern(clause, nodes)
+ if pat:
+ yield pat, nodes
+
+ # yield patterns for variable-variable pairs (within a clause)
+ for var, nodes in variables.items():
+ for selected in combinations(nodes, 2):
+ pat = pattern(clause, selected)
+ if pat:
+ yield pat, selected
+
+ # yield patterns for variable-literal / literal-literal pairs
+ # yield patterns for singleton literals
+ # (only within a topmost compound / binop / unop)
+ def patterns_with_literals(node):
+ if not isinstance(node, Tree):
+ return
+ if node.label() in {'compound', 'binop', 'unop'}:
+ vars = [n for n in node.subtrees() if n.label() == 'variable']
+ lits = [n for n in node.subtrees() if n.label() == 'literal']
+ for selected in chain(
+ combinations(lits, 1),
+ combinations(lits, 2),
+ product(lits, vars)):
+ pat = pattern(clause, selected)
+ if pat:
+ yield pat, selected
+ else:
+ for child in node:
+ yield from patterns_with_literals(child)
+ yield from patterns_with_literals(clause)
diff --git a/monkey/rules.py b/monkey/rules.py
new file mode 100644
index 0000000..fcd9582
--- /dev/null
+++ b/monkey/rules.py
@@ -0,0 +1,131 @@
+# CodeQ: an online programming tutor.
+# Copyright (C) 2017 UL FRI
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import collections
+
+from nltk import Tree
+
+from .patterns import get_patterns
+
+# return a hint for the best applicable buggy rule
+def suggest_buggy(bugs, program, patterns):
+ for rule in [r for r in bugs['rules'] if not r['class']]:
+ matches = []
+ for rule_pattern in rule['condition']:
+ for pattern, nodes in patterns:
+ if rule_pattern == pattern:
+ matches.append((pattern, nodes))
+ break
+
+ # found a rule, now do the cha-cha-hint
+ if len(matches) == len(rule['condition']):
+ hints = []
+ for pattern, nodes in matches:
+ # find subtrees to point out to student
+ top_nodes = []
+ for node in nodes:
+ # highlight the node
+ hints.append({'id': 'monkey_highlight', 'start': node.start, 'end': node.end})
+ # find top-level clause (or similar) node containing the highlighted node
+ while node.parent().label() in {'compound', 'binop', 'unop', 'args', 'list', 'h', 't'}:
+ node = node.parent()
+ if not any(n is node for n in top_nodes):
+ top_nodes.append(node)
+
+ hint = {'args': {'fragments': [program[n.start:n.end] for n in top_nodes]}}
+ if all(isinstance(n, Tree) and n.label() == 'variable' for n in nodes):
+ # two instances of a variable
+ hint['id'] = 'monkey_singleton' if len(nodes) == 1 else 'monkey_buggy_variable'
+ hint['args']['variable'] = str(nodes[0][0])
+ else:
+ # a literal and another literal/variable in the same clause
+ hint['id'] = 'monkey_buggy_literal'
+ hint['args']['literals'] = [program[n.start:n.end] for n in nodes]
+ hints.append(hint)
+ return hints
+ return None
+
+# return a hint from rules with the most matching patterns
+# (most common nonmatching pattern is used as hint)
+def suggest_true(bugs, program, patterns):
+ # find a list of found / missing patterns for each intent rule
+ matches = collections.defaultdict(list)
+ for rule in [r for r in bugs['rules'] if r['class']]:
+ match = {True: [], False: []} # lists of found / missing patterns
+ for rule_pattern in rule['condition']:
+ found = any(pattern == rule_pattern for pattern, nodes in patterns)
+ match[found].append(rule_pattern)
+ n_found = len(match[True])
+ matches[n_found].append(match)
+
+ # consider only rules that have maximum matching patterns
+ candidate_rules = matches[max(matches)]
+
+ # if there is candidate with no missing patterns, the program should already
+ # be correct, so do not try hinting
+ if any(not match[False] for match in candidate_rules):
+ return None
+
+ # find the number of candidate rules missing each pattern
+ missing_patterns = collections.Counter()
+ for match in candidate_rules:
+ for pattern in match[False]:
+ missing_patterns[pattern] += 1
+
+ # consider only the most commonly missing patterns
+ max_count = max(missing_patterns.values())
+ candidate_patterns = [pattern for pattern, count in missing_patterns.items() if count == max_count]
+
+ # find the most common (in the learning set) pattern among the candidates
+ for pattern in bugs['patterns']:
+ if pattern in candidate_patterns:
+ return [{'id': 'monkey_missing'}]
+ return None
+
+# check if any pattern in the program has not been observed before, and return as buggy
+def suggest_unknown_pattern(bugs, program, patterns):
+ hints = []
+ top_nodes = []
+ for pattern, nodes in patterns:
+ if len(nodes) == 2 and pattern not in bugs['patterns']:
+ for node in nodes:
+ # highlight the node
+ hints.append({'id': 'monkey_highlight', 'start': node.start, 'end': node.end})
+ # find top-level clause (or similar) node containing the highlighted node
+ while node.parent().label() in {'compound', 'binop', 'unop', 'args', 'list', 'h', 't'}:
+ node = node.parent()
+ if not any(n is node for n in top_nodes):
+ top_nodes.append(node)
+ if hints:
+ hints.append({
+ 'id': 'monkey_unknown',
+ 'args': {'fragments': [program[n.start:n.end] for n in top_nodes]}
+ })
+ return hints
+ return None
+
+def suggest(bugs, program):
+ try:
+ patterns = list(get_patterns(program))
+ hints = suggest_buggy(bugs, program, patterns)
+ if not hints:
+ hints = suggest_true(bugs, program, patterns)
+ if not hints:
+ hints = suggest_unknown_pattern(bugs, program, patterns)
+ return hints
+ except:
+ # should never happen
+ return None