diff options
Diffstat (limited to 'monkey')
-rw-r--r-- | monkey/patterns.py | 149 | ||||
-rw-r--r-- | monkey/rules.py | 131 |
2 files changed, 280 insertions, 0 deletions
diff --git a/monkey/patterns.py b/monkey/patterns.py new file mode 100644 index 0000000..7dfa96f --- /dev/null +++ b/monkey/patterns.py @@ -0,0 +1,149 @@ +# CodeQ: an online programming tutor. +# Copyright (C) 2016,2017 UL FRI +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import collections +from itertools import chain, combinations, product + +from nltk import Tree + +from prolog.util import parse as prolog_parse + +# construct pattern to match the structure of nodes given by [include], +# supports variables and literals +def pattern(node, include): + if not isinstance(node, Tree): + return None + + label = node.label() + if any(n is node for n in include): + if label == 'literal': + return '"{}"'.format(node[0].val) + if label == 'variable': + return '{}'.format(label) + return None + if label == 'functor': + return '({} "{}")'.format(label, node[0].val) + + subpats = [pattern(child, include) for child in node] + pat = None + if any(subpats): + if label == 'and': + if subpats[1]: + pat = subpats[1] + if subpats[0]: + if pat: + pat = subpats[0] + ' ' + pat + else: + pat = subpats[0] + elif label == 'args': + pat = label + for i, subpat in enumerate(subpats): + if subpat: + pat += ' {}'.format(subpat) + pat = '(' + pat + ')' + elif label == 'unop': + pat = '(' + label + ' ' + node[0].val + ' ' + subpats[1] + ')' + elif label == 'binop': + pat = label + if subpats[0]: + pat += ' {}'.format(subpats[0]) + pat += ' "{}"'.format(node[1].val) + if subpats[2]: + pat += ' {}'.format(subpats[2]) + pat = '(' + pat + ')' + elif label == 'clause': + pat = label + for i, subpat in enumerate(subpats): + if subpat: + pat += ' {}'.format(subpats[i]) + return '(' + pat + ')' + elif label == 'compound': + if len(subpats) > 1 and subpats[1]: + pat = label + for i, subpat in enumerate(subpats): + pat += ' {}'.format(subpat) + pat = '(' + pat + ')' + else: + return None + elif label == 'head': + pat = label + pat += ' {}'.format(subpats[0]) + pat = '(' + pat + ')' + elif label == 'list': + pat = 'list ' + if subpats[0]: + pat += '(h {})'.format(subpats[0]) + if subpats[0] and subpats[1]: + pat += ' ' + if subpats[1]: + pat += '(t {})'.format(subpats[1]) + pat = '(' + pat + ')' + if not pat: + for s in subpats: + if s: + pat = s + break + return pat + +def get_patterns(tree): + if isinstance(tree, str): + tree = prolog_parse(tree) + if tree is None: + return + + # get patterns separately for each clause + for clause in tree: + # collect variable nodes in this clause + variables = collections.defaultdict(list) + for node in clause.subtrees(): + if isinstance(node, Tree) and node.label() == 'variable': + name = node[0].val + variables[name].append(node) + + # yield patterns for singleton variables + for var, nodes in variables.items(): + if len(nodes) == 1: + pat = pattern(clause, nodes) + if pat: + yield pat, nodes + + # yield patterns for variable-variable pairs (within a clause) + for var, nodes in variables.items(): + for selected in combinations(nodes, 2): + pat = pattern(clause, selected) + if pat: + yield pat, selected + + # yield patterns for variable-literal / literal-literal pairs + # yield patterns for singleton literals + # (only within a topmost compound / binop / unop) + def patterns_with_literals(node): + if not isinstance(node, Tree): + return + if node.label() in {'compound', 'binop', 'unop'}: + vars = [n for n in node.subtrees() if n.label() == 'variable'] + lits = [n for n in node.subtrees() if n.label() == 'literal'] + for selected in chain( + combinations(lits, 1), + combinations(lits, 2), + product(lits, vars)): + pat = pattern(clause, selected) + if pat: + yield pat, selected + else: + for child in node: + yield from patterns_with_literals(child) + yield from patterns_with_literals(clause) diff --git a/monkey/rules.py b/monkey/rules.py new file mode 100644 index 0000000..fcd9582 --- /dev/null +++ b/monkey/rules.py @@ -0,0 +1,131 @@ +# CodeQ: an online programming tutor. +# Copyright (C) 2017 UL FRI +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import collections + +from nltk import Tree + +from .patterns import get_patterns + +# return a hint for the best applicable buggy rule +def suggest_buggy(bugs, program, patterns): + for rule in [r for r in bugs['rules'] if not r['class']]: + matches = [] + for rule_pattern in rule['condition']: + for pattern, nodes in patterns: + if rule_pattern == pattern: + matches.append((pattern, nodes)) + break + + # found a rule, now do the cha-cha-hint + if len(matches) == len(rule['condition']): + hints = [] + for pattern, nodes in matches: + # find subtrees to point out to student + top_nodes = [] + for node in nodes: + # highlight the node + hints.append({'id': 'monkey_highlight', 'start': node.start, 'end': node.end}) + # find top-level clause (or similar) node containing the highlighted node + while node.parent().label() in {'compound', 'binop', 'unop', 'args', 'list', 'h', 't'}: + node = node.parent() + if not any(n is node for n in top_nodes): + top_nodes.append(node) + + hint = {'args': {'fragments': [program[n.start:n.end] for n in top_nodes]}} + if all(isinstance(n, Tree) and n.label() == 'variable' for n in nodes): + # two instances of a variable + hint['id'] = 'monkey_singleton' if len(nodes) == 1 else 'monkey_buggy_variable' + hint['args']['variable'] = str(nodes[0][0]) + else: + # a literal and another literal/variable in the same clause + hint['id'] = 'monkey_buggy_literal' + hint['args']['literals'] = [program[n.start:n.end] for n in nodes] + hints.append(hint) + return hints + return None + +# return a hint from rules with the most matching patterns +# (most common nonmatching pattern is used as hint) +def suggest_true(bugs, program, patterns): + # find a list of found / missing patterns for each intent rule + matches = collections.defaultdict(list) + for rule in [r for r in bugs['rules'] if r['class']]: + match = {True: [], False: []} # lists of found / missing patterns + for rule_pattern in rule['condition']: + found = any(pattern == rule_pattern for pattern, nodes in patterns) + match[found].append(rule_pattern) + n_found = len(match[True]) + matches[n_found].append(match) + + # consider only rules that have maximum matching patterns + candidate_rules = matches[max(matches)] + + # if there is candidate with no missing patterns, the program should already + # be correct, so do not try hinting + if any(not match[False] for match in candidate_rules): + return None + + # find the number of candidate rules missing each pattern + missing_patterns = collections.Counter() + for match in candidate_rules: + for pattern in match[False]: + missing_patterns[pattern] += 1 + + # consider only the most commonly missing patterns + max_count = max(missing_patterns.values()) + candidate_patterns = [pattern for pattern, count in missing_patterns.items() if count == max_count] + + # find the most common (in the learning set) pattern among the candidates + for pattern in bugs['patterns']: + if pattern in candidate_patterns: + return [{'id': 'monkey_missing'}] + return None + +# check if any pattern in the program has not been observed before, and return as buggy +def suggest_unknown_pattern(bugs, program, patterns): + hints = [] + top_nodes = [] + for pattern, nodes in patterns: + if len(nodes) == 2 and pattern not in bugs['patterns']: + for node in nodes: + # highlight the node + hints.append({'id': 'monkey_highlight', 'start': node.start, 'end': node.end}) + # find top-level clause (or similar) node containing the highlighted node + while node.parent().label() in {'compound', 'binop', 'unop', 'args', 'list', 'h', 't'}: + node = node.parent() + if not any(n is node for n in top_nodes): + top_nodes.append(node) + if hints: + hints.append({ + 'id': 'monkey_unknown', + 'args': {'fragments': [program[n.start:n.end] for n in top_nodes]} + }) + return hints + return None + +def suggest(bugs, program): + try: + patterns = list(get_patterns(program)) + hints = suggest_buggy(bugs, program, patterns) + if not hints: + hints = suggest_true(bugs, program, patterns) + if not hints: + hints = suggest_unknown_pattern(bugs, program, patterns) + return hints + except: + # should never happen + return None |