From ecdc0b2a576cfddabc9b9763a32b28ae30fee3c0 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Mon, 27 Feb 2017 11:51:28 +0100 Subject: Implement pattern-based hints (see AIED2017) --- monkey/patterns.py | 149 +++++++++++++++++++++++++++++++++++++++++++++++ monkey/rules.py | 131 +++++++++++++++++++++++++++++++++++++++++ server/problems.py | 8 +++ server/prolog_session.py | 30 +++------- 4 files changed, 296 insertions(+), 22 deletions(-) create mode 100644 monkey/patterns.py create mode 100644 monkey/rules.py diff --git a/monkey/patterns.py b/monkey/patterns.py new file mode 100644 index 0000000..7dfa96f --- /dev/null +++ b/monkey/patterns.py @@ -0,0 +1,149 @@ +# CodeQ: an online programming tutor. +# Copyright (C) 2016,2017 UL FRI +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import collections +from itertools import chain, combinations, product + +from nltk import Tree + +from prolog.util import parse as prolog_parse + +# construct pattern to match the structure of nodes given by [include], +# supports variables and literals +def pattern(node, include): + if not isinstance(node, Tree): + return None + + label = node.label() + if any(n is node for n in include): + if label == 'literal': + return '"{}"'.format(node[0].val) + if label == 'variable': + return '{}'.format(label) + return None + if label == 'functor': + return '({} "{}")'.format(label, node[0].val) + + subpats = [pattern(child, include) for child in node] + pat = None + if any(subpats): + if label == 'and': + if subpats[1]: + pat = subpats[1] + if subpats[0]: + if pat: + pat = subpats[0] + ' ' + pat + else: + pat = subpats[0] + elif label == 'args': + pat = label + for i, subpat in enumerate(subpats): + if subpat: + pat += ' {}'.format(subpat) + pat = '(' + pat + ')' + elif label == 'unop': + pat = '(' + label + ' ' + node[0].val + ' ' + subpats[1] + ')' + elif label == 'binop': + pat = label + if subpats[0]: + pat += ' {}'.format(subpats[0]) + pat += ' "{}"'.format(node[1].val) + if subpats[2]: + pat += ' {}'.format(subpats[2]) + pat = '(' + pat + ')' + elif label == 'clause': + pat = label + for i, subpat in enumerate(subpats): + if subpat: + pat += ' {}'.format(subpats[i]) + return '(' + pat + ')' + elif label == 'compound': + if len(subpats) > 1 and subpats[1]: + pat = label + for i, subpat in enumerate(subpats): + pat += ' {}'.format(subpat) + pat = '(' + pat + ')' + else: + return None + elif label == 'head': + pat = label + pat += ' {}'.format(subpats[0]) + pat = '(' + pat + ')' + elif label == 'list': + pat = 'list ' + if subpats[0]: + pat += '(h {})'.format(subpats[0]) + if subpats[0] and subpats[1]: + pat += ' ' + if subpats[1]: + pat += '(t {})'.format(subpats[1]) + pat = '(' + pat + ')' + if not pat: + for s in subpats: + if s: + pat = s + break + return pat + +def get_patterns(tree): + if isinstance(tree, str): + tree = prolog_parse(tree) + if tree is None: + return + + # get patterns separately for each clause + for clause in tree: + # collect variable nodes in this clause + variables = collections.defaultdict(list) + for node in clause.subtrees(): + if isinstance(node, Tree) and node.label() == 'variable': + name = node[0].val + variables[name].append(node) + + # yield patterns for singleton variables + for var, nodes in variables.items(): + if len(nodes) == 1: + pat = pattern(clause, nodes) + if pat: + yield pat, nodes + + # yield patterns for variable-variable pairs (within a clause) + for var, nodes in variables.items(): + for selected in combinations(nodes, 2): + pat = pattern(clause, selected) + if pat: + yield pat, selected + + # yield patterns for variable-literal / literal-literal pairs + # yield patterns for singleton literals + # (only within a topmost compound / binop / unop) + def patterns_with_literals(node): + if not isinstance(node, Tree): + return + if node.label() in {'compound', 'binop', 'unop'}: + vars = [n for n in node.subtrees() if n.label() == 'variable'] + lits = [n for n in node.subtrees() if n.label() == 'literal'] + for selected in chain( + combinations(lits, 1), + combinations(lits, 2), + product(lits, vars)): + pat = pattern(clause, selected) + if pat: + yield pat, selected + else: + for child in node: + yield from patterns_with_literals(child) + yield from patterns_with_literals(clause) diff --git a/monkey/rules.py b/monkey/rules.py new file mode 100644 index 0000000..fcd9582 --- /dev/null +++ b/monkey/rules.py @@ -0,0 +1,131 @@ +# CodeQ: an online programming tutor. +# Copyright (C) 2017 UL FRI +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import collections + +from nltk import Tree + +from .patterns import get_patterns + +# return a hint for the best applicable buggy rule +def suggest_buggy(bugs, program, patterns): + for rule in [r for r in bugs['rules'] if not r['class']]: + matches = [] + for rule_pattern in rule['condition']: + for pattern, nodes in patterns: + if rule_pattern == pattern: + matches.append((pattern, nodes)) + break + + # found a rule, now do the cha-cha-hint + if len(matches) == len(rule['condition']): + hints = [] + for pattern, nodes in matches: + # find subtrees to point out to student + top_nodes = [] + for node in nodes: + # highlight the node + hints.append({'id': 'monkey_highlight', 'start': node.start, 'end': node.end}) + # find top-level clause (or similar) node containing the highlighted node + while node.parent().label() in {'compound', 'binop', 'unop', 'args', 'list', 'h', 't'}: + node = node.parent() + if not any(n is node for n in top_nodes): + top_nodes.append(node) + + hint = {'args': {'fragments': [program[n.start:n.end] for n in top_nodes]}} + if all(isinstance(n, Tree) and n.label() == 'variable' for n in nodes): + # two instances of a variable + hint['id'] = 'monkey_singleton' if len(nodes) == 1 else 'monkey_buggy_variable' + hint['args']['variable'] = str(nodes[0][0]) + else: + # a literal and another literal/variable in the same clause + hint['id'] = 'monkey_buggy_literal' + hint['args']['literals'] = [program[n.start:n.end] for n in nodes] + hints.append(hint) + return hints + return None + +# return a hint from rules with the most matching patterns +# (most common nonmatching pattern is used as hint) +def suggest_true(bugs, program, patterns): + # find a list of found / missing patterns for each intent rule + matches = collections.defaultdict(list) + for rule in [r for r in bugs['rules'] if r['class']]: + match = {True: [], False: []} # lists of found / missing patterns + for rule_pattern in rule['condition']: + found = any(pattern == rule_pattern for pattern, nodes in patterns) + match[found].append(rule_pattern) + n_found = len(match[True]) + matches[n_found].append(match) + + # consider only rules that have maximum matching patterns + candidate_rules = matches[max(matches)] + + # if there is candidate with no missing patterns, the program should already + # be correct, so do not try hinting + if any(not match[False] for match in candidate_rules): + return None + + # find the number of candidate rules missing each pattern + missing_patterns = collections.Counter() + for match in candidate_rules: + for pattern in match[False]: + missing_patterns[pattern] += 1 + + # consider only the most commonly missing patterns + max_count = max(missing_patterns.values()) + candidate_patterns = [pattern for pattern, count in missing_patterns.items() if count == max_count] + + # find the most common (in the learning set) pattern among the candidates + for pattern in bugs['patterns']: + if pattern in candidate_patterns: + return [{'id': 'monkey_missing'}] + return None + +# check if any pattern in the program has not been observed before, and return as buggy +def suggest_unknown_pattern(bugs, program, patterns): + hints = [] + top_nodes = [] + for pattern, nodes in patterns: + if len(nodes) == 2 and pattern not in bugs['patterns']: + for node in nodes: + # highlight the node + hints.append({'id': 'monkey_highlight', 'start': node.start, 'end': node.end}) + # find top-level clause (or similar) node containing the highlighted node + while node.parent().label() in {'compound', 'binop', 'unop', 'args', 'list', 'h', 't'}: + node = node.parent() + if not any(n is node for n in top_nodes): + top_nodes.append(node) + if hints: + hints.append({ + 'id': 'monkey_unknown', + 'args': {'fragments': [program[n.start:n.end] for n in top_nodes]} + }) + return hints + return None + +def suggest(bugs, program): + try: + patterns = list(get_patterns(program)) + hints = suggest_buggy(bugs, program, patterns) + if not hints: + hints = suggest_true(bugs, program, patterns) + if not hints: + hints = suggest_unknown_pattern(bugs, program, patterns) + return hints + except: + # should never happen + return None diff --git a/server/problems.py b/server/problems.py index 10dec0d..9cccc21 100644 --- a/server/problems.py +++ b/server/problems.py @@ -65,6 +65,14 @@ def load_problem(language, problem_group, problem, tail_module): def load_facts(language, fact_module): return load_module('{0}.facts.{1}'.format(language, fact_module)) +def load_file(language, group, problem, name): + path = os.path.join(_path_prefix, language, 'problems', group, problem, name) + try: + with open(path, 'r') as f: + return f.read() + except: + return None + def load_problems(language, tuples, tail_module): modules = [] for problem_group, problem in tuples: diff --git a/server/prolog_session.py b/server/prolog_session.py index 6cb1592..8c101c6 100644 --- a/server/prolog_session.py +++ b/server/prolog_session.py @@ -14,19 +14,19 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import json import operator import os.path -import pickle import threading from db.models import CodeqUser, Problem from db.util import make_identifier -import monkey +import monkey.rules import prolog.engine from prolog.util import used_predicates import server import server.user_session -from server.problems import get_facts, load_language, load_problem, solutions_for_problems +from server.problems import get_facts, load_file, load_language, load_problem, solutions_for_problems __all__ = ['PrologSession'] @@ -145,16 +145,11 @@ class PrologSession(server.LanguageSession): hints = problem_module.hint(program, aux_code=aux_code) # automatic hints - if not hints and problem_id in _edits and \ - allowed_hints in ('all', 'automatic'): - # Testing function for monkey. - def tester(code): - n_correct, n_all, _ = problem_module.test(code, aux_code=aux_code) - return n_correct, n_all - solution, steps, fix_time, n_tested = monkey.fix( - program, _edits[problem_id], tester, timeout=3) - if solution and steps: - hints = [{'id': 'monkey_main'}] + monkey.fix_hints(program, steps) + if not hints and allowed_hints in ('all', 'automatic'): + bug_data = load_file(problem.language, problem.group, problem.identifier, 'bugs.json') + if bug_data is not None: + bugs = json.loads(bug_data) + hints = monkey.rules.suggest(bugs, program) # generic language hints (style etc.) if not hints and hasattr(language_module, 'hint'): @@ -205,12 +200,3 @@ class PrologSession(server.LanguageSession): return messages, status, have_more server.language_session_handlers['prolog'] = lambda user_session, problem_id, language_identifier, group_identifier, problem_identifier: PrologSession() - -# Load edit data. -try: - _edits, _submissions, _queries = pickle.load( - open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'edits.pickle'), 'rb')) -except: - _edits = {} - _submissions = {} - _queries = {} -- cgit v1.2.1