monkey/__init__.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

# CodeQ: an online programming tutor.
# Copyright (C) 2015 UL FRI
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import time

from prolog.util import tokenize, stringify, parse, interesting_ranges, rename_vars_list
from .util import PQueue

# Starting from [code], find a sequence of edits that transforms it into a
# correct predicate for [name]. The [test] function is used to test generated
# programs.
# Return (solution, edits, time spent, #programs checked). If no solution is
# found within [timeout] seconds, return solution='' and edits=[].
def fix(code, edits, test, timeout=30, debug=False):
    def step(program, path=None):
        tokens = program.leaves()
        for part, range_path in interesting_ranges(program):
            names = {}
            part_normal = tuple(rename_vars_list(part, names))
            for (path, a, b), (p, uids) in edits.items():
                if path == range_path and a == part_normal:
                    reverse_names = {v: k for k, v in names.items()}
                    b_real = tuple(rename_vars_list(b, reverse_names))
                    new_tokens = []
                    idx = None
                    for i, t in enumerate(tokens):
                        if t.pos >= part[0].pos and t.pos+len(t.val) <= part[-1].pos+len(part[-1].val):
                            if idx is None:
                                idx = i
                                new_tokens.extend(b_real)
                        else:
                            new_tokens.append(t)
                    new_code = stringify(new_tokens)
                    new_program = parse(new_code)
                    new_step = (idx, tuple(part), b_real)
                    if new_program is not None:
                        yield (new_program.freeze(), new_step, p)

    # Main loop: best-first search through generated programs.
    todo = PQueue()  # priority queue of candidate solutions
    done = set()     # programs we have already visited
    n_tested = 0
    start_time = time.monotonic()
    total_time = 0

    # Each program gets a task with the sequence of edits that generated it and
    # the associated cost. First candidate with cost 1 is the initial program.
    program = parse(code)
    if program is None:
        return '', [], total_time, n_tested
    todo.push((program.freeze(), (), 1.0), -1.0)

    while total_time < timeout:
        # Get next candidate.
        task = todo.pop()
        if task == None:
            break
        program, path, path_cost = task

        # If we have already seen this code, skip it.
        code = stringify(program)
        if code in done:
            continue
        done.add(code)

        # Print some info about the current task.
        if debug:
            print('Cost {:.12f}'.format(path_cost))
            for idx, a, b in path:
                print('{}: {} → {}'.format(idx, stringify(a), stringify(b)))

        # If the code is correct, we are done.
        n_correct, n_all = test(code)
        if n_correct == n_all:
            return code, path, total_time, n_tested
        n_tested += 1

        # Otherwise generate new solutions.
        for new_program, new_step, new_cost in step(program, path):
            new_path_cost = path_cost * new_cost
            if len(todo) > 500 and new_path_cost < 0.001:
                continue
            new_path = path + (new_step,)
            todo.push((new_program, new_path, new_path_cost), -new_path_cost)

        total_time = time.monotonic() - start_time

    return '', [], total_time, n_tested

def min_diff(a, b):
    first = 0
    while first < len(a) and first < len(b) and a[first] == b[first]:
        first += 1
    last = 0
    while first-last < len(a) and first-last < len(b) and a[last-1] == b[last-1]:
        last -= 1
    return first, last

def fix_hints(code, steps):
    hints = []
    tokens = tokenize(code)
    for idx, a, b in steps:
        start, end = min_diff(a, b)

        if start >= len(a)+end:
            hint_id = 'monkey_insert'
        elif start >= len(b)+end:
            hint_id = 'monkey_remove'
        else:
            hint_id = 'monkey_change'

        pos_start = tokens[idx+start].pos
        pos_end = tokens[idx+len(a)+end-1].pos + len(tokens[idx+len(a)+end-1].val)
        pos_end = max(pos_end, pos_start+1)

        hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}]
        tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b]
    return hints