monkey/__init__.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157

# CodeQ: an online programming tutor.
# Copyright (C) 2015 UL FRI
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import difflib
from itertools import product
import time

from prolog.util import tokenize, stringify, parse, interesting_ranges, rename_vars_list
from .util import PQueue, startswith, endswith

# Starting from [code], find a sequence of edits that transforms it into a
# correct predicate for [name]. The [test] function is used to test generated
# programs.
# Return (solution, edits, time spent, #programs checked). If no solution is
# found within [timeout] seconds, return solution='' and edits=[].
def fix(code, edits, test, timeout=30, debug=False):
    def step(program, path=None):
        tokens = program.leaves()
        for part, range_path in interesting_ranges(program):
            names = {}
            part_normal = tuple(rename_vars_list(part, names))
            for (path, a, b), (p, uids) in edits.items():
                if path == range_path and a == part_normal:
                    reverse_names = {v: k for k, v in names.items()}
                    b_real = tuple(rename_vars_list(b, reverse_names))
                    new_tokens = []
                    idx = None
                    for i, t in enumerate(tokens):
                        if t.pos >= part[0].pos and t.pos+len(t.val) <= part[-1].pos+len(part[-1].val):
                            if idx is None:
                                idx = i
                                new_tokens.extend(b_real)
                        else:
                            new_tokens.append(t)
                    new_code = stringify(new_tokens)
                    new_program = parse(new_code)
                    new_step = (idx, tuple(part), b_real)
                    if new_program is not None:
                        yield (new_program.freeze(), new_step, p)

    # Main loop: best-first search through generated programs.
    todo = PQueue()  # priority queue of candidate solutions
    done = set()     # programs we have already visited
    n_tested = 0
    start_time = time.monotonic()
    total_time = 0

    # Each program gets a task with the sequence of edits that generated it and
    # the associated cost. First candidate with cost 1 is the initial program.
    program = parse(code)
    if program is None:
        return '', [], total_time, n_tested
    todo.push((program.freeze(), (), 1.0), -1.0)

    while total_time < timeout:
        # Get next candidate.
        task = todo.pop()
        if task == None:
            break
        program, path, path_cost = task

        # If we have already seen this code, skip it.
        code = stringify(program)
        if code in done:
            continue
        done.add(code)

        # Print some info about the current task.
        if debug:
            print('Cost {:.12f}'.format(path_cost))
            for idx, a, b in path:
                print('{}: {} → {}'.format(idx, stringify(a), stringify(b)))

        # If the code is correct, we are done.
        n_correct, n_all = test(code)
        if n_correct == n_all:
            return code, path, total_time, n_tested
        n_tested += 1

        # Otherwise generate new solutions.
        for new_program, new_step, new_cost in step(program, path):
            new_path_cost = path_cost * new_cost
            if len(todo) > 500 and new_path_cost < 0.001:
                continue
            new_path = path + (new_step,)
            todo.push((new_program, new_path, new_path_cost), -new_path_cost)

        total_time = time.monotonic() - start_time

    return '', [], total_time, n_tested

# Find (minimal) token ranges in [a] that are changed in [b].
def min_diff(a, b):
    # Special case: inserting/deleting at beginning/end.
    if endswith(b, a):
        return [('insert', 0, 0, 0, len(b)-len(a))]
    if startswith(b, a):
        return [('insert', len(a), len(a), len(a), len(b))]
    if endswith(a, b):
        return [('remove', 0, len(a)-len(b), 0, 0)]
    if startswith(a, b):
        return [('remove', len(b), len(a), len(b), len(b))]
    return difflib.SequenceMatcher(a=a, b=b, autojunk=False).get_opcodes()

# Return a list of hint objects for the web app.
def fix_hints(code, steps):
    hints = []
    tokens = tokenize(code)
    for idx, a, b in steps:
        for tag, i1, i2, j1, j2 in min_diff(a, b):
            if tag == 'equal':
                continue
            if tag == 'insert':
                hint_id = 'monkey_insert'
                if idx+i1 >= len(tokens):
                    pos_start = len(code)-1
                else:
                    pos_start = tokens[idx+i1].pos
                pos_end = pos_start+1
            elif tag == 'remove':
                hint_id = 'monkey_remove'
                pos_start = tokens[idx+i1].pos
                pos_end = tokens[idx+i2-1].pos + len(tokens[idx+i2-1].val)
            elif tag == 'replace':
                hint_id = 'monkey_change'
                pos_start = tokens[idx+i1].pos
                pos_end = tokens[idx+i2-1].pos + len(tokens[idx+i2-1].val)
            hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}]
        tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b]

    changed = True
    while changed:
        changed = False
        for h1, h2 in product(hints, hints):
            if h1 is h2 or h2['start'] <= h1['start']:
                continue
            if h1['id'] == h2['id'] == 'monkey_insert' and h2['start'] - h1['start'] < 3:
                hints.remove(h1)
                hints.remove(h2)
                hints.append({'id': 'monkey_change', 'start': h1['start'], 'end': h2['end']-1})
                changed = True
                break

    return hints