From 3af76b914dc48f7c729b34d8605bb2ad51a5156b Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Wed, 2 Mar 2016 18:39:17 +0100 Subject: Improve hints returned by monkey.fix_hints --- monkey/__init__.py | 70 +++++++++++++++++++++++++++++++++++++----------------- monkey/util.py | 12 ++++++++++ 2 files changed, 60 insertions(+), 22 deletions(-) (limited to 'monkey') diff --git a/monkey/__init__.py b/monkey/__init__.py index 167d0f2..53d5b8a 100644 --- a/monkey/__init__.py +++ b/monkey/__init__.py @@ -14,10 +14,12 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import difflib +from itertools import product import time from prolog.util import tokenize, stringify, parse, interesting_ranges, rename_vars_list -from .util import PQueue +from .util import PQueue, startswith, endswith # Starting from [code], find a sequence of edits that transforms it into a # correct predicate for [name]. The [test] function is used to test generated @@ -100,32 +102,56 @@ def fix(code, edits, test, timeout=30, debug=False): return '', [], total_time, n_tested +# Find (minimal) token ranges in [a] that are changed in [b]. def min_diff(a, b): - first = 0 - while first < len(a) and first < len(b) and a[first] == b[first]: - first += 1 - last = 0 - while first-last < len(a) and first-last < len(b) and a[last-1] == b[last-1]: - last -= 1 - return first, last - + # Special case: inserting/deleting at beginning/end. + if endswith(b, a): + return [('insert', 0, 0, 0, len(b)-len(a))] + if startswith(b, a): + return [('insert', len(a), len(a), len(a), len(b))] + if endswith(a, b): + return [('remove', 0, len(a)-len(b), 0, 0)] + if startswith(a, b): + return [('remove', len(b), len(a), len(b), len(b))] + return difflib.SequenceMatcher(a=a, b=b, autojunk=False).get_opcodes() + +# Return a list of hint objects for the web app. def fix_hints(code, steps): hints = [] tokens = tokenize(code) for idx, a, b in steps: - start, end = min_diff(a, b) - - if start >= len(a)+end: - hint_id = 'monkey_insert' - elif start >= len(b)+end: - hint_id = 'monkey_remove' - else: - hint_id = 'monkey_change' + for tag, i1, i2, j1, j2 in min_diff(a, b): + if tag == 'equal': + continue + if tag == 'insert': + hint_id = 'monkey_insert' + if idx+i1 >= len(tokens): + pos_start = len(code)-1 + else: + pos_start = tokens[idx+i1].pos + pos_end = pos_start+1 + elif tag == 'delete': + hint_id = 'monkey_remove' + pos_start = tokens[idx+i1].pos + pos_end = tokens[idx+i2-1].pos + len(tokens[idx+i2-1].val) + elif tag == 'replace': + hint_id = 'monkey_change' + pos_start = tokens[idx+i1].pos + pos_end = tokens[idx+i2-1].pos + len(tokens[idx+i2-1].val) + hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}] + tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b] - pos_start = tokens[idx+start].pos - pos_end = tokens[idx+len(a)+end-1].pos + len(tokens[idx+len(a)+end-1].val) - pos_end = max(pos_end, pos_start+1) + changed = True + while changed: + changed = False + for h1, h2 in product(hints, hints): + if h1 is h2 or h2['start'] <= h1['start']: + continue + if h1['id'] == h2['id'] == 'monkey_insert' and h2['start'] - h1['start'] < 3: + hints.remove(h1) + hints.remove(h2) + hints.append({'id': 'monkey_change', 'start': h1['start'], 'end': h2['end']-1}) + changed = True + break - hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}] - tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b] return hints diff --git a/monkey/util.py b/monkey/util.py index 655cca9..3940f84 100644 --- a/monkey/util.py +++ b/monkey/util.py @@ -63,6 +63,18 @@ def get_line(text, n): return lines[n] return None +# Does sequence [a] begin with sequence [b]? +def startswith(a, b): + if len(a) < len(b): + return False + return all(x == y for x, y in zip(a, b)) + +# Does sequence [a] end with sequence [b]? +def endswith(a, b): + if len(a) < len(b): + return False + return all(x == y for x, y in zip(reversed(a), reversed(b))) + # Indent each line in [text] by [indent] spaces. def indent(text, indent=2): return '\n'.join([' '*indent+line for line in text.split('\n')]) -- cgit v1.2.1