summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimotej Lazar <timotej.lazar@fri.uni-lj.si>2016-03-02 18:39:17 +0100
committerTimotej Lazar <timotej.lazar@fri.uni-lj.si>2016-03-02 18:39:17 +0100
commit3af76b914dc48f7c729b34d8605bb2ad51a5156b (patch)
tree119abc3510895f20419c3177309a2ffc043933fa
parent1c76b686a5896950b0037e773d5229e401e980b3 (diff)
Improve hints returned by monkey.fix_hints
-rw-r--r--monkey/__init__.py70
-rw-r--r--monkey/util.py12
2 files changed, 60 insertions, 22 deletions
diff --git a/monkey/__init__.py b/monkey/__init__.py
index 167d0f2..53d5b8a 100644
--- a/monkey/__init__.py
+++ b/monkey/__init__.py
@@ -14,10 +14,12 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import difflib
+from itertools import product
import time
from prolog.util import tokenize, stringify, parse, interesting_ranges, rename_vars_list
-from .util import PQueue
+from .util import PQueue, startswith, endswith
# Starting from [code], find a sequence of edits that transforms it into a
# correct predicate for [name]. The [test] function is used to test generated
@@ -100,32 +102,56 @@ def fix(code, edits, test, timeout=30, debug=False):
return '', [], total_time, n_tested
+# Find (minimal) token ranges in [a] that are changed in [b].
def min_diff(a, b):
- first = 0
- while first < len(a) and first < len(b) and a[first] == b[first]:
- first += 1
- last = 0
- while first-last < len(a) and first-last < len(b) and a[last-1] == b[last-1]:
- last -= 1
- return first, last
-
+ # Special case: inserting/deleting at beginning/end.
+ if endswith(b, a):
+ return [('insert', 0, 0, 0, len(b)-len(a))]
+ if startswith(b, a):
+ return [('insert', len(a), len(a), len(a), len(b))]
+ if endswith(a, b):
+ return [('remove', 0, len(a)-len(b), 0, 0)]
+ if startswith(a, b):
+ return [('remove', len(b), len(a), len(b), len(b))]
+ return difflib.SequenceMatcher(a=a, b=b, autojunk=False).get_opcodes()
+
+# Return a list of hint objects for the web app.
def fix_hints(code, steps):
hints = []
tokens = tokenize(code)
for idx, a, b in steps:
- start, end = min_diff(a, b)
-
- if start >= len(a)+end:
- hint_id = 'monkey_insert'
- elif start >= len(b)+end:
- hint_id = 'monkey_remove'
- else:
- hint_id = 'monkey_change'
+ for tag, i1, i2, j1, j2 in min_diff(a, b):
+ if tag == 'equal':
+ continue
+ if tag == 'insert':
+ hint_id = 'monkey_insert'
+ if idx+i1 >= len(tokens):
+ pos_start = len(code)-1
+ else:
+ pos_start = tokens[idx+i1].pos
+ pos_end = pos_start+1
+ elif tag == 'delete':
+ hint_id = 'monkey_remove'
+ pos_start = tokens[idx+i1].pos
+ pos_end = tokens[idx+i2-1].pos + len(tokens[idx+i2-1].val)
+ elif tag == 'replace':
+ hint_id = 'monkey_change'
+ pos_start = tokens[idx+i1].pos
+ pos_end = tokens[idx+i2-1].pos + len(tokens[idx+i2-1].val)
+ hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}]
+ tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b]
- pos_start = tokens[idx+start].pos
- pos_end = tokens[idx+len(a)+end-1].pos + len(tokens[idx+len(a)+end-1].val)
- pos_end = max(pos_end, pos_start+1)
+ changed = True
+ while changed:
+ changed = False
+ for h1, h2 in product(hints, hints):
+ if h1 is h2 or h2['start'] <= h1['start']:
+ continue
+ if h1['id'] == h2['id'] == 'monkey_insert' and h2['start'] - h1['start'] < 3:
+ hints.remove(h1)
+ hints.remove(h2)
+ hints.append({'id': 'monkey_change', 'start': h1['start'], 'end': h2['end']-1})
+ changed = True
+ break
- hints += [{'id': hint_id, 'start': pos_start, 'end': pos_end}]
- tokens[idx:idx+len(a)] = [t.clone(pos=tokens[idx].pos) for t in b]
return hints
diff --git a/monkey/util.py b/monkey/util.py
index 655cca9..3940f84 100644
--- a/monkey/util.py
+++ b/monkey/util.py
@@ -63,6 +63,18 @@ def get_line(text, n):
return lines[n]
return None
+# Does sequence [a] begin with sequence [b]?
+def startswith(a, b):
+ if len(a) < len(b):
+ return False
+ return all(x == y for x, y in zip(a, b))
+
+# Does sequence [a] end with sequence [b]?
+def endswith(a, b):
+ if len(a) < len(b):
+ return False
+ return all(x == y for x, y in zip(reversed(a), reversed(b)))
+
# Indent each line in [text] by [indent] spaces.
def indent(text, indent=2):
return '\n'.join([' '*indent+line for line in text.split('\n')])