summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2016-01-11 17:27:10 +0100
committer: Timotej Lazar <timotej.lazar@fri.uni-lj.si> 2016-01-11 17:27:10 +0100
commit: 740e128bb32075279dd62c677b27c645805cb5e2 (patch)
tree: 48fb1514c6f5f330b693851655778d10dfad13d7
parent: 9482cbd8644da8999051a7a6d892abb046f8d49a (diff)
For each edit remember the user ids where it was seen
-rw-r--r-- monkey/__init__.py 2
-rw-r--r-- monkey/edits.py 24
2 files changed, 13 insertions, 13 deletions
diff --git a/monkey/__init__.py b/monkey/__init__.py
index ec09e62..83ea215 100644
--- a/monkey/__init__.py
+++ b/monkey/__init__.py
@@ -30,7 +30,7 @@ def fix(code, edits, test, timeout=30, debug=False):
for part, range_path in interesting_ranges(program):
names = {}
part_normal = tuple(rename_vars_list(part, names))
- for (path, a, b), p in edits.items():
+ for (path, a, b), (p, uids) in edits.items():
if path == range_path and a == part_normal:
reverse_names = {v: k for k, v in names.items()}
b_real = tuple(rename_vars(b, reverse_names))
diff --git a/monkey/edits.py b/monkey/edits.py
index f27871b..ae44301 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -129,7 +129,7 @@ def get_edits_from_solutions(solutions, test):
# where they were observed.
submissions = collections.defaultdict(set)
queries = collections.Counter()
- edits = collections.defaultdict(list)
+ edits = collections.defaultdict(set)
for solution in solutions:
trace = solution.trace
@@ -137,8 +137,8 @@ def get_edits_from_solutions(solutions, test):
trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid)
# Update edits.
- for edit, features in trace_edits.items():
- edits[edit].extend(features)
+ for edit, uids in trace_edits.items():
+ edits[edit] |= uids
# Update submission/query counters (use normalized variables).
for code, correct in trace_submissions:
@@ -155,24 +155,24 @@ def get_edits_from_solutions(solutions, test):
n_start = collections.Counter()
n_start_all = 0
- for (path, a, b), features in edits.items():
- edits[(path, a, b)] = len(features)
- n_start[(path, a)] += len(features)
- n_start_all += len(features)
+ for (path, a, b), uids in edits.items():
+ edits[(path, a, b)] = (len(uids), uids)
+ n_start[(path, a)] += len(uids)
+ n_start_all += len(uids)
# Find the probability of each edit a → b.
new_edits = {}
- for (path, a, b), count in edits.items():
+ for (path, a, b), (count, uids) in edits.items():
if a != b:
p = count / n_start[(path, a)]
- new_edits[(path, a, b)] = p
+ new_edits[(path, a, b)] = (p, uids)
edits = new_edits
# Tweak the edit distribution to improve search.
if edits:
- avg_p = avg(edits.values())
- for edit, p in edits.items():
- edits[edit] = logistic(p, k=3, x_0=avg_p)
+ avg_p = avg([v[0] for v in edits.values()])
+ for edit, (p, uids) in edits.items():
+ edits[edit] = (logistic(p, k=3, x_0=avg_p), uids)
return edits, submissions, queries