summary | refs | log | tree | commit | diff
path: root/monkey/edits.py
diff options
context:
space:
mode:
Diffstat (limited to 'monkey/edits.py')
-rw-r--r-- monkey/edits.py 24
1 files changed, 12 insertions, 12 deletions
diff --git a/monkey/edits.py b/monkey/edits.py
index f27871b..ae44301 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -129,7 +129,7 @@ def get_edits_from_solutions(solutions, test):
# where they were observed.
submissions = collections.defaultdict(set)
queries = collections.Counter()
- edits = collections.defaultdict(list)
+ edits = collections.defaultdict(set)
for solution in solutions:
trace = solution.trace
@@ -137,8 +137,8 @@ def get_edits_from_solutions(solutions, test):
trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid)
# Update edits.
- for edit, features in trace_edits.items():
- edits[edit].extend(features)
+ for edit, uids in trace_edits.items():
+ edits[edit] |= uids
# Update submission/query counters (use normalized variables).
for code, correct in trace_submissions:
@@ -155,24 +155,24 @@ def get_edits_from_solutions(solutions, test):
n_start = collections.Counter()
n_start_all = 0
- for (path, a, b), features in edits.items():
- edits[(path, a, b)] = len(features)
- n_start[(path, a)] += len(features)
- n_start_all += len(features)
+ for (path, a, b), uids in edits.items():
+ edits[(path, a, b)] = (len(uids), uids)
+ n_start[(path, a)] += len(uids)
+ n_start_all += len(uids)
# Find the probability of each edit a → b.
new_edits = {}
- for (path, a, b), count in edits.items():
+ for (path, a, b), (count, uids) in edits.items():
if a != b:
p = count / n_start[(path, a)]
- new_edits[(path, a, b)] = p
+ new_edits[(path, a, b)] = (p, uids)
edits = new_edits
# Tweak the edit distribution to improve search.
if edits:
- avg_p = avg(edits.values())
- for edit, p in edits.items():
- edits[edit] = logistic(p, k=3, x_0=avg_p)
+ avg_p = avg([v[0] for v in edits.values()])
+ for edit, (p, uids) in edits.items():
+ edits[edit] = (logistic(p, k=3, x_0=avg_p), uids)
return edits, submissions, queries