From 740e128bb32075279dd62c677b27c645805cb5e2 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Mon, 11 Jan 2016 17:27:10 +0100 Subject: For each edit remember the user ids where it was seen --- monkey/edits.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'monkey/edits.py') diff --git a/monkey/edits.py b/monkey/edits.py index f27871b..ae44301 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -129,7 +129,7 @@ def get_edits_from_solutions(solutions, test): # where they were observed. submissions = collections.defaultdict(set) queries = collections.Counter() - edits = collections.defaultdict(list) + edits = collections.defaultdict(set) for solution in solutions: trace = solution.trace @@ -137,8 +137,8 @@ def get_edits_from_solutions(solutions, test): trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid) # Update edits. - for edit, features in trace_edits.items(): - edits[edit].extend(features) + for edit, uids in trace_edits.items(): + edits[edit] |= uids # Update submission/query counters (use normalized variables). for code, correct in trace_submissions: @@ -155,24 +155,24 @@ def get_edits_from_solutions(solutions, test): n_start = collections.Counter() n_start_all = 0 - for (path, a, b), features in edits.items(): - edits[(path, a, b)] = len(features) - n_start[(path, a)] += len(features) - n_start_all += len(features) + for (path, a, b), uids in edits.items(): + edits[(path, a, b)] = (len(uids), uids) + n_start[(path, a)] += len(uids) + n_start_all += len(uids) # Find the probability of each edit a → b. new_edits = {} - for (path, a, b), count in edits.items(): + for (path, a, b), (count, uids) in edits.items(): if a != b: p = count / n_start[(path, a)] - new_edits[(path, a, b)] = p + new_edits[(path, a, b)] = (p, uids) edits = new_edits # Tweak the edit distribution to improve search. if edits: - avg_p = avg(edits.values()) - for edit, p in edits.items(): - edits[edit] = logistic(p, k=3, x_0=avg_p) + avg_p = avg([v[0] for v in edits.values()]) + for edit, (p, uids) in edits.items(): + edits[edit] = (logistic(p, k=3, x_0=avg_p), uids) return edits, submissions, queries -- cgit v1.2.1