From 740e128bb32075279dd62c677b27c645805cb5e2 Mon Sep 17 00:00:00 2001 From: Timotej Lazar Date: Mon, 11 Jan 2016 17:27:10 +0100 Subject: For each edit remember the user ids where it was seen --- monkey/__init__.py | 2 +- monkey/edits.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/monkey/__init__.py b/monkey/__init__.py index ec09e62..83ea215 100644 --- a/monkey/__init__.py +++ b/monkey/__init__.py @@ -30,7 +30,7 @@ def fix(code, edits, test, timeout=30, debug=False): for part, range_path in interesting_ranges(program): names = {} part_normal = tuple(rename_vars_list(part, names)) - for (path, a, b), p in edits.items(): + for (path, a, b), (p, uids) in edits.items(): if path == range_path and a == part_normal: reverse_names = {v: k for k, v in names.items()} b_real = tuple(rename_vars(b, reverse_names)) diff --git a/monkey/edits.py b/monkey/edits.py index f27871b..ae44301 100644 --- a/monkey/edits.py +++ b/monkey/edits.py @@ -129,7 +129,7 @@ def get_edits_from_solutions(solutions, test): # where they were observed. submissions = collections.defaultdict(set) queries = collections.Counter() - edits = collections.defaultdict(list) + edits = collections.defaultdict(set) for solution in solutions: trace = solution.trace @@ -137,8 +137,8 @@ def get_edits_from_solutions(solutions, test): trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid) # Update edits. - for edit, features in trace_edits.items(): - edits[edit].extend(features) + for edit, uids in trace_edits.items(): + edits[edit] |= uids # Update submission/query counters (use normalized variables). for code, correct in trace_submissions: @@ -155,24 +155,24 @@ def get_edits_from_solutions(solutions, test): n_start = collections.Counter() n_start_all = 0 - for (path, a, b), features in edits.items(): - edits[(path, a, b)] = len(features) - n_start[(path, a)] += len(features) - n_start_all += len(features) + for (path, a, b), uids in edits.items(): + edits[(path, a, b)] = (len(uids), uids) + n_start[(path, a)] += len(uids) + n_start_all += len(uids) # Find the probability of each edit a → b. new_edits = {} - for (path, a, b), count in edits.items(): + for (path, a, b), (count, uids) in edits.items(): if a != b: p = count / n_start[(path, a)] - new_edits[(path, a, b)] = p + new_edits[(path, a, b)] = (p, uids) edits = new_edits # Tweak the edit distribution to improve search. if edits: - avg_p = avg(edits.values()) - for edit, p in edits.items(): - edits[edit] = logistic(p, k=3, x_0=avg_p) + avg_p = avg([v[0] for v in edits.values()]) + for edit, (p, uids) in edits.items(): + edits[edit] = (logistic(p, k=3, x_0=avg_p), uids) return edits, submissions, queries -- cgit v1.2.1