From 740e128bb32075279dd62c677b27c645805cb5e2 Mon Sep 17 00:00:00 2001
From: Timotej Lazar <timotej.lazar@fri.uni-lj.si>
Date: Mon, 11 Jan 2016 17:27:10 +0100
Subject: For each edit remember the user ids where it was seen

---
 monkey/__init__.py |  2 +-
 monkey/edits.py    | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/monkey/__init__.py b/monkey/__init__.py
index ec09e62..83ea215 100644
--- a/monkey/__init__.py
+++ b/monkey/__init__.py
@@ -30,7 +30,7 @@ def fix(code, edits, test, timeout=30, debug=False):
         for part, range_path in interesting_ranges(program):
             names = {}
             part_normal = tuple(rename_vars_list(part, names))
-            for (path, a, b), p in edits.items():
+            for (path, a, b), (p, uids) in edits.items():
                 if path == range_path and a == part_normal:
                     reverse_names = {v: k for k, v in names.items()}
                     b_real = tuple(rename_vars(b, reverse_names))
diff --git a/monkey/edits.py b/monkey/edits.py
index f27871b..ae44301 100644
--- a/monkey/edits.py
+++ b/monkey/edits.py
@@ -129,7 +129,7 @@ def get_edits_from_solutions(solutions, test):
     # where they were observed.
     submissions = collections.defaultdict(set)
     queries = collections.Counter()
-    edits = collections.defaultdict(list)
+    edits = collections.defaultdict(set)
 
     for solution in solutions:
         trace = solution.trace
@@ -137,8 +137,8 @@ def get_edits_from_solutions(solutions, test):
         trace_edits, trace_submissions, trace_queries = get_edits_from_trace(trace, test, uid)
 
         # Update edits.
-        for edit, features in trace_edits.items():
-            edits[edit].extend(features)
+        for edit, uids in trace_edits.items():
+            edits[edit] |= uids
 
         # Update submission/query counters (use normalized variables).
         for code, correct in trace_submissions:
@@ -155,24 +155,24 @@ def get_edits_from_solutions(solutions, test):
 
     n_start = collections.Counter()
     n_start_all = 0
-    for (path, a, b), features in edits.items():
-        edits[(path, a, b)] = len(features)
-        n_start[(path, a)] += len(features)
-        n_start_all += len(features)
+    for (path, a, b), uids in edits.items():
+        edits[(path, a, b)] = (len(uids), uids)
+        n_start[(path, a)] += len(uids)
+        n_start_all += len(uids)
 
     # Find the probability of each edit a → b.
     new_edits = {}
-    for (path, a, b), count in edits.items():
+    for (path, a, b), (count, uids) in edits.items():
         if a != b:
             p = count / n_start[(path, a)]
-            new_edits[(path, a, b)] = p
+            new_edits[(path, a, b)] = (p, uids)
     edits = new_edits
 
     # Tweak the edit distribution to improve search.
     if edits:
-        avg_p = avg(edits.values())
-        for edit, p in edits.items():
-            edits[edit] = logistic(p, k=3, x_0=avg_p)
+        avg_p = avg([v[0] for v in edits.values()])
+        for edit, (p, uids) in edits.items():
+            edits[edit] = (logistic(p, k=3, x_0=avg_p), uids)
 
     return edits, submissions, queries
 
-- 
cgit v1.2.1