summaryrefslogtreecommitdiff
path: root/monkey/util.py
blob: 93c6d94280f395112c8cb5f846137300371315b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/python3

from heapq import heappush, heappop
import itertools
import math

# A simple priority queue based on the heapq class.
class PQueue(object):
    REMOVED = '<removed-task>'      # placeholder for a removed task

    def __init__(self):
        self.pq = []                         # list of entries arranged in a heap
        self.entry_finder = {}               # mapping of tasks to entries
        self.counter = itertools.count()     # unique sequence count
        self.size = 0

    def push(self, task, priority=0):
        'Add a new task or update the priority of an existing task'
        if task in self.entry_finder:
            self.remove(task)
        else:
            self.size += 1
        entry = [priority, next(self.counter), task]
        self.entry_finder[task] = entry
        heappush(self.pq, entry)

    def remove(self, task):
        'Mark an existing task as REMOVED.  Raise KeyError if not found.'
        entry = self.entry_finder.pop(task)
        entry[-1] = self.REMOVED
        self.size -= 1

    def pop(self):
        'Remove and return the lowest priority task. Raise KeyError if empty.'
        while self.pq:
            priority, count, task = heappop(self.pq)
            if task is not self.REMOVED:
                del self.entry_finder[task]
                self.size -= 1
                return task
        return None

    def __len__(self):
        return self.size

# Return [n]th line in [text].
def get_line(text, n):
    lines = text.split('\n')
    if n >= 0 and n < len(lines):
        return lines[n]
    return None

# Indent each line in [text] by [indent] spaces.
def indent(text, indent=2):
    return '\n'.join([' '*indent+line for line in text.split('\n')])

# Average in a list.
def avg(l):
    return sum(l)/len(l)

# Logistic function.
def logistic(x, L=1, k=1, x_0=0):
    return L/(1+math.exp(-k*(x-x_0)))

# Damerau-Levenshtein distance between sequences [a] and [b]. Based on
# https://thetaiko.wordpress.com/2011/01/21/damerau-levenshtein-distance-in-python/
def damerau_levenshtein(a, b):
    # Construct an alphabet out of characters in [a] and [b] with a sequential
    # identifier for each word.
    alphabet = {}
    for char in itertools.chain(a, b):
        if char not in alphabet:
            alphabet[char] = len(alphabet)

    # Initialize distance matrix.
    inf = len(a) + len(b) + 1
    h = [[0]*(len(b)+2) for i in range(len(a)+2)]
    h[0][0] = inf
    for i in range(0, len(a)+1):
        h[i+1][1] = i
        h[i+1][0] = inf
    for j in range(0, len(b)+1):
        h[1][j+1] = j
        h[0][j+1] = inf

    # Do the magic.
    da = [0 for x in range(0, len(alphabet))]
    for i in range(1, len(a)+1):
        db = 0
        for j in range(1, len(b)+1):
            i1 = da[alphabet[b[j-1]]]
            j1 = db
            d = 1
            if (a[i-1] == b[j-1]):
                d = 0
                db = j
            h[i+1][j+1] = min(
                h[i][j] + d,                      # substitution
                h[i+1][j] + 1,                    # insertion
                h[i][j+1] + 1,                    # deletion
                h[i1][j1] + (i-i1-1)+1+(j-j1-1))  # transposition
        da[alphabet[a[i-1]]] = i

    return h[len(a)+1][len(b)+1]