1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
#!/usr/bin/python3
from heapq import heappush, heappop
import itertools
import math
# A simple priority queue based on the heapq class.
class PQueue(object):
REMOVED = '<removed-task>' # placeholder for a removed task
def __init__(self):
self.pq = [] # list of entries arranged in a heap
self.entry_finder = {} # mapping of tasks to entries
self.counter = itertools.count() # unique sequence count
self.size = 0
def push(self, task, priority=0):
'Add a new task or update the priority of an existing task'
if task in self.entry_finder:
self.remove(task)
else:
self.size += 1
entry = [priority, next(self.counter), task]
self.entry_finder[task] = entry
heappush(self.pq, entry)
def remove(self, task):
'Mark an existing task as REMOVED. Raise KeyError if not found.'
entry = self.entry_finder.pop(task)
entry[-1] = self.REMOVED
self.size -= 1
def pop(self):
'Remove and return the lowest priority task. Raise KeyError if empty.'
while self.pq:
priority, count, task = heappop(self.pq)
if task is not self.REMOVED:
del self.entry_finder[task]
self.size -= 1
return task
return None
def __len__(self):
return self.size
# Return [n]th line in [text].
def get_line(text, n):
lines = text.split('\n')
if n >= 0 and n < len(lines):
return lines[n]
return None
# Indent each line in [text] by [indent] spaces.
def indent(text, indent=2):
return '\n'.join([' '*indent+line for line in text.split('\n')])
# Average in a list.
def avg(l):
return sum(l)/len(l)
# Logistic function.
def logistic(x, L=1, k=1, x_0=0):
return L/(1+math.exp(-k*(x-x_0)))
# Damerau-Levenshtein distance between sequences [a] and [b]. Based on
# https://thetaiko.wordpress.com/2011/01/21/damerau-levenshtein-distance-in-python/
def damerau_levenshtein(a, b):
# Construct an alphabet out of characters in [a] and [b] with a sequential
# identifier for each word.
alphabet = {}
for char in itertools.chain(a, b):
if char not in alphabet:
alphabet[char] = len(alphabet)
# Initialize distance matrix.
inf = len(a) + len(b) + 1
h = [[0]*(len(b)+2) for i in range(len(a)+2)]
h[0][0] = inf
for i in range(0, len(a)+1):
h[i+1][1] = i
h[i+1][0] = inf
for j in range(0, len(b)+1):
h[1][j+1] = j
h[0][j+1] = inf
# Do the magic.
da = [0 for x in range(0, len(alphabet))]
for i in range(1, len(a)+1):
db = 0
for j in range(1, len(b)+1):
i1 = da[alphabet[b[j-1]]]
j1 = db
d = 1
if (a[i-1] == b[j-1]):
d = 0
db = j
h[i+1][j+1] = min(
h[i][j] + d, # substitution
h[i+1][j] + 1, # insertion
h[i][j+1] + 1, # deletion
h[i1][j1] + (i-i1-1)+1+(j-j1-1)) # transposition
da[alphabet[a[i-1]]] = i
return h[len(a)+1][len(b)+1]
|