class TrieNode:
def __init__(self, label):
self.label = label
self.neighbors = {}
self.is_leaf = True
def add_neighbor(self, character):
self.is_leaf = False
self.neighbors[character] = TrieNode(self.label + character)
def has_neighbor(self, character):
if character in self.neighbors:
return True
return False
def get_neighbor(self, character):
if self.has_neighbor(character):
return self.neighbors[character]
return None
def get_neighbors(self):
return self.neighbors
def trie_construction(patterns):
root = TrieNode('')
for pattern in patterns:
current_node = root
for current_symbol in pattern:
if not current_node.has_neighbor(current_symbol):
current_node.add_neighbor(current_symbol)
current_node = current_node.get_neighbor(current_symbol)
return root
patterns = ['ab','banana', 'ananas', 'cucumber']
trie_construction(patterns)
def prefix_trie_matching(text, trie):
current_node = trie
for c in text:
if current_node.is_leaf:
return current_node.label
if current_node.has_neighbor(c):
current_node = current_node.get_neighbor(c)
else:
return False
if current_node.is_leaf:
return current_node.label
else:
return False
text = 'bananas'
patterns = ['ab', 'bananas', 'ananas', 'cucumber']
trie = trie_construction(patterns)
prefix_trie_matching(text, trie)
def trie_matching(text, trie):
results = []
n = len(text)
for i in range(n):
match = prefix_trie_matching(text[i:], trie)
if match:
results.append((match, i))
return results
text = 'babananas'
patterns = ['ab', 'bananas', 'ananas', 'cucumber']
trie = trie_construction(patterns)
trie_matching(text, trie)
def generate_suffixes(text):
suffixes = []
n = len(text)
for i in range(n):
suffixes.append(text[i:])
return suffixes
text = 'panamabananas$'
generate_suffixes(text)
def generate_suffix_array(text):
raw_suffixes = generate_suffixes(text)
n = len(text)
suffixes = list(zip(raw_suffixes, [i for i in range(n)]))
suffixes.sort()
return suffixes
text = 'panamabananas$'
generate_suffix_array(text)
text = 'banana$'
suffixes = generate_suffixes(text)
trie_construction([x[0] for x in suffixes])
def is_prefix(pattern, text):
n = len(pattern)
m = len(text)
if n > m:
return False
return text[:n] == pattern
is_prefix('ac', 'abbc')
def pattern_matching_with_suffix_array(pattern, suffix_array):
n = len(suffix_array)
l = 0
d = n
while l <= d:
mid = (l + d) // 2
mid_suffix = suffix_array[mid][0]
if is_prefix(pattern, mid_suffix):
i = mid - 1
j = mid + 1
while i >= 0:
if not is_prefix(pattern, suffix_array[i][0]):
i += 1
break
i -= 1
while j < n:
if not is_prefix(pattern, suffix_array[j][0]):
break
j += 1
return [x[1] for x in suffix_array[i:j]]
if mid_suffix < pattern:
l = mid
else:
d = mid
return []
pattern = 'na'
text = 'panamabananas$'
suffix_array = generate_suffix_array(text)
pattern_matching_with_suffix_array(pattern, suffix_array)
def generate_cyclic_permutations(text):
permutations = []
n = len(text)
for i in range(n):
prefix = text[:(n-i)]
suffix = text[(n-i):]
permutations.append(suffix + prefix)
return permutations
def bwt_construction(text):
permutations = generate_cyclic_permutations(text)
permutations.sort()
bwt_text = ''.join([x[-1] for x in permutations])
return bwt_text
text = 'banana$'
bwt_text = bwt_construction(text)
bwt_list = list(bwt_text)
bwt_list.sort()
bwt_first_column = ''.join(bwt_list)
print(bwt_first_column)
print(bwt_text)
def inverse_bwt(bwt_text):
n = len(bwt_text)
last_column = list(bwt_text)
columns = last_column[:]
columns.sort()
result_index = bwt_text.index('$')
for i in range(n - 1):
for j in range(n):
columns[j] = last_column[j] + columns[j]
columns.sort()
return columns[result_index]
inverse_bwt('annb$aa')
def last_to_first(first_column, last_column, index):
char_at_index = last_column[index]
n = len(last_column)
rank = 0;
for i in range(index+1):
if last_column[i] == char_at_index:
rank += 1
first_column_rank = 0
for i in range(n):
if first_column[i] == char_at_index:
first_column_rank += 1
if first_column_rank == rank:
return i
last_to_first('$aaabnn', 'annb$aa', 5)
def bw_matching(first_column, last_column, pattern):
top = 0
bottom = len(first_column) - 1
m = len(pattern)
j = 0
while top < bottom:
print('top: {}, bottom: {}'.format(top, bottom))
if j < m:
symbol = pattern[j]
j += 1
top_index = -1
bottom_index = -1
for i in range(top,bottom + 1):
if top_index == -1 and last_column[i] == symbol:
top_index = i
bottom_index = i
elif last_column[i] == symbol:
bottom_index = i
if top_index == -1 or bottom_index == -1:
return 0
top = last_to_first(first_column, last_column, top_index)
bottom = last_to_first(first_column, last_column, bottom_index)
else:
break
return bottom - top + 1
word = 'panamabananas$'
pattern = 'ana'
last_column = bwt_construction(word)
first_column = list(last_column[:])
first_column.sort()
bw_matching(first_column, last_column, pattern)