In [14]:
def limb(D, j, n):
    
    min_i = None
    min_k = None
    
    min_length = float('inf')
    
    for i in range(n):
        for k in range(n):
            if i != j and k != j:
                length = (D[j][k] + D[i][j] - D[i][k]) / 2
                if length < min_length:
                    min_length = length
                    min_i = i
                    min_k = k
                    
    return (min_i, min_k, min_length)
In [15]:
i = 0
j = 1
k = 2
l = 3
limb(D, l, 3)
Out[15]:
(0, 2, 7.0)
In [16]:
G = {
    'a': [('b', 3)],
    'b': [('a', 3), ('c', 2), ('d', 1)],
    'c': [('b', 2)],
    'd': [('b', 1)]
}
In [17]:
def get_neighbors(G, v):
    if v in G:
        return G[v]
    else:
        []
In [18]:
get_neighbors(G, 'a')
Out[18]:
[('b', 3)]
In [19]:
def find_path(G, source, destination):
    stack = [source]
    visited = set([source])
    
    while len(stack) > 0:
        v = stack[-1]
        
        if v == destination:
            return stack
        
        has_neighbors = False
        
        for (w, weight) in get_neighbors(G, v):
            if w not in visited:
                stack.append(w)
                visited.add(w)
                has_neighbors = True
                break
                
        if not has_neighbors:
            stack.pop()
            
    print('Path not found')
    return []
In [20]:
find_path(G, 'a', 'c')
Out[20]:
['a', 'b', 'c']
In [21]:
def distance_between_nodes(G, node_i, node_j):
    neighbors = get_neighbors(G, node_i)
    
    for (w, weight) in neighbors:
        if w == node_j:
            return weight
        
    return None
In [22]:
distance_between_nodes(G, 'a', 'b')
Out[22]:
3
In [23]:
def add_node_on_path(G, source, destination, distance):
    path = find_path(G, source, destination)
    
    i = 0
    j = 1
    
    node_i = path[i]
    node_j = path[j]
    
    current_distance = distance_between_nodes(G, node_i, node_j)
    
    while current_distance < distance:
        i += 1
        j += 1
        
        node_i = path[i]
        node_j = path[j]
        
        current_distance += distance_between_nodes(G, node_i, node_j)
        
    if current_distance == distance:
        return node_j
    
    else:
        distance_between = current_distance - distance
        return new_node_between_nodes(G, node_j, node_i, distance_between)
In [24]:
def add_neighbor(G, node, neighbor, distance):
    G[node].append((neighbor, distance))
    
    if neighbor not in G:
        G[neighbor] = []
        
    G[neighbor].append((node, distance))
In [25]:
def remove_neighbor(G, node, neighbor, distance):
    G[node].remove((neighbor, distance))
    G[neighbor].remove((node, distance))
In [26]:
def new_node_between_nodes(G, node_i, node_j, distance_between):
    new_node = 'X{}{}'.format(node_i, node_j)
    
    distance_ij = distance_between_nodes(G, node_i, node_j)
    distance_diff = distance_ij - distance_between
    
    remove_neighbor(G, node_i, node_j, distance_ij)
    
    add_neighbor(G, node_i, new_node, distance_between)
    add_neighbor(G, new_node, node_j, distance_diff)
    
    return new_node
In [27]:
G = {
    'a': [('b', 3)],
    'b': [('a', 3), ('c', 2), ('d', 1)],
    'c': [('b', 2)],
    'd': [('b', 1)]
}

new_node = new_node_between_nodes(G, 'b', 'c', 1.5)
print(new_node)
print(G)
Xbc
{'a': [('b', 3)], 'b': [('a', 3), ('d', 1), ('Xbc', 1.5)], 'c': [('Xbc', 0.5)], 'd': [('b', 1)], 'Xbc': [('b', 1.5), ('c', 0.5)]}
In [28]:
def additive_phylogeny(D, n):
    if n == 2:
        return {
            0: [(1, D[0][1])],
            1: [(0, D[0][1])]
        }
    
    (i, k, limb_length) = limb(D, n - 1, n)
    
    for j in range(n - 1):
        D[j][n - 1] -= limb_length
        D[n - 1][j] = D[j][n - 1]
        
    x = D[i][n - 1]
    
    T = additive_phylogeny(D, n - 1)
    v = add_node_on_path(T, i, k, x)
    add_neighbor(T, v, n-1, limb_length)
    
    return T
In [29]:
D = [[ 0, 13, 21, 22],
     [13,  0, 12, 13],
     [21, 12,  0, 13],
     [22, 13, 13,  0]]

n = 4

additive_phylogeny(D, n)
Out[29]:
{0: [('X10', 11.0)],
 1: [('X10', 2.0)],
 'X10': [(1, 2.0), (0, 11.0), ('X2X10', 4.0)],
 2: [('X2X10', 6.0)],
 'X2X10': [(2, 6.0), ('X10', 4.0), (3, 7.0)],
 3: [('X2X10', 7.0)]}
In [42]:
class Cluster:
    def __init__(self, elements):
        self.elements = elements
        self.age = 0
        self.left = None
        self.right = None
        self.label = str(elements)
        
    def add_children(self, left, right):
        self.left = left
        self.right = right
        
    def update_age(self, age):
        self.age = age
        
    def __str__(self):
        return 'Label: {}, Age: {}'.format(self.label, self.age)
In [ ]:
c1 = Cluster([1,2])
c2 = Cluster([3,4])

print(c1)
print(c2)

c1.update_age(10)
print(c1)

c3 = Cluster([])
c3.add_children(c1, c2)

print('Left: ')
print(c3.left)
print('Right: ')
print(c3.right)
In [38]:
def distance(D, cluster_1, cluster_2):
    d = 0
    
    n = len(cluster_1.elements)
    m = len(cluster_2.elements)
    
    for i in cluster_1.elements:
        for j in cluster_2.elements:
            d += D[i][j]
            
    d = d / (n * m)
    return d
In [39]:
def two_closest(clusters, D):
    min_i = None
    min_j = None
    
    min_distance = float('inf')
    
    for cluster_i in clusters:
        for cluster_j in clusters:
            if cluster_i != cluster_j:
                current_distance = distance(D, cluster_i, cluster_j)
                if current_distance < min_distance:
                    min_distance = current_distance
                    min_i = cluster_i
                    min_j = cluster_j
                    
    return (min_i, min_j, min_distance)
In [40]:
def create_clusters(D):
    n = len(D)
    clusters = []
    
    for i in range(n):
        clusters.append(Cluster([i]))
        
    return clusters
In [58]:
clusters = create_clusters(D)
for c in clusters:
    print(c)

print()
print(D)
print()
(c1, c2, dist) = two_closest(clusters, D)
print('C1: ')
print(c1)
print('C2: ')
print(c2)
print('Distance: {}'.format(dist))
Label: [0], Age: 0
Label: [1], Age: 0
Label: [2], Age: 0
Label: [3], Age: 0

[[0, 13, 11.0, 15.0], [13, 0, 2.0, 6.0], [11.0, 2.0, 0, 6.0], [15.0, 6.0, 6.0, 0]]

C1: 
Label: [1], Age: 0
C2: 
Label: [2], Age: 0
Distance: 2.0
In [59]:
def UPGMA(D, n):
    clusters = create_clusters(D)

    T = {}
    for c in clusters:
        T[c] = []
        
    while len(clusters) > 1:
        (c_i, c_j, dist) = two_closest(clusters, D)
        
        c_new = Cluster(c_i.elements + c_j.elements)
        c_new.left = c_i
        c_new.right = c_j
        c_new.update_age(dist / 2)
        
        T[c_new] = [(c_i, dist/2), (c_j, dist/2)]
        
        new_clusters = [c_new]
        
        for c in clusters:
            if c != c_i and c != c_j:
                new_clusters.append(c)
                
        clusters = new_clusters
        
    root = clusters[0]

    return (T, root)
In [66]:
D = [[ 0, 13, 21, 22],
     [13,  0, 12, 13],
     [21, 12,  0, 13],
     [22, 13, 13,  0]]

n = 4

graph, root = UPGMA(D, n)

#print(graph)

for v, neighbors in graph.items():
    print(v)
    print('Neighbors: ')
    for w in graph[v]:
        print('{} '.format(w))
Label: [0], Age: 0
Neighbors: 
Label: [1], Age: 0
Neighbors: 
Label: [2], Age: 0
Neighbors: 
Label: [3], Age: 0
Neighbors: 
Label: [1, 2], Age: 6.0
Neighbors: 
(<__main__.Cluster object at 0x102accb00>, 6.0) 
(<__main__.Cluster object at 0x102accb70>, 6.0) 
Label: [1, 2, 3], Age: 6.5
Neighbors: 
(<__main__.Cluster object at 0x102accc50>, 6.5) 
(<__main__.Cluster object at 0x102accbe0>, 6.5) 
Label: [1, 2, 3, 0], Age: 9.333333333333334
Neighbors: 
(<__main__.Cluster object at 0x102acccc0>, 9.333333333333334) 
(<__main__.Cluster object at 0x102acca90>, 9.333333333333334) 
In [106]:
def create_map(D):
    D_map = {}
    
    n = len(D)
    
    for i in range(n):
        D_map[i] = {}
        
        for j in range(n):
            D_map[i][j] = D[i][j]
                
    return D_map
In [71]:
D_map = create_map(D)
D_map['a'] = {}
D_map['a']['b'] = 3
print(D_map)
{0: {1: 13, 2: 21, 3: 22}, 1: {0: 13, 2: 12, 3: 13}, 2: {0: 21, 1: 12, 3: 13}, 3: {0: 22, 1: 13, 2: 13}, 'a': {'b': 3}}
In [151]:
def add_node(D_map, m, i, j):
    nodes = [x[0] for x in list(D_map.items())]
    D_map[m] = {}

    for k in nodes:
        if k != i and k != j:
            D_map[k][m] = 1/2 * (D_map[k][i] + D_map[k][j] - D_map[i][j])
            D_map[m][k] = D_map[k][m]
            
    D_map[m][m] = 0
            
    return D_map
In [131]:
def remove_node(D_map, removed_node):
    del D_map[removed_node]
    
    for node in D_map:
        if removed_node in D_map[node]:
            del D_map[node][removed_node]
        
    return D_map
In [86]:
D_map = create_map(D)
print(D_map)
remove_node(D_map, 3)
{0: {1: 13, 2: 21, 3: 22}, 1: {0: 13, 2: 12, 3: 13}, 2: {0: 21, 1: 12, 3: 13}, 3: {0: 22, 1: 13, 2: 13}}
Out[86]:
{0: {1: 13, 2: 21}, 1: {0: 13, 2: 12}, 2: {0: 21, 1: 12}}
In [87]:
def total_distance(D_map, node_i):
    total = 0
    
    for node_j in D_map[node_i]:
        total += D_map[node_i][node_j]
    
    return total
In [88]:
D_map = create_map(D)
print(D_map)
total_distance(D_map, 0)
{0: {1: 13, 2: 21, 3: 22}, 1: {0: 13, 2: 12, 3: 13}, 2: {0: 21, 1: 12, 3: 13}, 3: {0: 22, 1: 13, 2: 13}}
Out[88]:
56
In [108]:
import copy

def create_map_star(D_map):
    D_map_star = copy.deepcopy(D_map)
    n = len(D_map)
    
    for i in D_map:
        for j in D_map[i]:
            if i != j:
                D_map_star[i][j] = (n - 2) * D_map[i][j] - total_distance(D_map, i) - total_distance(D_map, j)
            else:
                D_map_star[i][j] = 0
        
    return D_map_star
In [109]:
print(D)
D_map = create_map(D)
create_map_star(D_map)
[[0, 13, 21, 22], [13, 0, 12, 13], [21, 12, 0, 13], [22, 13, 13, 0]]
Out[109]:
{0: {0: 0, 1: -68, 2: -60, 3: -60},
 1: {0: -68, 1: 0, 2: -60, 3: -60},
 2: {0: -60, 1: -60, 2: 0, 3: -68},
 3: {0: -60, 1: -60, 2: -68, 3: 0}}
In [111]:
def minimal_pair(D_star):
    min_distance = float('inf')
    min_i = None
    min_j = None
    
    for i in D_star:
        for j in D_star[i]:
            if i != j:
                current_distance = D_star[i][j]
                if current_distance < min_distance:
                    min_i = i
                    min_j = j
                    min_distance = current_distance
                    
    return (min_i, min_j)
In [156]:
def neighbor_joining(D, n):
    if n == 2:
        (i, j) = [x[0] for x in list(D.items())]
        return {
            i: [(j, D[i][j])],
            j: [(i, D[j][i])]
        }
    
    D_star = create_map_star(D)
    (i, j) = minimal_pair(D_star)
    
    delta = (total_distance(D, i) - total_distance(D, j)) / (n - 2)
    limb_length_i = 1/2 * (D[i][j] + delta)
    limb_length_j = 1/2 * (D[i][j] - delta)
    
    m = '{}+{}'.format(i, j)

    D = add_node(D, m, i, j)
    D = remove_node(D, i)
    D = remove_node(D, j)
    
    T = neighbor_joining(D, n - 1)
    
    T[m].append((i, limb_length_i))
    T[m].append((j, limb_length_j))
    T[i] = [(m, limb_length_i)]
    T[j] = [(m, limb_length_j)]
    
    return T
In [157]:
D_map = create_map(D)
n = len(D_map)
T = neighbor_joining(D_map, n)
print(T)
{'0+1': [('2+3', 4.0), (0, 11.0), (1, 2.0)], '2+3': [('0+1', 4.0), (2, 6.0), (3, 7.0)], 2: [('2+3', 6.0)], 3: [('2+3', 7.0)], 0: [('0+1', 11.0)], 1: [('0+1', 2.0)]}