amino_acid_masses = {
'': 0,
'G': 57,
'A': 71,
'S': 87,
'P': 97,
'V': 99,
'T': 101,
'C': 103,
'I': 113,
'L': 113,
'N': 114,
'D': 115,
'K': 128,
'Q': 128,
'E': 129,
'M': 131,
'H': 137,
'F': 147,
'R': 156,
'Y': 163,
'W': 186,
}
def linear_spectrum(peptide):
n = len(peptide)
prefix_mass = [0 for i in range(n + 1)]
for i in range(n):
aa = peptide[i]
prefix_mass[i + 1] = prefix_mass[i] + amino_acid_masses[aa]
spectrum = [0]
for i in range(n):
for j in range(i + 1, n + 1):
spectrum.append(prefix_mass[j] - prefix_mass[i])
spectrum.sort()
return spectrum
linear_spectrum('MATFMATFMATF')
def cyclic_spectrum(peptide):
n = len(peptide)
prefix_mass = [0 for i in range(n + 1)]
for i in range(n):
aa = peptide[i]
prefix_mass[i + 1] = prefix_mass[i] + amino_acid_masses[aa]
spectrum = [0]
peptide_mass = prefix_mass[-1]
for i in range(n):
for j in range(i + 1, n + 1):
segment_sum = prefix_mass[j] - prefix_mass[i]
spectrum.append(segment_sum)
if i > 0 and j < n:
spectrum.append(peptide_mass - segment_sum)
spectrum.sort()
return spectrum
cyclic_spectrum('NQEL')
def extend(peptides):
extension = []
for peptide in peptides:
for aa in amino_acid_masses:
if aa != '':
extension.append(peptide + aa)
return extension
peptides = ['SP']
extend(peptides)
def consistent(peptide, spectrum):
peptide_linear_spectrum = linear_spectrum(peptide)
n = len(peptide_linear_spectrum)
m = len(spectrum)
i = 0
j = 0
while i < n:
while spectrum[j] != peptide_linear_spectrum[i]:
j += 1
if j == m:
return False
i += 1
j += 1
return True
def mass(peptide):
total_mass = 0
for aa in peptide:
total_mass += amino_acid_masses[aa]
return total_mass
def cyclopeptide_sequencing(spectrum):
results = []
peptides = ['']
parent_mass = spectrum[-1]
while len(peptides) > 0:
extension = extend(peptides)
survived = []
for peptide in extension:
if mass(peptide) == parent_mass:
if cyclic_spectrum(peptide) == spectrum:
results.append(peptide)
elif consistent(peptide, spectrum):
survived.append(peptide)
peptides = survived
return results
# spectrum = [0, 113, 114, 128, 129, 227, 242, 242, 257, 355, 356, 370, 371, 484]
spectrum = [0, 113, 114, 128, 129, 129, 139, 242, 370, 371, 484]
cyclopeptide_sequencing(spectrum)
def score(peptide_spectrum, target_spectrum):
n = len(peptide_spectrum)
m = len(target_spectrum)
i = 0
j = 0
last_found = 0
total_score = 0
while i < n:
j = last_found
while j < m and target_spectrum[j] <= peptide_spectrum[i]:
if peptide_spectrum[i] == target_spectrum[j]:
last_found = j + 1
total_score += 1
break
j += 1
i += 1
return total_score
def linear_score(peptide, spectrum):
peptide_linear_spectrum = linear_spectrum(peptide)
return score(peptide_linear_spectrum, spectrum)
def cyclic_score(peptide, spectrum):
peptide_cyclic_spectrum = cyclic_spectrum(peptide)
return score(peptide_cyclic_spectrum, spectrum)
s1 = [0,1,2,2,4]
s2 = [0, 1, 2, 3, 4, 5]
score(s1, s2)
peptide = 'MAVT'
print(linear_score(peptide, spectrum))
def trim(leaderboard, spectrum, N):
if len(leaderboard) <= N:
return leaderboard
sorted_leaderboard = []
for peptide in leaderboard:
peptide_linear_score = linear_score(peptide, spectrum)
sorted_leaderboard.append((peptide_linear_score, peptide))
sorted_leaderboard.sort(reverse=True)
for i in range(N,len(leaderboard)):
if sorted_leaderboard[i][0] < sorted_leaderboard[N - 1][0]:
trimmed = sorted_leaderboard[:i]
return [x[1] for x in trimmed]
trimmed = sorted_leaderboard
return [x[1] for x in trimmed]
def leaderboard_cyclopeptide_sequencing(spectrum, N):
leaderboard = ['']
leader_peptide = ''
leader_peptide_score = 0
parent_mass = spectrum[-1]
while len(leaderboard) > 0:
extension = extend(leaderboard)
survived = []
for peptide in extension:
peptide_mass = mass(peptide)
if peptide_mass == parent_mass:
peptide_score = cyclic_score(peptide, spectrum)
if peptide_score > leader_peptide_score:
leader_peptide_score = peptide_score
leader_peptide = peptide
survived.append(peptide)
elif peptide_mass <= parent_mass:
survived.append(peptide)
leaderboard = trim(survived, spectrum, N)
return leader_peptide
spectrum = [0,
71,
71,
71,
101,
101,
101,
131,
131,
131,
147,
147,
147,
172,
172,
172,
202,
202,
202,
248,
248,
248,
278,
278,
303,
303,
303,
319,
319,
319,
349,
349,
379,
379,
450,
450,
450,
450,
450,
450,
450,
450,
450,
521,
521,
551,
551,
581,
581,
597,
597,
622,
622,
652,
652,
698,
698,
728,
753,
753,
769,
769,
799,
829,
900,
900,
900,
900,
900,
971,
1001,
1031,
1047,
1072,
1102,
1148,
1203,
1219,
1350]
N = 10
leaderboard_cyclopeptide_sequencing(spectrum, N)