Primer učitavanja sekvence iz fajla i translacije

In [1]:
from Bio.Seq import Seq
import Bio.SeqIO as SeqIO
from Bio.Alphabet import generic_dna, generic_protein
In [2]:
# Ucitavanje sekvence HBB proteina pacijenta iz fajla.
# Data je nukleotidna sekvenca koja kodira HBB protein
sick_patient_sequence = SeqIO.read('HBS.fasta', 'fasta')
In [3]:
# Ceo rekord
print(sick_patient_sequence)

# Sekvenca
print(sick_patient_sequence.seq)
ID: Sick
Name: Sick
Description: Sick patient HBB
Number of features: 0
Seq('ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACCATGG...CAA', SingleLetterAlphabet())
ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACCATGGTGCATCTGACTCCTGTGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGTGAGGCCCTGGGCAGGCTGCTGGTGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATCTGTCCACTCCTGATGCTGTTATGGGCAACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGCTCGGTGCCTTTAGTGATGGCCTGGCTCACCTGGACAACCTCAAGGGCACCTTTGCCACACTGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACTTCAGGCTCCTGGGCAACGTGCTGGTCTGTGTGCTGGCCCATCACTTTGGCAAAGAATTCACCCCACCAGTGCAGGCTGCCTATCAGAAAGTGGTGGCTGGTGTGGCTAATGCCCTGGCCCACAAGTATCACTAAGCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAGTCCAACTACTAAACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATAAAAAACATTTATTTTCATTGCAA
In [4]:
# Proteinska sekvenca zdravog HBB proteina
hbb = Seq('MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH', generic_dna)
print(hbb)
MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
In [5]:
# translated_sick = sick_patient_sequence.seq[0:].translate() # okvir 1
# translated_sick = sick_patient_sequence.seq[1:].translate() # okvir 2

# Translirana DNK (posredstvom mRNK) u proteinsku sekvencu od treceg okvira
translated_sick = sick_patient_sequence.seq[2:].translate()
print(translated_sick)
ICF*HNCVH*QPQTDTMVHLTPVEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH*ARFLAVQFLLKVPLFPKSNY*TGGYYEGP*ASGFCLIKNIYFHC
/usr/local/lib/python3.7/site-packages/Bio/Seq.py:2859: BiopythonWarning: Partial codon, len(sequence) not a multiple of three. Explicitly trim the sequence or add trailing N before translation. This may become an error in future.
  BiopythonWarning,
In [6]:
# Prva pojava Metionina (startne aminokiseline)
translated_sick.find('M')
Out[6]:
16
In [7]:
start_codon = translated_sick.find('M')
sliced_sequence = translated_sick[start_codon:]
# Pronalazenje prvog stop kodona od M
stop_codon = sliced_sequence.find('*')
sliced_sequence = sliced_sequence[:stop_codon]
print(sliced_sequence)
MVHLTPVEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
In [8]:
print(hbb)
print()
print(sliced_sequence)
MVHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH

MVHLTPVEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH
In [9]:
# Pronalazenje razlika izmedju proteinskih sekvenci zdravog i bolesnog HBB
# U pitanju je srpasta anemija kod koje se, usled mutacije jednog nukleotida
# na 6. poziciji (7. ako se broji od 0) aminokiselina Glutamat (E) menja Valinom (V)
for i in range(len(hbb)):
    if hbb[i] != sliced_sequence[i]:
        print(f'Pos: {i}, {hbb[i]}->{sliced_sequence[i]}')
Pos: 6, E->V