Home › Artificial Intelligence › How to Create a Bioinformatics AI Agent Using Biopython for DNA...

How to Create a Bioinformatics AI Agent Using Biopython for DNA and Protein Analysis


class BioPythonAIAgent:
    """Fetch, store, analyse and plot biological sequences.

    The methods use names that the enclosing module must import:
    ``Entrez``, ``SeqIO``, ``Seq``, ``SeqRecord``, ``gc_fraction``,
    ``molecular_weight``, ``ProteinAnalysis``, ``pd``, ``go``, ``px`` and
    ``make_subplots`` (presumably Biopython, pandas and plotly -- confirm
    against the file's import block).

    Attributes are plain dictionaries:
        sequences: records keyed by accession or sample id.
        analysis_results: dictionaries produced by ``analyze_sequence``.
        alignments, trees: created empty; nothing in this class fills them.
    """

    def __init__(self, email="researcher@example.com"):
        """Create an agent and register *email* on ``Entrez``.

        Args:
            email: Contact address assigned to ``Entrez.email``. The default
                is only a placeholder; pass a real address for real queries.
        """
        self.email = email
        Entrez.email = email
        self.sequences = {}
        self.analysis_results = {}
        self.alignments = {}
        self.trees = {}

    def fetch_sequence_from_ncbi(self, accession_id, db="nucleotide", rettype="fasta"):
        """Download one record through ``Entrez.efetch`` and cache it.

        Args:
            accession_id: Identifier passed to ``efetch`` and used as the
                cache key in ``self.sequences``.
            db: Entrez database name.
            rettype: Entrez return type. The response is always parsed as
                FASTA, so other return types will fail to parse.

        Returns:
            The parsed record, or ``None`` if fetching or parsing failed (the
            error is printed rather than raised).
        """
        try:
            handle = Entrez.efetch(db=db, id=accession_id, rettype=rettype, retmode="text")
            try:
                record = SeqIO.read(handle, "fasta")
            finally:
                # Release the connection even when parsing fails.
                handle.close()
        except Exception as e:
            # Deliberately best-effort: report the problem and return None.
            print(f"Error fetching sequence: {e}")
            return None

        self.sequences[accession_id] = record
        return record

    def create_sample_sequences(self):
        """Register three built-in sample sequences in ``self.sequences``.

        Returns:
            A list of ``(id, sequence_string, description)`` tuples for the
            samples that were registered.
        """
        covid_spike = "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT"

        human_insulin = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN"

        e_coli_16s = "AAATTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGTAACAGGAAGCAGCTTGCTGCTTTGCTGACGAGTGGCGGACGGGTGAGTAATGTCTGGGAAACTGCCTGATGGAGGGGGATAACTACTGGAAACGGTAGCTAATACCGCATAATGTCGCAAGACCAAAGAGGGGGACCTTCGGGCCTCTTGCCATCGGATGTGCCCAGATGGGATTAGCTAGTAGGTGGGGTAACGGCTCACCTAGGCGACGATCCCTAGCTGGTCTGAGAGGATGACCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGCGTTAAGGTTAATAACCTTGGCGATTGACGTTACCCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA"

        sample_sequences = [
            ("COVID_Spike", covid_spike, "SARS-CoV-2 Spike Protein"),
            ("Human_Insulin", human_insulin, "Human Insulin Precursor"),
            ("E_coli_16S", e_coli_16s, "E. coli 16S rRNA"),
        ]

        for seq_id, seq_str, desc in sample_sequences:
            self.sequences[seq_id] = SeqRecord(Seq(seq_str), id=seq_id, description=desc)

        return sample_sequences

    def analyze_sequence(self, sequence_id=None, sequence=None):
        """Compute basic statistics for a stored or ad-hoc sequence.

        Args:
            sequence_id: Key of a record in ``self.sequences``. Takes
                precedence over *sequence* when it is present there.
            sequence: Raw sequence string, used when *sequence_id* does not
                resolve to a stored record.

        Returns:
            A dictionary with ``'length'``, ``'composition'`` (A/T/G/C
            counts), ``'gc_content'`` and ``'molecular_weight'``; when the
            length is a multiple of three and translation succeeds it also
            holds ``'translation'``, ``'stop_codons'`` and, for a translation
            without internal stops, ``'protein_mw'``, ``'isoelectric_point'``
            and ``'protein_composition'``. Returns ``None`` when neither
            argument yields a sequence. The result is also stored in
            ``self.analysis_results`` under *sequence_id* or ``"customized"``.
        """
        if sequence_id and sequence_id in self.sequences:
            seq = self.sequences[sequence_id].seq
        elif sequence:
            seq = Seq(sequence)
        else:
            return None

        analysis = {
            'length': len(seq),
            'composition': {base: seq.count(base) for base in 'ATGC'},
        }

        # NOTE(review): these nucleotide metrics are computed for every
        # sequence (the original guard was always true), so they are only
        # meaningful when the input really is DNA.
        analysis['gc_content'] = round(gc_fraction(seq) * 100, 2)
        try:
            analysis['molecular_weight'] = round(molecular_weight(seq, seq_type="DNA"), 2)
        except Exception:
            # Fallback estimate kept from the original: 650 per residue.
            analysis['molecular_weight'] = len(seq) * 650

        try:
            if len(seq) % 3 == 0:
                protein = seq.translate()
                protein_str = str(protein)
                analysis['translation'] = protein_str
                analysis['stop_codons'] = protein.count('*')

                # Strip only a genuine terminal stop; never drop a real residue.
                core = protein_str[:-1] if protein_str.endswith('*') else protein_str
                if core and '*' not in core:
                    prot_analysis = ProteinAnalysis(core)
                    analysis['protein_mw'] = round(prot_analysis.molecular_weight(), 2)
                    analysis['isoelectric_point'] = round(prot_analysis.isoelectric_point(), 2)
                    analysis['protein_composition'] = prot_analysis.get_amino_acids_percent()
        except Exception:
            # Translation is best-effort: non-coding or protein input simply
            # yields no translation-related keys.
            pass

        key = sequence_id if sequence_id else "customized"
        self.analysis_results[key] = analysis

        return analysis

    def visualize_composition(self, sequence_id):
        """Plot composition and summary properties for an analysed sequence.

        Does nothing when *sequence_id* has no entry in
        ``self.analysis_results``.
        """
        if sequence_id not in self.analysis_results:
            return

        analysis = self.analysis_results[sequence_id]

        fig = make_subplots(
            rows=2, cols=2,
            specs=[[{"type": "pie"}, {"type": "bar"}],
                   [{"colspan": 2}, None]],
            subplot_titles=("Nucleotide Composition", "Base Count", "Sequence Properties")
        )

        labels = list(analysis['composition'].keys())
        values = list(analysis['composition'].values())

        fig.add_trace(
            go.Pie(labels=labels, values=values, name="Composition"),
            row=1, col=1
        )

        fig.add_trace(
            go.Bar(x=labels, y=values, name="Count", marker_color=['red', 'blue', 'green', 'orange']),
            row=1, col=2
        )

        properties = ['Length', 'GC%', 'MW (kDa)']
        prop_values = [
            analysis['length'],
            analysis.get('gc_content', 0),
            analysis.get('molecular_weight', 0) / 1000
        ]

        fig.add_trace(
            go.Scatter(x=properties, y=prop_values, mode="markers+lines",
                       marker=dict(size=10, color="purple"), name="Properties"),
            row=2, col=1
        )

        fig.update_layout(
            title=f"Comprehensive Analysis: {sequence_id}",
            showlegend=False,
            height=600
        )

        fig.show()

    def comparative_analysis(self, sequence_ids):
        """Analyse several stored sequences and plot them side by side.

        Args:
            sequence_ids: Iterable of keys; ids missing from
                ``self.sequences`` are skipped.

        Returns:
            A DataFrame with one row per analysed sequence: the
            ``analyze_sequence`` result plus a ``'sequence_id'`` column.
            Plots are drawn only when more than one row is present.
        """
        rows = []
        for seq_id in sequence_ids:
            if seq_id not in self.sequences:
                continue
            analysis = self.analyze_sequence(seq_id)
            if analysis is None:
                continue
            # Copy so the stored analysis result is not mutated.
            rows.append({'sequence_id': seq_id, **analysis})

        df = pd.DataFrame(rows)

        if len(df) > 1:
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=("Length Comparison", "GC Content", "Molecular Weight", "Composition Heatmap")
            )

            fig.add_trace(
                go.Bar(x=df['sequence_id'], y=df['length'], name="Length"),
                row=1, col=1
            )

            if 'gc_content' in df.columns:
                fig.add_trace(
                    go.Scatter(x=df['sequence_id'], y=df['gc_content'], mode="markers+lines", name="GC%"),
                    row=1, col=2
                )

            if 'molecular_weight' in df.columns:
                fig.add_trace(
                    go.Bar(x=df['sequence_id'], y=df['molecular_weight'], name="MW"),
                    row=2, col=1
                )

            fig.update_layout(title="Comparative Sequence Analysis", height=600)
            fig.show()

        return df

    def perform_multiple_sequence_alignment(self, sequence_ids):
        """Return the comparative DataFrame for *sequence_ids*.

        NOTE(review): in the text this class was recovered from, everything
        between this method's header and the comparison plotting code is
        missing, so no alignment logic survives. Until it is restored, this
        name is kept callable and delegates to ``comparative_analysis``.
        """
        return self.comparative_analysis(sequence_ids)

    def codon_usage_analysis(self, sequence_id):
        """Count codons of a stored sequence and plot the 20 most frequent.

        Returns:
            A DataFrame with ``'Codon'`` and ``'Count'`` columns sorted by
            descending count, or ``None`` when the id is unknown or the
            sequence length is not a multiple of three.
        """
        if sequence_id not in self.sequences:
            return None

        seq = self.sequences[sequence_id].seq

        if len(seq) % 3 != 0:
            return None

        codons = {}
        for start in range(0, len(seq) - 2, 3):
            codon = str(seq[start:start + 3])
            codons[codon] = codons.get(codon, 0) + 1

        codon_df = pd.DataFrame(list(codons.items()), columns=['Codon', 'Count'])
        codon_df = codon_df.sort_values('Count', ascending=False)

        fig = px.bar(codon_df.head(20), x='Codon', y='Count',
                     title=f"Top 20 Codon Usage - {sequence_id}")
        fig.show()

        return codon_df

    def motif_search(self, sequence_id, motif_pattern):
        """Find every (possibly overlapping) exact occurrence of a motif.

        Returns:
            A list of 0-based start positions; empty when the id is unknown,
            the motif is empty, or there is no match.
        """
        if sequence_id not in self.sequences or not motif_pattern:
            return []

        seq = str(self.sequences[sequence_id].seq)
        last_start = len(seq) - len(motif_pattern)

        return [i for i in range(last_start + 1) if seq.startswith(motif_pattern, i)]

    def gc_content_window(self, sequence_id, window_size=100):
        """Compute and plot GC content over a sliding window.

        The window advances by a quarter of its size (at least one position).

        Args:
            sequence_id: Key of a record in ``self.sequences``.
            window_size: Window length in residues; must be at least 1.

        Returns:
            ``(positions, gc_values)`` where positions are window centres and
            values are percentages, or ``None`` when the id is unknown.

        Raises:
            ValueError: If *window_size* is smaller than 1.
        """
        if sequence_id not in self.sequences:
            return None
        if window_size < 1:
            raise ValueError("window_size must be at least 1")

        seq = self.sequences[sequence_id].seq
        # A window shorter than 4 would otherwise give a zero range step.
        step = max(1, window_size // 4)
        gc_values = []
        positions = []

        for start in range(0, len(seq) - window_size + 1, step):
            window = seq[start:start + window_size]
            gc_values.append(gc_fraction(window) * 100)
            positions.append(start + window_size // 2)

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=positions, y=gc_values, mode="lines+markers",
                                 name=f'GC Content (window={window_size})'))
        fig.update_layout(
            title=f"GC Content Sliding Window Analysis - {sequence_id}",
            xaxis_title="Position",
            yaxis_title="GC Content (%)"
        )
        fig.show()

        return positions, gc_values

    def run_comprehensive_analysis(self, sequence_ids):
        """Run every per-sequence analysis, then a cross-sequence comparison.

        Args:
            sequence_ids: Keys to analyse; ids missing from
                ``self.sequences`` are skipped.

        Returns:
            A dictionary keyed by sequence id with ``'basic_analysis'``,
            ``'gc_window'`` and ``'codon_usage'`` entries, plus a
            ``'comparative'`` DataFrame when more than one id was requested.
        """
        results = {}

        for seq_id in sequence_ids:
            if seq_id not in self.sequences:
                continue

            analysis = self.analyze_sequence(seq_id)
            self.visualize_composition(seq_id)

            results[seq_id] = {
                'basic_analysis': analysis,
                'gc_window': self.gc_content_window(seq_id),
                'codon_usage': self.codon_usage_analysis(seq_id),
            }

        if len(sequence_ids) > 1:
            results['comparative'] = self.comparative_analysis(sequence_ids)

        return results

RELATED ARTICLES

LEAVE A REPLY

Please enter your comment!
Please enter your name here

- Advertisement -
Google search engine

Most Popular

Recent Comments