Source code for ppanggolin.genome

#!/usr/bin/env python3
#coding: utf8

class Feature:
    """Base record for an annotated genomic feature.

    A feature is created with only its identifier; coordinates, parents and
    sequence are attached afterwards through the ``fill_*`` / ``add_*``
    methods.
    """

    def __init__(self, ID):
        """Create a feature identified by ``ID`` with empty type, not fragmented."""
        self.type = ""
        self.is_fragment = False
        self.ID = ID

    def fill_annotations(self, start, stop, strand, geneType="", name="",
                         product="", local_identifier="", position=None,
                         genetic_code=11):
        """Store the annotation fields on the feature.

        ``start`` and ``stop`` are converted with ``int()``; every other value
        is stored exactly as given. ``geneType`` is stored as ``self.type``.
        """
        # genetic code, and position are not used in the default function.
        # The int() conversions come first so that a bad coordinate raises
        # before any of the remaining attributes are written.
        self.start = int(start)
        self.stop = int(stop)
        self.local_identifier = local_identifier
        self.name = name
        self.product = product
        self.strand = strand
        self.type = geneType

    def fill_parents(self, organism, contig):
        """Record the organism and the contig this feature is attached to."""
        self.contig = contig
        self.organism = organism

    def add_dna(self, dna):
        """Attach the DNA sequence of the feature.

        Raises:
            TypeError: if ``dna`` is not a ``str``.
        """
        if isinstance(dna, str):
            self.dna = dna
            return
        raise TypeError(f"'str' type was expected but you provided a '{type(dna)}' type object")
class RNA(Feature):
    """RNA feature; adds nothing on top of what ``Feature`` provides."""
class Gene(Feature):
    """Feature that additionally tracks a position in its contig and a family."""

    def __init__(self, ID):
        """Create a gene identified by ``ID`` with no position and no family yet."""
        super().__init__(ID)
        self.family = None
        self.position = None

    def __str__(self):
        """Return the gene identifier as a string."""
        return str(self.ID)

    def fill_annotations(self, start, stop, strand, geneType="", name="",
                         product="", local_identifier="", position=None,
                         genetic_code=11):
        """Store the annotation fields, plus ``position`` and ``genetic_code``.

        The shared fields are delegated to the parent implementation; the two
        gene-specific values are then stored as given.
        """
        super().fill_annotations(
            start,
            stop,
            strand,
            geneType=geneType,
            name=name,
            product=product,
            local_identifier=local_identifier,
        )
        self.position = position
        self.genetic_code = genetic_code

    def add_protein(self, protein):
        """Attach the protein sequence of the gene.

        Raises:
            TypeError: if ``protein`` is not a ``str``.
        """
        if isinstance(protein, str):
            self.protein = protein
            return
        raise TypeError(f"'str' type was expected but you provided a '{type(protein)}' type object")
class Contig:
    """A named contig holding genes (by position and by start) and RNAs."""

    def __init__(self, name, is_circular=False):
        """Create an empty contig called ``name``."""
        self.name = name
        self.is_circular = is_circular
        # saving the rna annotations. We're not using them in the vast majority of cases.
        self.RNAs = set()
        # gene start coordinate -> gene
        self._genes_start = {}
        # list index == gene.position; may hold None placeholders while parsing
        self._genes_position = []

    @property
    def genes(self):
        """The position-indexed list of genes (the internal list, not a copy)."""
        return self._genes_position

    def __str__(self):
        return self.name

    def __iter__(self):
        return iter(self.genes)

    # retrieve gene by start position
    def __getitem__(self, index):
        """Return the gene whose start coordinate equals ``index``.

        Raises:
            TypeError: if nothing is found and ``index`` is not an ``int``.
            IndexError: if ``index`` is an ``int`` but no gene starts there.
        """
        # The lookup is done before the type check on purpose: any key that
        # hashes/compares equal to a stored start is still resolved.
        gene = self._genes_start.get(index)
        if gene is None:
            if not isinstance(index, int):
                raise TypeError(f"Expected type is int, given type was '{type(index)}'")
            raise IndexError(f"No gene start at the given position {index}")
        return gene

    def addRNA(self, gene):
        """Add an ``RNA`` object to the contig's RNA set.

        Raises:
            TypeError: if ``gene`` is not an ``RNA`` instance.
        """
        if not isinstance(gene, RNA):
            raise TypeError(f"'RNA' type was expected but you provided a '{type(gene)}' type object")
        self.RNAs.add(gene)

    def addGene(self, gene):
        """Register ``gene`` at ``gene.position`` and index it by ``gene.start``.

        Raises:
            TypeError: if ``gene`` is not a ``Gene`` instance, or if its
                position has not been filled.
        """
        if not isinstance(gene, Gene):
            raise TypeError(f"'Gene' type was expected but you provided a '{type(gene)}' type object")
        if gene.position is None:
            raise TypeError("The gene object needs to have its position in the contig filled before adding it")
        # adding empty values. They should be filled by the end of the parsing.
        # Doing this because genes are not always met in order.
        missing = gene.position + 1 - len(self._genes_position)
        if missing > 0:
            self._genes_position.extend([None] * missing)
        # NOTE(review): a negative position is not rejected here; it would be
        # interpreted as an index from the end of the list - confirm callers
        # never pass one.
        self._genes_position[gene.position] = gene
        self._genes_start[gene.start] = gene
class Organism:
    """A named organism owning a collection of contigs keyed by contig name."""

    def __init__(self, name):
        """Create an organism called ``name`` with no contigs."""
        self.name = name
        # contig key -> Contig
        self._contigs_getter = {}

    @property
    def families(self):
        """returns the gene families present in the organism"""
        return {gene.family for contig in self.contigs for gene in contig.genes}

    @property
    def genes(self):
        """Yield every entry of every contig's gene list, contig by contig."""
        for contig in self.contigs:
            yield from contig.genes

    def number_of_genes(self):
        """Return the summed length of the gene lists of all contigs."""
        # No intermediate copies: each contig's gene list is measured directly.
        return sum(len(contig.genes) for contig in self.contigs)

    @property
    def contigs(self):
        """A view over the contigs registered in this organism."""
        return self._contigs_getter.values()

    def __str__(self):
        return self.name

    def getOrAddContig(self, key, is_circular=False):
        """Return the contig stored under ``key``, creating it if absent.

        ``is_circular`` is only used when a new contig has to be created; an
        existing contig is returned unchanged.
        """
        contig = self._contigs_getter.get(key)
        if contig is None:
            contig = self._createContig(key, is_circular)
        return contig

    def _createContig(self, key, is_circular=False):
        """Build a new ``Contig``, store it under ``key`` and return it."""
        new_contig = Contig(key, is_circular)
        self._contigs_getter[key] = new_contig
        return new_contig