Source code for ppanggolin.region

#!/usr/bin/env python3
#coding: utf8

#default libraries
import logging
from collections.abc import Iterable

#local libraries
from ppanggolin.genome import Organism, Gene

class Region:
    """A region of genomic plasticity: an ordered collection of genes.

    The contig and the organism of the region are read from its first gene,
    so every gene is presumably expected to come from the same contig
    (NOTE(review): this is not enforced anywhere in this class).

    Attributes set by the constructor:
        genes: list of the genes added so far, in insertion order.
        name: the identifier given at construction.
        score: initialised to 0.
    """

    def __init__(self, ID):
        self.genes = []
        self.name = ID
        self.score = 0

    def __hash__(self):
        # Identity-based on purpose: regions are stored in sets and used as
        # dict keys, while __eq__ compares gene family order.
        return id(self)

    def __eq__(self, other):
        """Tell whether two regions list the same gene families.

        The families are compared in gene order; the other region may be read
        forwards or backwards.

        Raises:
            TypeError: if `other` is not a Region.
        """
        if not isinstance(other, Region):
            raise TypeError(f"'Region' type object was expected, but '{type(other)}' type object was provided.")
        own_families = [gene.family for gene in self.genes]
        other_families = [gene.family for gene in other.genes]
        return own_families == other_families or own_families == other_families[::-1]

    def append(self, value):
        """Add a gene at the end of the region.

        Raises:
            TypeError: if `value` is not a Gene.
        """
        # Only gene-class objects are allowed in a region.
        if not isinstance(value, Gene):
            # An f-string is required here: concatenating a str with
            # type(value) would itself fail before the message is built.
            raise TypeError(f"Unexpected class / type for {type(value)} when adding it to a region of genomic plasticity")
        self.genes.append(value)

    @property
    def families(self):
        """Set of the gene families found in the region."""
        return {gene.family for gene in self.genes}

    @property
    def start(self):
        """Smallest `start` value among the genes of the region."""
        return min(self.genes, key=lambda gene: gene.start).start

    @property
    def startGene(self):
        """Gene of the region with the lowest `position`."""
        return min(self.genes, key=lambda gene: gene.position)

    @property
    def stopGene(self):
        """Gene of the region with the highest `position`."""
        return max(self.genes, key=lambda gene: gene.position)

    @property
    def stop(self):
        """Largest `stop` value among the genes of the region."""
        return max(self.genes, key=lambda gene: gene.stop).stop

    @property
    def organism(self):
        """Organism of the first gene of the region."""
        return self.genes[0].organism

    @property
    def contig(self):
        """Contig of the first gene of the region."""
        return self.genes[0].contig

    @property
    def isWholeContig(self):
        """Indicate whether the region spans the entire contig."""
        last_position = len(self.contig.genes) - 1
        return self.startGene.position == 0 and self.stopGene.position == last_position

    @property
    def isContigBorder(self):
        """Indicate whether the region touches an end of a non-circular contig.

        Raises:
            Exception: if the region holds no gene.
        """
        if not self.genes:
            raise Exception("Your region has no genes. Something wrong happenned.")
        if self.contig.is_circular:
            # A circular contig has no border.
            return False
        last_position = len(self.contig.genes) - 1
        return self.startGene.position == 0 or self.stopGene.position == last_position

    def getRNAs(self):
        """Return the set of RNAs of the contig that start strictly inside the region."""
        contig_rnas = self.contig.RNAs
        region_start, region_stop = self.start, self.stop
        return {rna for rna in contig_rnas if region_start < rna.start < region_stop}

    def __len__(self):
        return len(self.genes)

    def __getitem__(self, index):
        return self.genes[index]

    def _collectBorder(self, origin, step, n, multigenics):
        """Walk the contig from `origin` in direction `step` (-1 or +1) and
        gather up to `n` persistent, non-multigenic genes, nearest first."""
        contig_genes = self.contig.genes
        nb_genes = len(contig_genes)
        circular = self.contig.is_circular
        found = []
        pos = origin
        while len(found) < n:
            pos += step
            if pos < 0 or pos >= nb_genes:
                if not circular:
                    break  # reached the end of a linear contig
                pos %= nb_genes  # wrap around a circular contig
            if pos == origin:
                logging.getLogger().warning("looped around the contig")
                break  # looped around the contig
            candidate = contig_genes[pos]
            if candidate.family not in multigenics and candidate.family.namedPartition == "persistent":
                found.append(candidate)
        return found

    def getBorderingGenes(self, n, multigenics):
        """Return the genes flanking the region on both sides.

        Args:
            n: maximum number of genes to collect on each side.
            multigenics: container of gene families to ignore (only used for
                membership tests).

        Returns:
            A two-item list `[before, after]`. `before` holds the genes found
            when walking down from the lowest-position gene of the region,
            `after` those found when walking up from its highest-position
            gene. Both are ordered from nearest to farthest, and both only
            keep genes whose family has the "persistent" partition and is not
            in `multigenics`.

        On a linear contig each walk stops at the end of the contig. On a
        circular contig it wraps around and stops once it is back at its
        starting position, logging a warning.
        """
        before = self._collectBorder(self.startGene.position, -1, n, multigenics)
        after = self._collectBorder(self.stopGene.position, 1, n, multigenics)
        return [before, after]
class Spot:
    """A set of regions, with helpers to group them by synteny or by content.

    Attributes set by the constructor:
        ID: the identifier given at construction.
        regions: set of the regions added to the spot.

    The two groupings (identical family order, identical family content) are
    computed on first request and memoised; adding a region discards them.
    """

    def __init__(self, ID):
        self.ID = ID
        self.regions = set()
        self._uniqOrderedSet = {}
        self._compOrderedSet = False
        self._uniqContent = {}
        self._compContent = False

    def _resetCaches(self):
        """Drop the memoised groupings so that they are rebuilt on next use."""
        self._uniqOrderedSet = {}
        self._compOrderedSet = False
        self._uniqContent = {}
        self._compContent = False

    def addRegions(self, regions):
        """Add every region of an Iterable to the spot.

        Raises:
            Exception: if `regions` is not an Iterable.
        """
        if not isinstance(regions, Iterable):
            raise Exception("The provided 'regions' variable was not an Iterable")
        for region in regions:
            self.addRegion(region)

    def addRegion(self, region):
        """Add a single region to the spot.

        Values that are not Region objects are silently ignored
        (NOTE(review): confirm that this is intended rather than an error).
        """
        if isinstance(region, Region):
            self.regions.add(region)
            # The groupings were computed without this region: without this
            # reset, later get*/count* calls would return stale results.
            self._resetCaches()

    def borders(self, set_size, multigenics):
        """Extract and count the distinct borders of the regions of the spot.

        Args:
            set_size: number of bordering genes requested on each side,
                forwarded to `getBorderingGenes`.
            multigenics: families to ignore, forwarded to `getBorderingGenes`.

        Returns:
            A list of `[count, [families_side_0, families_side_1]]` items, one
            per distinct border. Two borders are the same when their family
            lists match either as-is or with the two sides swapped.
        """
        all_borders = [rgp.getBorderingGenes(set_size, multigenics) for rgp in self.regions]
        family_borders = []
        for side_0, side_1 in all_borders:
            curr_set = [[gene.family for gene in side_0], [gene.family for gene in side_1]]
            for known in family_borders:
                if known[1] == curr_set or known[1] == curr_set[::-1]:
                    known[0] += 1
                    break
            else:
                # First time this border is seen.
                family_borders.append([1, curr_set])
        return family_borders

    def _mkUniqOrderedSetObj(self):
        """Cluster the regions into groups that have an identical synteny."""
        for rgp in self.regions:
            for seenRgp, group in self._uniqOrderedSet.items():
                if rgp == seenRgp:
                    group.add(rgp)
                    break
            else:
                # No representative matches: this region starts a new group.
                self._uniqOrderedSet[rgp] = {rgp}

    def _mkUniqContent(self):
        """Cluster the regions into groups that have an identical gene family content."""
        for rgp in self.regions:
            rgp_families = rgp.families
            for seenRgp, group in self._uniqContent.items():
                if rgp_families == seenRgp.families:
                    group.add(rgp)
                    break
            else:
                # No representative matches: this region starts a new group.
                self._uniqContent[rgp] = {rgp}

    def _getContent(self):
        """Build the content grouping if it was never computed, and return it."""
        if not self._compContent:
            self._mkUniqContent()
            self._compContent = True
        return self._uniqContent

    def _getOrderedSet(self):
        """Build the synteny grouping if it was never computed, and return it."""
        if not self._compOrderedSet:
            self._mkUniqOrderedSetObj()
            self._compOrderedSet = True
        return self._uniqOrderedSet

    def getUniqOrderedSet(self):
        """Return a set holding one representative region per unique synteny."""
        return set(self._getOrderedSet())

    def getUniqContent(self):
        """Return a set holding one representative region per unique gene family content."""
        return set(self._getContent())

    def countUniqContent(self):
        """Return a dict mapping each representative region to the number of
        regions sharing its gene family content."""
        return {rgp: len(group) for rgp, group in self._getContent().items()}

    def countUniqOrderedSet(self):
        """Return a dict mapping each representative region to the number of
        regions sharing its synteny."""
        return {rgp: len(group) for rgp, group in self._getOrderedSet().items()}