Source code for ppanggolin.geneFamily

#!/usr/bin/env python3
#coding: utf8

#default libraries
from collections import defaultdict

#installed libraries
import gmpy2

#local libraries
from ppanggolin.genome import Gene


class GeneFamily:
    """This represents a single gene family. It will be a node in the pangenome graph,
    and be aware of its genes and edges.

    :param ID: The internal identifier to give to the gene family
    :type ID: any
    :param name: The name of the gene family (to be printed in output files)
    :type name: str
    """

    def __init__(self, ID, name):
        """Constructor method

        :param ID: The internal identifier to give to the gene family
        :type ID: any
        :param name: The name of the gene family (to be printed in output files)
        :type name: str
        """
        self.name = str(name)
        self.ID = ID
        self._edges = {}
        # organism -> set of genes of this family found in that organism
        self._genePerOrg = defaultdict(set)
        self.genes = set()
        self.removed = False  # for the repeated family not added in the main graph
        self.sequence = ""
        self.partition = ""
        # NOTE: the `bitarray` attribute only exists after mkBitarray() has been called.

    def _indexPendingGenes(self):
        """Indexes by organism the genes that were added before they had an organism.

        :meth:`addGene` skips the per-organism index for genes that have no
        ``organism`` attribute yet. This catches up on them once they got one.
        Genes that still have no organism are left out of the index.
        """
        indexed = sum(len(genes) for genes in self._genePerOrg.values())
        if indexed == len(self.genes):
            return  # every gene is already indexed, nothing to do
        for gene in self.genes:
            if hasattr(gene, "organism"):
                self._genePerOrg[gene.organism].add(gene)

    def addSequence(self, seq):
        """Assigns a protein sequence to the gene family.

        :param seq: the sequence to add to the gene family
        :type seq: str
        """
        self.sequence = seq

    def addPartition(self, partition):
        """Assigns a partition to the gene family. It should be the raw partition name provided by NEM.

        :param partition: The partition
        :type partition: str
        """
        self.partition = partition

    @property
    def namedPartition(self):
        """Reads the :attr:`partition` attribute and returns a meaningful name

        The name is chosen from the first letter of the raw partition:
        ``P`` is "persistent", ``C`` is "cloud", ``S`` is "shell" and anything
        else is "undefined".

        :raises Exception: If the gene family has no partition assigned
        :return: the partition name of the gene family
        :rtype: str
        """
        if self.partition == "":
            raise Exception("The gene family has not been associated to a partition")
        if self.partition.startswith("P"):
            return "persistent"
        if self.partition.startswith("C"):
            return "cloud"
        if self.partition.startswith("S"):
            return "shell"
        return "undefined"

    def addGene(self, gene):
        """Add a gene to the gene family, and sets the gene's :attr:`family` accordingly.

        If the gene already has an ``organism`` attribute it is indexed by organism
        right away; otherwise it is indexed lazily the next time the per-organism
        information is requested.

        :param gene: the gene to add
        :type gene: :class:`ppanggolin.genome.Gene`
        :raises TypeError: If the provided `gene` is of the wrong type
        """
        if not isinstance(gene, Gene):
            raise TypeError(f"'Gene' type object was expected, but '{type(gene)}' type object was provided.")
        self.genes.add(gene)
        gene.family = self
        if hasattr(gene, "organism"):
            self._genePerOrg[gene.organism].add(gene)

    def mkBitarray(self, index):
        """Produces a bitarray representing the presence / absence of the family in the pangenome
        using the provided index.

        The bitarray is stored in the :attr:`bitarray` attribute and is a :class:`gmpy2.xmpz` type.

        :param index: The index computed by :func:`ppanggolin.pangenome.Pangenome.getIndex`
        :type index: dict[:class:`ppanggolin.genome.Organism`, int]
        """
        self.bitarray = gmpy2.xmpz(0)  # pylint: disable=no-member
        for org in self.organisms:
            self.bitarray[index[org]] = 1

    def getOrgDict(self):
        """Returns the organisms and the genes belonging to the gene family

        The returned object is the family's own index, not a copy.

        :return: a dictionary of organism as key and set of genes as values
        :rtype: dict[:class:`ppanggolin.genome.Organism`, set[:class:`ppanggolin.genome.Gene`]]
        """
        self._indexPendingGenes()
        return self._genePerOrg

    def getGenesPerOrg(self, org):
        """Returns the genes belonging to the gene family in the given Organism

        An organism that does not have the family yields an empty set, and the
        query does not register that organism in the family.

        :param org: Organism to look for
        :type org: :class:`ppanggolin.genome.Organism`
        :return: a set of gene(s)
        :rtype: set[:class:`ppanggolin.genome.Gene`]
        """
        self._indexPendingGenes()
        # .get() instead of [] so that the defaultdict does not create an entry for `org`
        return self._genePerOrg.get(org, set())

    @property
    def neighbors(self):
        """Returns all of the :class:`ppanggolin.geneFamily.GeneFamily` that are linked with an edge

        :return: Neighbors
        :rtype: set[:class:`ppanggolin.geneFamily.GeneFamily`]
        """
        return set(self._edges.keys())

    @property
    def edges(self):
        """Returns all of the :class:`ppanggolin.pangenome.Edge` that are linked to this gene family

        :return: Edges of the gene family
        :rtype: list[:class:`ppanggolin.pangenome.Edge`]
        """
        return list(self._edges.values())

    @property
    def organisms(self):
        """Returns all of the :class:`ppanggolin.genome.Organism` that have this gene family

        Organisms whose gene set is empty are not reported.

        :return: Organisms that have this gene family
        :rtype: set[:class:`ppanggolin.genome.Organism`]
        """
        self._indexPendingGenes()
        return {org for org, genes in self._genePerOrg.items() if genes}