#default libraries
import logging
from collections import defaultdict
#installed libraries
import plotly.graph_objs as go
import plotly.offline as out_plotly
#local libraries
from ppanggolin.formats import checkPangenomeInfo
def _chao_lower_bound(nb_families, singletons, doubletons):
    """Return the Chao lower-bound estimate of the total number of families.

    The estimate is ``nb_families + singletons**2 / (2 * doubletons)``,
    rounded to two decimals.

    :param nb_families: number of observed gene families
    :param singletons: number of families found in exactly one organism
    :param doubletons: number of families found in exactly two organisms
    :return: the rounded estimate, or the string ``"NA"`` when there is no
        doubleton (the estimator's denominator would be zero)
    """
    if doubletons <= 0:
        return "NA"
    # `**` is exponentiation; `^` would be a bitwise XOR in Python.
    return round(nb_families + (singletons ** 2) / (2 * doubletons), 2)


def drawUCurve(pangenome, output, soft_core = 0.95):
    """Draw the U-shaped gene family frequency histogram as an HTML file.

    For every number of organisms ``x``, the plot shows how many gene
    families occur in exactly ``x`` organisms. When every family carries a
    partition and none is undefined ("U"), the bars are stacked by
    persistent / shell / cloud partition; otherwise a single series is drawn.
    A vertical dash-dotted line marks ``len(pangenome.organisms) * soft_core``.
    The plot title reports the Chao lower-bound estimate of the pangenome
    size, or "NA" when no family occurs in exactly two organisms.

    :param pangenome: pangenome object exposing ``geneFamilies`` and
        ``organisms``; each family exposes ``organisms``, ``partition`` and
        ``namedPartition``
    :param output: path of the output directory; the plot is written to
        ``output + "/Ushaped_plot.html"``
    :param soft_core: fraction of the organisms at which the vertical
        threshold line is drawn
    """
    # NOTE(review): presumably loads/validates the required pangenome content
    # before it is read below - confirm against ppanggolin.formats.
    checkPangenomeInfo(pangenome, needAnnotations=True, needFamilies=True, needGraph=True)
    logging.getLogger().info("Drawing the U-shaped curve...")

    # count[nb_org][series] = number of families of `series` found in exactly
    # `nb_org` organisms; the "pangenome" series counts every family.
    count = defaultdict(lambda: defaultdict(int))
    is_partitionned = False
    has_undefined = False
    for fam in pangenome.geneFamilies:
        nb_org = len(fam.organisms)
        if fam.partition != "":
            is_partitionned = True
            if fam.partition == "U":
                has_undefined = True
            count[nb_org][fam.namedPartition] += 1
        count[nb_org]["pangenome"] += 1

    # Height of the tallest bar, used as the top of the threshold line.
    max_bar = max((per_series["pangenome"] for per_series in count.values()), default=0)

    chao = _chao_lower_bound(len(pangenome.geneFamilies),
                             count[1]["pangenome"],
                             count[2]["pangenome"])

    COLORS = {"pangenome":"black", "exact_accessory":"#EB37ED", "exact_core" :"#FF2828", "soft_core":"#c7c938", "soft_accessory":"#996633","shell": "#00D860", "persistent":"#F7A507", "cloud":"#79DEFF", "undefined":"#828282"}

    nb_organisms = len(pangenome.organisms)
    x_values = list(range(1, nb_organisms + 1))

    if is_partitionned and not has_undefined:
        series_names = ("persistent", "shell", "cloud")
    else:
        series_names = ("undefined" if has_undefined else "pangenome",)

    data_plot = [
        go.Bar(x=x_values,
               y=[count[nb_org][series] for nb_org in x_values],
               name=series,
               marker=dict(color=COLORS[series]))
        for series in series_names
    ]

    threshold = nb_organisms * soft_core
    layout = go.Layout(title = "Gene families frequency distribution (U shape), chao="+str(chao),
                       xaxis = dict(title='Occurring in x genomes'),
                       yaxis = dict(title='# of gene families (F)'),
                       barmode='stack',
                       shapes=[dict(type='line', x0=threshold, x1=threshold, y0=0, y1=max_bar,
                                    line=dict(width=5, dash='dashdot', color="grey"))],
                       plot_bgcolor='#ffffff')

    fig = go.Figure(data=data_plot, layout=layout)
    out_plotly.plot(fig, filename = output+"/Ushaped_plot.html", auto_open=False)
    logging.getLogger().info(f"Done drawing the U-shaped curve : '{output+'/Ushaped_plot.html'}'")