a
    f	g!                  	   @   s4  d dl Zd dlmZmZ d dlmZ d dlm	Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZ d dlZd dlmZ d d	lmZ eed
ddeddd6eee  dddZ!eed
ddeddd7eee  dddZ"eed
ddeddd8ee eee   dddZ#eed
ddeddd9ee eee   dddZ$dd Z%dd Z&d:ee  eee   ee d d!d"Z'ej(d#d$d%Z)d;eee   ej(d'd(d)Z*d<eee   ej(d'd*d+Z+d=ej(eee  eee  d/d0d1Z,d>ej(eee  eee  eeee-d3d4d5Z.dS )?    N)ListOptional)spatial)PCA)TSNE)average_precision_scoreprecision_recall_curve)retrystop_after_attemptwait_random_exponential)numpy)pandas      )minmax   )waitstoptext-similarity-davinci-001)textreturnc                 K   s2   |  dd} tjjf | g|d|d d d S N
 inputenginedatar   	embedding)replaceopenai	Embeddingcreater   r   kwargs r&   Z/var/www/html/llm_bihealth/app/venv/lib/python3.9/site-packages/openai/embeddings_utils.pyget_embedding   s    r(   c                    s8   |  dd} tjjf | g|d|I d H d d d S r   )r    r!   r"   acreater$   r&   r&   r'   aget_embedding   s    &r*   text-similarity-babbage-001)list_of_textr   c                 K   sJ   t | dksJ ddd | D } tjjf | |d|j}dd |D S )N   .The batch size should not be larger than 2048.c                 S   s   g | ]}| d dqS r   r   r    .0r   r&   r&   r'   
<listcomp>.       z"get_embeddings.<locals>.<listcomp>r   c                 S   s   g | ]}|d  qS r   r&   r2   dr&   r&   r'   r3   1   r4   )lenr!   r"   r#   r   r,   r   r%   r   r&   r&   r'   get_embeddings'   s    r:   c                    sP   t | dksJ ddd | D } tjjf | |d|I d H j}dd |D S )Nr-   r.   c                 S   s   g | ]}| d dqS r/   r0   r1   r&   r&   r'   r3   ;   r4   z#aget_embeddings.<locals>.<listcomp>r   c                 S   s   g | ]}|d  qS r5   r&   r6   r&   r&   r'   r3   >   r4   )r8   r!   r"   r)   r   r9   r&   r&   r'   aget_embeddings4   s     r;   c                 C   s$   t | |t j| t j|  S )N)npdotZlinalgZnorm)abr&   r&   r'   cosine_similarityA   s    r@   c                    sf  t  }tj fddt|D ddj}t }t }t }t|D ]Z}	t|dd|	f | dd|	f \||	< ||	< }
t|dd|	f | dd|	f ||	< qFt| |  \}}}
t|| dd}t	t
|d|  tjd	d
 tjdddd}g }g }|D ]j}tdd}|| d| |  }tj||dk ||dk ddd\}tjd|d|d d fd q|| |d tj||ddd\}|| |d| t|D ]D}	tj||	 ||	 dd\}|| |d |	 ||	  qt }|jdd  td!d"g td!d#g td$ td% t| d& t|| dS )'a!  
    Precision-Recall plotting for a multiclass problem. It plots average precision-recall, per class precision recall and reference f1 contours.

    Code slightly modified, but heavily based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html
    c                    s   g | ]} | kqS r&   r&   )r2   i
class_listy_true_untransformedr&   r'   r3   O   r4   z4plot_multiclass_precision_recall.<locals>.<listcomp>r   )ZaxisNmicro)Zaveragez5 - Average precision score over all classes: {0:0.2f})	   
   )Zfigsizeg?g?   )numg{Gz?   r   Zgray)coloralphazf1={0:0.1f}g?-   g{Gz?)Zxyziso-f1 curvesZgold)rK   lwz+average Precision-recall (auprc = {0:0.2f}))rN   z3Precision-recall for class `{0}` (auprc = {1:0.2f})g      ?)Zbottomg        g      ?g?ZRecallZ	Precisionz': Precision-Recall curve for each class)r8   pdconcatrangevaluesdictr   r   Zravelprintstrformatpltfigurer<   ZlinspaceZplotZannotateappendZgcfZsubplots_adjustZxlimZylimZxlabelZylabeltitleZlegend)Zy_scorerD   rC   Zclassifier_nameZ	n_classesZy_trueZ	precisionZrecallZaverage_precisionrA   _Zprecision_microZrecall_microZaverage_precision_microZf_scoreslineslabelsZf_scorexylZfigr&   rB   r'    plot_multiclass_precision_recallE   sj    0(
$$





ra   cosine)query_embedding
embeddingsr   c                    s8   t jjt jjt jjt jjd fdd|D }|S )zHReturn the distances between a query embedding and a list of embeddings.)rb   ZL1ZL2ZLinfc                    s   g | ]}  |qS r&   r&   )r2   r   distance_metricZdistance_metricsrc   r&   r'   r3      s   z-distances_from_embeddings.<locals>.<listcomp>)r   Zdistancerb   Z	cityblockZ	euclideanZ	chebyshev)rc   rd   rf   	distancesr&   re   r'   distances_from_embeddings   s    rh   )r   c                 C   s
   t | S )zGReturn a list of indices of nearest neighbors from a list of distances.)r<   Zargsort)rg   r&   r&   r'   +indices_of_nearest_neighbors_from_distances   s    ri   rJ   )rd   r   c                 C   s   t |d}t| }||S )z2Return the PCA components of a list of embeddings.)n_components)r   r<   arrayfit_transform)rd   rj   pcaarray_of_embeddingsr&   r&   r'   pca_components_from_embeddings   s    

ro   c                 K   sN   d|  vrd|d< d|  vr(d|d< tf d|i|}t| }||S )z1Returns t-SNE components of a list of embeddings.initrm   Zlearning_rateautorj   )keysr   r<   rk   rl   )rd   rj   r%   Ztsnern   r&   r&   r'   tsne_components_from_embeddings   s    
rs   Component 0Component 1   )
componentsr]   stringsc           
      K   s   dd | D }t || dddf || dddf d|r<|n|d|rRdd |D n|i}tj|f|||rndnd|rxdnd|rdgndd	|jt|d
d}	|	S )z7Return an interactive 2D chart of embedding components.c                 S   s   g | ]}d qS  r&   r2   r[   r&   r&   r'   r3      r4   z)chart_from_components.<locals>.<listcomp>Nr   r   labelstringc                 S   s    g | ]}d  tj|ddqS z<br>   )widthjointrwrapr2   r}   r&   r&   r'   r3      r4   )r^   r_   rK   symbol
hover_datasizemarker)rO   	DataFramepxZscatterupdate_tracesrS   )
rw   r]   rx   x_titley_title	mark_sizer%   
empty_listr   chartr&   r&   r'   chart_from_components   s2    



	r   Compontent 2)rw   r]   rx   r   r   z_titler   c                 K   s   dd | D }t || dddf || dddf || dddf d|rL|n|d|rbd	d |D n|i}	tj|	f||||rdnd|rdnd|rdgndd
|jt|dd}
|
S )z7Return an interactive 3D chart of embedding components.c                 S   s   g | ]}d qS ry   r&   r{   r&   r&   r'   r3      r4   z,chart_from_components_3D.<locals>.<listcomp>Nr   r   rJ   r|   r}   c                 S   s    g | ]}d  tj|ddqS r~   r   r   r&   r&   r'   r3      r4   )r^   r_   zrK   r   r   r   r   )rO   r   r   Z
scatter_3dr   rS   )rw   r]   rx   r   r   r   r   r%   r   r   r   r&   r&   r'   chart_from_components_3D   s6    

	
r   )r   )r   )r+   )r+   )rb   )rJ   )rJ   )NNrt   ru   rv   )NNrt   ru   r   rv   )/textwrapr   typingr   r   Zmatplotlib.pyplotZpyplotrW   Zplotly.expressZexpressr   Zscipyr   Zsklearn.decompositionr   Zsklearn.manifoldr   Zsklearn.metricsr   r   Ztenacityr	   r
   r   r!   Zopenai.datalib.numpy_helperr   r<   Zopenai.datalib.pandas_helperr   rO   rU   floatr(   r*   r:   r;   r@   ra   rh   Zndarrayri   ro   rs   r   intr   r&   r&   r&   r'   <module>   s     
 
I 
 

 
     

#      

