Source code for stereoAlign.metrics.ari

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 8/11/23 2:53 PM
# @Author  : zhangchao
# @File    : ari.py
# @Email   : zhangchao5@genomics.cn
import numpy as np
import pandas as pd
import scipy.special
from scanpy._utils import deprecated_arg_names
from sklearn.metrics.cluster import adjusted_rand_score

from stereoAlign.utils import check_adata, check_batch


@deprecated_arg_names({"group1": "cluster_key", "group2": "label_key"})
def ari(adata, cluster_key, label_key, implementation=None):
    """Adjusted Rand Index

    The adjusted rand index is a chance-adjusted rand index, which evaluates
    the pair-wise accuracy of clustering vs. ground truth label assignments.
    The score is at most 1 (identical partitions up to a relabelling), is
    close to 0 for a random assignment, and can become negative when the
    agreement is worse than expected by chance. Larger values indicate better
    conservation of the data-driven cell identity discovery after integration
    compared to annotated labels.

    This function can be applied to all integration output types.
    The ``adata`` must contain cluster assignments that are based off the knn
    graph given or derived from the integration method output.

    Parameters
    ----------
    adata:
        anndata object with cluster assignments in ``adata.obs[cluster_key]``
    cluster_key:
        string of column in adata.obs containing cluster assignments
    label_key:
        string of column in adata.obs containing labels
    implementation:
        if set to 'sklearn', uses sklearn's implementation, otherwise the
        native implementation is taken

    Returns
    -------
    The adjusted rand index as a float. The native implementation returns
    ``1.0`` for degenerate inputs where the index is otherwise undefined
    (fewer than two observations, or both partitions trivially identical),
    matching the value sklearn reports for identical partitions.

    Raises
    ------
    ValueError
        If the two columns yield arrays of different lengths.
    """
    # Input validation is delegated to the project helpers; their exact
    # checks live in stereoAlign.utils.
    check_adata(adata)
    check_batch(cluster_key, adata.obs)
    check_batch(label_key, adata.obs)

    # From here on the parameter names hold the per-observation value arrays.
    cluster_key = adata.obs[cluster_key].to_numpy()
    label_key = adata.obs[label_key].to_numpy()

    # Defensive check: both arrays come from the same ``obs`` table.
    if len(cluster_key) != len(label_key):
        raise ValueError(
            f"different lengths in cluster_key ({len(cluster_key)}) and label_key ({len(label_key)})"
        )

    if implementation == "sklearn":
        return adjusted_rand_score(cluster_key, label_key)

    def binom_sum(x, k=2):
        # Total number of k-subsets (pairs by default) summed over all counts.
        return scipy.special.binom(x, k).sum()

    n = len(cluster_key)
    total_pairs = binom_sum(n, 2)
    if total_pairs == 0:
        # Fewer than two observations: no pairs to compare, so the expected
        # index below would be 0/0. Report perfect agreement instead of NaN.
        return 1.0

    contingency = pd.crosstab(cluster_key, label_key)

    # Pairs that fall in the same group within each marginal partition.
    ai_sum = binom_sum(contingency.sum(axis=0))
    bi_sum = binom_sum(contingency.sum(axis=1))

    # Pairs that fall in the same cell of the contingency table, i.e. pairs
    # grouped together by both partitions.
    index = binom_sum(np.ravel(contingency))
    expected_index = ai_sum * bi_sum / total_pairs
    max_index = 0.5 * (ai_sum + bi_sum)

    denominator = max_index - expected_index
    if denominator == 0:
        # Happens only when both partitions are one single group or both are
        # all singletons; the partitions are then identical, so return 1.0
        # rather than the NaN a 0/0 division would produce.
        return 1.0

    return (index - expected_index) / denominator