深度学习
from __future__ import division
import numpy as np
from scipy import sparse as sp
def check_clusterings(labels_true, labels_pred):
    """Check that two clusterings are matching 1D label arrays.

    Parameters
    ----------
    labels_true : array-like
        Reference labels, one per sample.
    labels_pred : array-like
        Labels to compare against the reference, one per sample.

    Returns
    -------
    labels_true, labels_pred : numpy.ndarray
        Both inputs converted with ``np.asarray``.

    Raises
    ------
    ValueError
        If either array is not one-dimensional, or if the two arrays do not
        have the same shape.
    """
    labels_true = np.asarray(labels_true)
    labels_pred = np.asarray(labels_pred)

    # input checks
    if labels_true.ndim != 1:
        raise ValueError(
            "labels_true must be 1D: shape is %r" % (labels_true.shape,))
    if labels_pred.ndim != 1:
        raise ValueError(
            "labels_pred must be 1D: shape is %r" % (labels_pred.shape,))
    # Both arrays are known to be 1D here, so comparing shapes compares
    # the number of samples.
    if labels_true.shape != labels_pred.shape:
        raise ValueError(
            "labels_true and labels_pred must have same size, got %d and %d"
            % (labels_true.shape[0], labels_pred.shape[0]))
    return labels_true, labels_pred
def contingency_matrix(labels_true, labels_pred, eps=None, sparse=False):
    """Build the contingency matrix between two labelings.

    Entry ``[i, j]`` counts the samples whose label in ``labels_true`` is the
    i-th unique true label and whose label in ``labels_pred`` is the j-th
    unique predicted label (unique labels are ordered as ``np.unique``
    returns them).

    Parameters
    ----------
    labels_true : array-like
        Reference labels, one per sample.
    labels_pred : array-like
        Labels to compare against the reference, one per sample.
    eps : None or number, optional
        If not None, this value is added to every entry of the dense matrix.
        Cannot be combined with ``sparse=True``.
    sparse : bool, optional
        If True, return a SciPy CSR matrix; otherwise return a dense array.

    Returns
    -------
    contingency : numpy.ndarray or scipy.sparse CSR matrix
        Shape ``(n_classes, n_clusters)``. Counts are int32; when ``eps`` is
        given the dense result is ``counts + eps``.

    Raises
    ------
    ValueError
        If ``eps`` is set while ``sparse`` is True.
    """
    if eps is not None and sparse:
        raise ValueError("Cannot set 'eps' when sparse=True")

    classes, class_idx = np.unique(labels_true, return_inverse=True)
    clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)
    n_classes = classes.shape[0]
    n_clusters = clusters.shape[0]
    # Using coo_matrix to accelerate simple histogram calculation,
    # i.e. bins are consecutive integers
    # Currently, coo_matrix is faster than histogram2d for simple cases
    # Every sample contributes a 1 at (class index, cluster index); repeated
    # coordinates are added together when the matrix is converted below.
    contingency = sp.coo_matrix((np.ones(class_idx.shape[0]),
                                 (class_idx, cluster_idx)),
                                shape=(n_classes, n_clusters),
                                dtype=np.int32)
    if sparse:
        contingency = contingency.tocsr()
        contingency.sum_duplicates()
    else:
        contingency = contingency.toarray()
        if eps is not None:
            # don't use += as contingency is integer
            contingency = contingency + eps
    return contingency
def fscore(labels_true, labels_pred, sparse=False):
    """Compute pair-counting precision, recall and F-score, in percent.

    Pairs of samples are counted from the contingency matrix:

    * ``tk`` -- ordered pairs that share both the true and the predicted label
      (sum of squared cell counts minus ``n_samples``),
    * ``pk`` -- ordered pairs that share the predicted label
      (sum of squared column totals minus ``n_samples``),
    * ``qk`` -- ordered pairs that share the true label
      (sum of squared row totals minus ``n_samples``).

    Precision is ``tk / pk``, recall is ``tk / qk`` and the F-score is their
    harmonic mean.

    Parameters
    ----------
    labels_true : array-like
        Reference labels, one per sample (must be 1D).
    labels_pred : array-like
        Predicted cluster labels, one per sample (must be 1D, same length).
    sparse : bool, optional
        Accepted for backward compatibility; it is not used. The contingency
        matrix is always built in sparse form.

    Returns
    -------
    precision, recall, fscore : float
        Each value multiplied by 100. A ratio whose denominator is zero
        (no pair of samples shares a label) is reported as 0.0.

    Raises
    ------
    ValueError
        Propagated from ``check_clusterings`` for invalid label arrays.
    """
    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
    n_samples, = labels_true.shape
    c = contingency_matrix(labels_true, labels_pred, sparse=True)

    # The contingency counts are int32; widen before squaring so the sums of
    # squares cannot silently wrap around for large clusters.
    cell_counts = c.data.astype(np.int64)
    cluster_sizes = np.asarray(c.sum(axis=0)).ravel().astype(np.int64)
    class_sizes = np.asarray(c.sum(axis=1)).ravel().astype(np.int64)

    tk = np.dot(cell_counts, cell_counts) - n_samples
    pk = np.dot(cluster_sizes, cluster_sizes) - n_samples
    qk = np.dot(class_sizes, class_sizes) - n_samples

    # Guard every division: with only singleton clusters/classes there are
    # no pairs and the denominators are zero.
    avg_pre = tk / pk if pk else 0.0
    avg_rec = tk / qk if qk else 0.0
    total = avg_pre + avg_rec
    f_measure = 2. * avg_pre * avg_rec / total if total else 0.0
    return 100 * avg_pre, 100 * avg_rec, 100 * f_measure
if __name__ == '__main__':
    # Small worked example: two true classes of three samples each, with the
    # first class split across two predicted clusters.
    labels_true = [0, 0, 0, 1, 1, 1]
    labels_pred = [5, 5, 3, 4, 4, 4]
    # Bind the result to a name other than `fscore`; reusing that name would
    # overwrite the function at module level.
    precision, recall, f_value = fscore(labels_true, labels_pred)
    print("precision:", precision, "recall:", recall, "fscore:", f_value)
    # Expected output:
    # precision: 100.0 recall: 66.66666666666666 fscore: 80.0
本文作者:Dong
本文链接:
版权声明:本博客所有文章除特别声明外,均采用 CC BY-NC 许可协议。本作品采用《知识共享署名-非商业性使用 4.0 国际许可协议》进行许可。您可以在非商业用途下自由转载和修改,但必须注明出处并提供原作者链接。转载请注明出处!