-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval_metrics.py
128 lines (106 loc) · 3.54 KB
/
eval_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import math
import numpy as np
def precision_at_k_per_sample(actual, predicted, topk):
    """
    Computes the precision at k for a single sample.

    Parameters
    ----------
    actual : iterable
        The relevant items (order doesn't matter)
    predicted : sequence
        The ranked predictions (order does matter); only the first
        ``topk`` entries are scored
    topk : int
        The cutoff rank, also used as the denominator; must be non-zero

    Returns
    -------
    score : float
        The number of distinct relevant items among the first ``topk``
        predictions, divided by ``topk``
    """
    relevant = set(actual)
    # Score only the top-k slice: counting hits past the cutoff while still
    # dividing by topk could push the result above 1.0.
    retrieved = set(predicted[:topk])
    # Set intersection counts each item once, matching precision_at_k.
    return len(relevant & retrieved) / float(topk)
def precision_at_k(actual, predicted, topk):
    """
    Computes the mean precision at k over all users.

    Parameters
    ----------
    actual : list
        A list of collections of relevant items, indexed by the same
        position as ``predicted`` (order doesn't matter in each collection)
    predicted : list
        A list of ranked prediction sequences
        (order matters in each sequence)
    topk : int
        The cutoff rank, also used as the denominator; must be non-zero

    Returns
    -------
    score : float
        The precision at k averaged over ``len(predicted)`` users, or 0.0
        when ``predicted`` is empty
    """
    num_users = len(predicted)
    # Use an explicit length check (not truthiness) so NumPy arrays work too.
    if num_users == 0:
        return 0.0
    sum_precision = 0.0
    for i in range(num_users):
        act_set = set(actual[i])
        pred_set = set(predicted[i][:topk])
        sum_precision += len(act_set & pred_set) / float(topk)
    return sum_precision / num_users
def recall_at_k(actual, predicted, topk):
    """
    Computes the mean recall at k over users that have relevant items.

    Parameters
    ----------
    actual : list
        A list of collections of relevant items, indexed by the same
        position as ``predicted`` (order doesn't matter in each collection)
    predicted : list
        A list of ranked prediction sequences
        (order matters in each sequence)
    topk : int
        The cutoff rank; only the first ``topk`` predictions are scored

    Returns
    -------
    score : float
        The recall at k averaged over the users whose ``actual`` entry is
        non-empty, or 0.0 when there is no such user
    """
    sum_recall = 0.0
    true_users = 0
    for i in range(len(predicted)):
        act_set = set(actual[i])
        # Users without ground truth have undefined recall; leave them out
        # of both the numerator and the denominator.
        if not act_set:
            continue
        pred_set = set(predicted[i][:topk])
        sum_recall += len(act_set & pred_set) / float(len(act_set))
        true_users += 1
    # Avoid ZeroDivisionError when no user contributed a recall value.
    if true_users == 0:
        return 0.0
    return sum_recall / true_users
def apk(actual, predicted, k=10):
    """
    Computes the average precision at k.

    The score is built from the first ``k`` entries of ``predicted``: each
    entry that is in ``actual`` and has not already appeared earlier in the
    ranking adds (hits so far) / (its 1-based rank) to a running sum.

    Parameters
    ----------
    actual : list
        A list of elements that are to be predicted (order doesn't matter)
    predicted : list
        A list of predicted elements (order does matter)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
        The running sum divided by ``min(len(actual), k)``, or 0.0 when
        ``actual`` is empty
    """
    ranking = predicted[:k] if len(predicted) > k else predicted

    hit_count = 0.0
    precision_sum = 0.0
    for rank, item in enumerate(ranking, start=1):
        # Skip misses, and skip repeats of an item already ranked earlier.
        if item not in actual or item in ranking[:rank - 1]:
            continue
        hit_count += 1.0
        precision_sum += hit_count / rank

    if not actual:
        return 0.0
    return precision_sum / min(len(actual), k)
def mapk(actual, predicted, k=10):
    """
    Computes the mean average precision at k.

    Pairs up the entries of ``actual`` and ``predicted`` position by
    position, scores each pair with ``apk``, and averages the scores.

    Parameters
    ----------
    actual : list
        A list of lists of elements that are to be predicted
        (order doesn't matter in the lists)
    predicted : list
        A list of lists of predicted elements
        (order matters in the lists)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
        The mean average precision at k over the input lists
    """
    per_query_scores = []
    for truth, ranking in zip(actual, predicted):
        per_query_scores.append(apk(truth, ranking, k))
    return np.mean(per_query_scores)
def ndcg_k(actual, predicted, topk):
    """
    Computes the mean NDCG at k over all users.

    Parameters
    ----------
    actual : list
        A list of collections of relevant items, one per user
        (order doesn't matter in each collection)
    predicted : list
        A list of ranked prediction sequences, one per user
        (order matters in each sequence)
    topk : int
        The cutoff rank; only the first ``topk`` predictions are scored

    Returns
    -------
    score : float
        The average of the per-user values from ``ndcg_list``, or 0.0 when
        ``actual`` is empty
    """
    scores = ndcg_list(actual, predicted, topk)
    # Avoid ZeroDivisionError when there are no users to average over.
    if not scores:
        return 0.0
    return sum(scores) / float(len(scores))


def idcg_k(k):
    """
    Calculates the ideal discounted cumulative gain at k.

    Parameters
    ----------
    k : int
        The number of top positions assumed to hold a relevant item

    Returns
    -------
    score : float
        The sum of ``1 / log2(i + 2)`` for ``i`` in ``range(k)``, or 1.0
        when that sum is zero
    """
    ideal = sum(1.0 / math.log(i + 2, 2) for i in range(k))
    # An empty sum (k <= 0) falls back to 1.0 so callers can divide by the
    # result without a zero check.
    return ideal if ideal else 1.0


def ndcg_list(actual, predicted, topk):
    """
    Builds the list of NDCG at k values, one per user.

    Parameters
    ----------
    actual : list
        A list of collections of relevant items, one per user
        (order doesn't matter in each collection)
    predicted : list
        A list of ranked prediction sequences, one per user
        (order matters in each sequence)
    topk : int
        The cutoff rank; only the first ``topk`` predictions are scored

    Returns
    -------
    scores : list
        For each user, DCG over the first ``topk`` predictions divided by
        ``idcg_k(min(topk, len(actual[user])))``
    """
    scores = []
    for user_id in range(len(actual)):
        # Build the lookup set once per user rather than once per position.
        relevant = set(actual[user_id])
        idcg = idcg_k(min(topk, len(actual[user_id])))
        # Slice instead of indexing range(topk) so a ranking shorter than
        # topk is scored on what it has rather than raising IndexError.
        dcg = sum(
            1.0 / math.log(rank + 2, 2)
            for rank, item in enumerate(predicted[user_id][:topk])
            if item in relevant
        )
        scores.append(dcg / idcg)
    return scores
if __name__ == '__main__':
    # Smoke check: print the mean NDCG@5 for a two-user toy example.
    actual = [
        [1, 2],
        [3, 4, 5],
    ]
    predicted = [
        [10, 20, 1, 30, 40],
        [10, 3, 20, 4, 5],
    ]
    score = ndcg_k(actual, predicted, 5)
    print(score)