Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions src/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,78 @@ function precision_recall_curve(scores, y, positive_class)
end


# # AVERAGE PRECISION

# Shared docstring fragment describing the average precision measure. It is interpolated
# into the `Functions.average_precision` docstring below and into the measure docstring
# built in src/probabilistic.jl (`AveragePrecisionDoc`), so edits here propagate to both.
const DOC_AVERAGE_PRECISION =
"""

Average precision is the area under the empirical precision-recall curve, understood as a
step function. This is to be contrasted with measures going under the name "area
under the precision-recall curve", in which the step function is usually replaced by a
piece-wise linear approximation. Generally, differences between the two are only obvious
when the number of observations is small, but it is faster to compute average precision.

Reference: Wikipedia entry, [Average
precision](https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision)

# Definition

Adopting each distinct predicted probability ``p_1, p_2, \\ldots, p_k`` for the positive
class as a soft probability threshold for predicting an actual class, and assuming these
thresholds are arranged in decreasing order, we obtain corresponding recalls ``R_1, R_2,
\\ldots, R_k`` (monotonically increasing) and precisions ``P_1, P_2, \\ldots,
P_k``. Adding an extra recall, ``R_{k+1} = 1``, the average precision implemented here is
defined as

``\\sum_{j=1}^k P_j (R_{j+1} - R_j)``

In some other implementations, such as
[scikit-learn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html#sklearn.metrics.average_precision_score),
``P_j`` is replaced by ``P_{j+1}``. However, this requires the definition of a precision
for unit recall, in the case the predicted positive class probabilities exclude `1.0`, and
this is avoided here.

"""

"""
    average_precision(ŷ, y, positive_class)

Return the average precision.

Here `ŷ` is a vector of predicted numerical probabilities of the specified
`positive_class`, which is one of two possible values occurring in the accompanying vector
`y` of ground truth observations.

$DOC_AVERAGE_PRECISION

$DOC_CONFUSION_CHECK Method requires at least one observation, but this is not checked.

"""
function average_precision(ŷ, y, positive_class)

    recalls, precisions, _ = precision_recall_curve(ŷ, y, positive_class)

    # `recalls` will have length at least two; in the degenerate case of no more than two
    # points the area is returned as 1.0.
    # NOTE(review): confirm with `precision_recall_curve` that the two-point case indeed
    # warrants a unit area here.
    length(recalls) > 2 || return 1.0

    area = 0.0
    r = recalls[1]

    # We ignore the last precision, as this does not correspond to any predicted
    # probability, but is rather an artifact to ensure precision-recall curves always have
    # a recall=1 point. See the definition in the docstring.
    # `precisions` and `recalls` are `Vector` objects, so they have fast linear indexing
    # starting from 1.
    for i in Base.OneTo(length(precisions) - 1)
        r_next = recalls[i + 1]
        Δr = r_next - r
        r = r_next
        # accumulate the step-function rectangle P_i * Δr:
        area = muladd(precisions[i], Δr, area)
    end

    return area
end


# # AUC

const DOC_AUC_REF =
Expand Down
57 changes: 57 additions & 0 deletions src/probabilistic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,63 @@ function l2_check(measure, yhat, y, weight_args...)
return nothing
end

# ---------------------------------------------------------
# AveragePrecision

# Core callable implementing the average precision measure on probabilistic predictions.
struct _AveragePrecision end

function (measure::_AveragePrecision)(ŷ::AbstractArray{<:UnivariateFinite}, y)
    class_levels = CategoricalArrays.levels(first(ŷ))
    warn_unordered(class_levels)
    # the last level is adopted as the positive class:
    positive_class = last(class_levels)
    # extract the predicted probabilities of the positive class:
    positive_probs = pdf.(ŷ, positive_class)
    return Functions.average_precision(positive_probs, y, positive_class)
end

# User-facing constructor: wraps the core callable in the package's robustness and
# argument-checking layers.
AveragePrecision() = _AveragePrecision() |> robust_measure |> fussy_measure

# Concrete type of objects returned by `AveragePrecision()`; used for display and
# registration below:
const AveragePrecisionType = API.FussyMeasure{
    <:API.RobustMeasure{<:_AveragePrecision}
}

@fix_show AveragePrecision::AveragePrecisionType

# `AveragePrecision` will inherit traits from `_AveragePrecision`:
@trait(_AveragePrecision,
    consumes_multiple_observations=true,
    observation_scitype = OrderedFactor{2},
    kind_of_proxy=LearnAPI.Distribution(),
    orientation=Score(),
    external_aggregation_mode=Mean(),
    human_name = "average precision",
)

register(AveragePrecision, "average_precision")

# Full docstring for the measure, assembled with the package's `docstring` helper and
# attached both to the constructor `AveragePrecision` and to the instance alias
# `average_precision` below:
const AveragePrecisionDoc = docstring(
    "AveragePrecision()",
    body=
"""
It is expected that `ŷ` be a vector of distributions over the binary set of unique
elements of `y`; specifically, `ŷ` should have eltype `<:UnivariateFinite` from the
CategoricalDistributions.jl package.

$(Functions.DOC_AVERAGE_PRECISION)

Core implementation: [`Functions.average_precision`](@ref).

""",
    scitype = "",
    footer="See also [`precision_recall_curve`](@ref). ",
)

"$AveragePrecisionDoc"
AveragePrecision
"$AveragePrecisionDoc"
const average_precision = AveragePrecision()


# ---------------------------------------------------------
# AreaUnderCurve

Expand Down
22 changes: 22 additions & 0 deletions test/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,28 @@ end
@test tprs ≈ sk_tprs
end

@testset "average_precision" begin
    # compute "by hand" (`ŷ` and `y` are fixtures defined earlier in this test file —
    # see the preceding testsets):
    recalls, precisions, _ = Functions.precision_recall_curve(ŷ, y, "1")
    recalls[end] = 1.0
    recall_deltas = [recalls[i + 1] - recalls[i] for i in 1:(length(recalls) - 1)]
    area_deltas = precisions[1:(end -1)] .* recall_deltas
    area = sum(area_deltas)

    # compare:
    @test Functions.average_precision(ŷ, y, "1") ≈ area

    # repeat with an example where the predicted probabilities include 1:
    ŷ2 = [0.8, 1.0, 0.7, 0.1, 0.7, 0.8, 0.6, 0.7, 0.3, 0.9,
          0.3, 0.8, 0.6, 0.7, 0.3, 0.9, 0.6, 0.7, 0.1, 0.8]
    recalls, precisions, _ = Functions.precision_recall_curve(ŷ2, y, "1")
    recalls[end] = 1.0
    recall_deltas = [recalls[i + 1] - recalls[i] for i in 1:(length(recalls) - 1)]
    area_deltas = precisions[1:(end -1)] .* recall_deltas
    area = sum(area_deltas)
    @test Functions.average_precision(ŷ2, y, "1") ≈ area
end

@testset "AUC" begin
# this is random binary and random scores generated with numpy
# then using roc_auc_score from sklearn to get the AUC
Expand Down
19 changes: 19 additions & 0 deletions test/probabilistic.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
@testset "AveragePrecision" begin
    y = categorical(["O", "X", "X", "X", "X", "O", "O", "O", "X", "X"], ordered=true)
    scores = [0.3, 0.2, 0.4, 0.9, 0.1, 0.4, 0.5, 0.2, 0.8, 0.7]
    # `augment=true` supplies the complementary probabilities for the first class, "O":
    ŷ = UnivariateFinite(["O", "X"], scores, augment=true, pool=y)
    core = Functions.average_precision(scores, y, "X")
    # with no log patterns given, `@test_logs` asserts the call emits no log messages:
    wrapped = @test_logs AveragePrecision()(ŷ, y)
    aliased = average_precision(ŷ, y)
    # core function, wrapped measure, and instance alias must all agree:
    @test core == wrapped == aliased

    # unordered case: the measure should warn about unordered class levels but return
    # the same value as in the ordered case:
    y = categorical(["O", "X", "X", "X", "X", "O", "O", "O", "X", "X"])
    ŷ = UnivariateFinite(["O", "X"], scores, augment=true, pool=y)
    wrapped = @test_logs(
        (:warn, StatisticalMeasures.warning_unordered(levels(y))),
        AveragePrecision()(ŷ, y),
    )
    @test wrapped == core
end

@testset "AreaUnderCurve" begin
# this is random binary and random scores generated with numpy
# then using roc_auc_score from sklearn to get the AUC
Expand Down
Loading