Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions src/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,78 @@ function precision_recall_curve(scores, y, positive_class)
end


# # AVERAGE PRECISION

# Shared docstring fragment describing the average precision measure. It is interpolated
# into the `Functions.average_precision` docstring below and into the measure docstring
# built in src/probabilistic.jl (`AveragePrecisionDoc`), so edits here propagate to both.
const DOC_AVERAGE_PRECISION =
"""

Average precision is the area under the empirical precision-recall curve, understood as a
step function. This is to be contrasted with measures going under the name "area
under the precision-recall curve", in which the step function is usually replaced by a
piece-wise linear approximation. Generally, differences between the two are only obvious
when the number of observations is small, but it is faster to compute average precision.

Reference: Wikipedia entry, [Average
precision](https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision)

# Definition

Adopting each distinct predicted probability ``p_1, p_2, \\ldots, p_k`` for the positive
class as a soft probability threshold for predicting an actual class, and assuming these
thresholds are arranged in decreasing order, we obtain corresponding recalls ``R_1, R_2,
\\ldots, R_k`` (monotonically increasing) and precisions ``P_1, P_2, \\ldots,
P_k``. Adding an extra recall, ``R_{k+1} = 1``, the average precision implemented here is
defined as

``\\sum_{j=1}^k P_j (R_{j+1} - R_j)``

In some other implementations, such as
[scikit-learn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html#sklearn.metrics.average_precision_score),
``P_j`` is replaced by ``P_{j+1}``. However, this requires the definition of a precision
for unit recall, in the case the predicted positive class probabilities exclude `1.0`, and
this is avoided here.

"""

"""
    average_precision(ŷ, y, positive_class)

Return the average precision.

Here `ŷ` is a vector of predicted numerical probabilities of the specified
`positive_class`, which is one of two possible values occurring in the accompanying vector
`y` of ground truth observations.

$DOC_AVERAGE_PRECISION

$DOC_CONFUSION_CHECK Method requires at least one observation, but this is not checked.

"""
function average_precision(ŷ, y, positive_class)

    recalls, precisions, _ = precision_recall_curve(ŷ, y, positive_class)

    # `recalls` will have length at least two; in the degenerate case of no more than two
    # points the area is returned as 1.0.
    # NOTE(review): confirm with `precision_recall_curve` that the two-point case indeed
    # warrants a unit area here.
    length(recalls) > 2 || return 1.0

    area = 0.0
    r = recalls[1]

    # We ignore the last precision, as this does not correspond to any predicted
    # probability, but is rather an artifact to ensure precision-recall curves always have
    # a recall=1 point. See the definition in the docstring.
    # `precisions` and `recalls` are `Vector` objects, so they have fast linear indexing
    # starting from 1.
    for i in Base.OneTo(length(precisions) - 1)
        r_next = recalls[i + 1]
        Δr = r_next - r
        r = r_next
        # accumulate the step-function rectangle P_i * Δr:
        area = muladd(precisions[i], Δr, area)
    end

    return area
end


# # AUC

const DOC_AUC_REF =
Expand Down
57 changes: 57 additions & 0 deletions src/probabilistic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,63 @@ function l2_check(measure, yhat, y, weight_args...)
return nothing
end

# ---------------------------------------------------------
# AveragePrecision

# Core callable implementing the average precision measure on probabilistic predictions.
struct _AveragePrecision end

function (measure::_AveragePrecision)(ŷ::AbstractArray{<:UnivariateFinite}, y)
    class_levels = CategoricalArrays.levels(first(ŷ))
    warn_unordered(class_levels)
    # the last level is adopted as the positive class:
    positive_class = last(class_levels)
    # extract the predicted probabilities of the positive class:
    positive_probs = pdf.(ŷ, positive_class)
    return Functions.average_precision(positive_probs, y, positive_class)
end

# User-facing constructor: wraps the core callable in the package's robustness and
# argument-checking layers.
AveragePrecision() = _AveragePrecision() |> robust_measure |> fussy_measure

# Concrete type of objects returned by `AveragePrecision()`; used for display and
# registration below:
const AveragePrecisionType = API.FussyMeasure{
    <:API.RobustMeasure{<:_AveragePrecision}
}

@fix_show AveragePrecision::AveragePrecisionType

# `AveragePrecision` will inherit traits from `_AveragePrecision`:
@trait(_AveragePrecision,
    consumes_multiple_observations=true,
    observation_scitype = OrderedFactor{2},
    kind_of_proxy=LearnAPI.Distribution(),
    orientation=Score(),
    external_aggregation_mode=Mean(),
    human_name = "average precision",
)

register(AveragePrecision, "average_precision")

# Full docstring for the measure, assembled with the package's `docstring` helper and
# attached both to the constructor `AveragePrecision` and to the instance alias
# `average_precision` below:
const AveragePrecisionDoc = docstring(
    "AveragePrecision()",
    body=
"""
It is expected that `ŷ` be a vector of distributions over the binary set of unique
elements of `y`; specifically, `ŷ` should have eltype `<:UnivariateFinite` from the
CategoricalDistributions.jl package.

$(Functions.DOC_AVERAGE_PRECISION)

Core implementation: [`Functions.average_precision`](@ref).

""",
    scitype = "",
    footer="See also [`precision_recall_curve`](@ref). ",
)

"$AveragePrecisionDoc"
AveragePrecision
"$AveragePrecisionDoc"
const average_precision = AveragePrecision()


# ---------------------------------------------------------
# AreaUnderCurve

Expand Down
22 changes: 22 additions & 0 deletions test/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,28 @@ end
@test tprs ≈ sk_tprs
end

@testset "average_precision" begin
    # compute "by hand" (`ŷ` and `y` are fixtures defined earlier in this test file —
    # see the preceding testsets):
    recalls, precisions, _ = Functions.precision_recall_curve(ŷ, y, "1")
    recalls[end] = 1.0
    recall_deltas = [recalls[i + 1] - recalls[i] for i in 1:(length(recalls) - 1)]
    area_deltas = precisions[1:(end -1)] .* recall_deltas
    area = sum(area_deltas)

    # compare:
    @test Functions.average_precision(ŷ, y, "1") ≈ area

    # repeat with an example where the predicted probabilities include 1:
    ŷ2 = [0.8, 1.0, 0.7, 0.1, 0.7, 0.8, 0.6, 0.7, 0.3, 0.9,
          0.3, 0.8, 0.6, 0.7, 0.3, 0.9, 0.6, 0.7, 0.1, 0.8]
    recalls, precisions, _ = Functions.precision_recall_curve(ŷ2, y, "1")
    recalls[end] = 1.0
    recall_deltas = [recalls[i + 1] - recalls[i] for i in 1:(length(recalls) - 1)]
    area_deltas = precisions[1:(end -1)] .* recall_deltas
    area = sum(area_deltas)
    @test Functions.average_precision(ŷ2, y, "1") ≈ area
end

@testset "AUC" begin
# this is random binary and random scores generated with numpy
# then using roc_auc_score from sklearn to get the AUC
Expand Down
19 changes: 19 additions & 0 deletions test/probabilistic.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
@testset "AveragePrecision" begin
    y = categorical(["O", "X", "X", "X", "X", "O", "O", "O", "X", "X"], ordered=true)
    scores = [0.3, 0.2, 0.4, 0.9, 0.1, 0.4, 0.5, 0.2, 0.8, 0.7]
    # `augment=true` supplies the complementary probabilities for the first class, "O":
    ŷ = UnivariateFinite(["O", "X"], scores, augment=true, pool=y)
    core = Functions.average_precision(scores, y, "X")
    # with no log patterns given, `@test_logs` asserts the call emits no log messages:
    wrapped = @test_logs AveragePrecision()(ŷ, y)
    aliased = average_precision(ŷ, y)
    # core function, wrapped measure, and instance alias must all agree:
    @test core == wrapped == aliased

    # unordered case: the measure should warn about unordered class levels but return
    # the same value as in the ordered case:
    y = categorical(["O", "X", "X", "X", "X", "O", "O", "O", "X", "X"])
    ŷ = UnivariateFinite(["O", "X"], scores, augment=true, pool=y)
    wrapped = @test_logs(
        (:warn, StatisticalMeasures.warning_unordered(levels(y))),
        AveragePrecision()(ŷ, y),
    )
    @test wrapped == core
end

@testset "AreaUnderCurve" begin
# this is random binary and random scores generated with numpy
# then using roc_auc_score from sklearn to get the AUC
Expand Down
Loading