QuantumKitHub · kshyatt · Dec 23, 2025 · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026
diff --git a/Project.toml b/Project.toml
@@ -32,7 +32,7 @@ GenericLinearAlgebra = "0.3.19"
 GenericSchur = "0.5.6"
 JET = "0.9, 0.10"
 LinearAlgebra = "1"
-Mooncake = "0.4.183"
+Mooncake = "0.4.195"
 ParallelTestRunner = "2"
 Random = "1"
 SafeTestsets = "0.1"

diff --git a/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl b/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl
@@ -7,7 +7,7 @@ using MatrixAlgebraKit: diagview, sign_safe
 using MatrixAlgebraKit: LQViaTransposedQR, TruncationStrategy, NoTruncation, TruncationByValue, AbstractAlgorithm
 using MatrixAlgebraKit: default_qr_algorithm, default_lq_algorithm, default_svd_algorithm, default_eigh_algorithm
 import MatrixAlgebraKit: _gpu_geqrf!, _gpu_ungqr!, _gpu_unmqr!, _gpu_gesvd!, _gpu_Xgesvdp!, _gpu_gesvdj!
-import MatrixAlgebraKit: _gpu_heevj!, _gpu_heevd!, _gpu_heev!, _gpu_heevx!
+import MatrixAlgebraKit: _gpu_heevj!, _gpu_heevd!, _gpu_heev!, _gpu_heevx!, _sylvester
 using AMDGPU
 using LinearAlgebra
 using LinearAlgebra: BlasFloat
@@ -171,4 +171,9 @@ end
 MatrixAlgebraKit._ind_intersect(A::ROCVector{Int}, B::ROCVector{Int}) =
     MatrixAlgebraKit._ind_intersect(collect(A), collect(B))
 
+function _sylvester(A::AnyROCMatrix, B::AnyROCMatrix, C::AnyROCMatrix) # ROC method of `_sylvester`
+    hX = sylvester(collect(A), collect(B), collect(C)) # solve on `collect`ed copies (presumably host arrays)
+    return ROCArray(hX) # convert the solution back to a ROCArray
+end
+
 end
diff --git a/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl b/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl
@@ -3,11 +3,11 @@ module MatrixAlgebraKitCUDAExt
 using MatrixAlgebraKit
 using MatrixAlgebraKit: @algdef, Algorithm, check_input
 using MatrixAlgebraKit: one!, zero!, uppertriangular!, lowertriangular!
-using MatrixAlgebraKit: diagview, sign_safe
+using MatrixAlgebraKit: diagview, sign_safe, default_pullback_gauge_atol, default_pullback_rank_atol
 using MatrixAlgebraKit: LQViaTransposedQR, TruncationByValue, AbstractAlgorithm
 using MatrixAlgebraKit: default_qr_algorithm, default_lq_algorithm, default_svd_algorithm, default_eig_algorithm, default_eigh_algorithm
 import MatrixAlgebraKit: _gpu_geqrf!, _gpu_ungqr!, _gpu_unmqr!, _gpu_gesvd!, _gpu_Xgesvdp!, _gpu_Xgesvdr!, _gpu_gesvdj!, _gpu_geev!
-import MatrixAlgebraKit: _gpu_heevj!, _gpu_heevd!
+import MatrixAlgebraKit: _gpu_heevj!, _gpu_heevd!, _sylvester
 using CUDA, CUDA.CUBLAS
 using CUDA: i32
 using LinearAlgebra
@@ -195,4 +195,18 @@ end
 MatrixAlgebraKit._ind_intersect(A::CuVector{Int}, B::CuVector{Int}) =
     MatrixAlgebraKit._ind_intersect(collect(A), collect(B))
 
+MatrixAlgebraKit.default_pullback_rank_atol(A::AnyCuArray) = eps(norm(CuArray(A), Inf))^(3 / 4) # norm taken on CuArray(A)
+MatrixAlgebraKit.default_pullback_gauge_atol(A::AnyCuArray) = MatrixAlgebraKit.iszerotangent(A) ? 0 : eps(norm(CuArray(A), Inf))^(3 / 4)
+function MatrixAlgebraKit.default_pullback_gauge_atol(A::AnyCuArray, As...)
+    As′ = filter(!MatrixAlgebraKit.iszerotangent, (A, As...)) # keep only the non-zero tangents
+    return isempty(As′) ? 0 : eps(norm(CuArray.(As′), Inf))^(3 / 4) # tolerance 0 when every tangent is zero
+end
+
+function _sylvester(A::AnyCuMatrix, B::AnyCuMatrix, C::AnyCuMatrix)
+    # Workaround: solve through `sylvester` on `collect`ed copies until CUDA gains a
+    # native solver, see https://github.com/JuliaGPU/CUDA.jl/issues/3021
+    hX = sylvester(collect(A), collect(B), collect(C))
+    return CuArray(hX)
+end
+
 end
diff --git a/ext/MatrixAlgebraKitChainRulesCoreExt.jl b/ext/MatrixAlgebraKitChainRulesCoreExt.jl
@@ -95,6 +95,9 @@ for eig in (:eig, :eigh)
     eig_t! = Symbol(eig, "_trunc!")
     eig_t_pb = Symbol(eig, "_trunc_pullback")
     _make_eig_t_pb = Symbol("_make_", eig_t_pb)
+    eig_t_ne! = Symbol(eig, "_trunc_no_error!")
+    eig_t_ne_pb = Symbol(eig, "_trunc_no_error_pullback")
+    _make_eig_t_ne_pb = Symbol("_make_", eig_t_ne_pb)
     eig_v = Symbol(eig, "_vals")
     eig_v! = Symbol(eig_v, "!")
     eig_v_pb = Symbol(eig_v, "_pullback")
@@ -136,6 +139,24 @@ for eig in (:eig, :eigh)
             end
             return $eig_t_pb
         end
+        function ChainRulesCore.rrule(::typeof($eig_t_ne!), A, DV, alg::TruncatedAlgorithm)
+            Ac = copy_input($eig_f, A) # factorize `copy_input`'s result; `A` itself goes to the pullback
+            DV = $(eig_f!)(Ac, DV, alg.alg) # decomposition with the wrapped algorithm `alg.alg`, before truncation
+            DV′, ind = MatrixAlgebraKit.truncate($eig_t!, DV, alg.trunc) # truncated factors plus `ind`, which is passed to the pullback
+            return DV′, $(_make_eig_t_ne_pb)(A, DV, ind) # pullback closes over the untruncated DV
+        end
+        function $(_make_eig_t_ne_pb)(A, DV, ind) # builds the pullback closure returned by the rrule
+            function $eig_t_ne_pb(ΔDV)
+                ΔA = zero(A)
+                ΔD, ΔV = ΔDV
+                MatrixAlgebraKit.$eig_pb!(ΔA, A, DV, unthunk.((ΔD, ΔV)), ind) # fills ΔA in place; return value unused
+                return NoTangent(), ΔA, ZeroTangent(), NoTangent() # one tangent per rrule argument: (function, A, DV, alg)
+            end
+            function $eig_t_ne_pb(::Tuple{ZeroTangent, ZeroTangent}) # is this extra definition useful?
+                return NoTangent(), ZeroTangent(), ZeroTangent(), NoTangent()
+            end
+            return $eig_t_ne_pb
+        end
         function ChainRulesCore.rrule(::typeof($eig_v!), A, D, alg)
             DV = $eig_f(A, alg)
             function $eig_v_pb(ΔD)

diff --git a/ext/MatrixAlgebraKitMooncakeExt/MatrixAlgebraKitMooncakeExt.jl b/ext/MatrixAlgebraKitMooncakeExt/MatrixAlgebraKitMooncakeExt.jl
@@ -3,7 +3,7 @@ module MatrixAlgebraKitMooncakeExt
 using Mooncake
 using Mooncake: DefaultCtx, CoDual, Dual, NoRData, rrule!!, frule!!, arrayify, @is_primitive
 using MatrixAlgebraKit
-using MatrixAlgebraKit: inv_safe, diagview, copy_input
+using MatrixAlgebraKit: inv_safe, diagview, copy_input, initialize_output
 using MatrixAlgebraKit: qr_pullback!, lq_pullback!
 using MatrixAlgebraKit: qr_null_pullback!, lq_null_pullback!
 using MatrixAlgebraKit: eig_pullback!, eigh_pullback!, eig_vals_pullback!
@@ -18,14 +18,16 @@ Mooncake.tangent_type(::Type{<:MatrixAlgebraKit.AbstractAlgorithm}) = Mooncake.N
 @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(copy_input), Any, Any}
 function Mooncake.rrule!!(::CoDual{typeof(copy_input)}, f_df::CoDual, A_dA::CoDual)
     Ac = copy_input(Mooncake.primal(f_df), Mooncake.primal(A_dA))
-    dAc = Mooncake.zero_tangent(Ac)
+    Ac_dAc = Mooncake.zero_fcodual(Ac)
+    dAc = Mooncake.tangent(Ac_dAc)
     function copy_input_pb(::NoRData)
         Mooncake.increment!!(Mooncake.tangent(A_dA), dAc)
         return NoRData(), NoRData(), NoRData()
     end
-    return CoDual(Ac, dAc), copy_input_pb
+    return Ac_dAc, copy_input_pb
 end
 
+Mooncake.@zero_derivative Mooncake.DefaultCtx Tuple{typeof(initialize_output), Any, Any, Any}
 # two-argument in-place factorizations like LQ, QR, EIG
 for (f!, f, pb, adj) in (
         (:qr_full!, :qr_full, :qr_pullback!, :qr_adjoint),

diff --git a/src/common/defaults.jl b/src/common/defaults.jl
@@ -34,6 +34,7 @@ default_pullback_degeneracy_atol(A) = eps(norm(A, Inf))^(3 / 4)
 Default tolerance for deciding what values should be considered equal to 0.
 """
 default_pullback_rank_atol(A) = eps(norm(A, Inf))^(3 / 4)
+default_pullback_rank_atol(A::Diagonal) = default_pullback_rank_atol(diagview(A))
 
 """
     default_hermitian_tol(A)

diff --git a/src/common/pullbacks.jl b/src/common/pullbacks.jl
@@ -10,3 +10,6 @@ function iszerotangent end
 
 iszerotangent(::Any) = false
 iszerotangent(::Nothing) = true
+
+# fallback: defer to LinearAlgebra.sylvester; other array types can add their own methods
+_sylvester(A, B, C) = LinearAlgebra.sylvester(A, B, C)
diff --git a/src/pullbacks/eig.jl b/src/pullbacks/eig.jl
@@ -78,16 +78,6 @@ function eig_pullback!(
     end
     return ΔA
 end
-function eig_pullback!(
-        ΔA::Diagonal, A, DV, ΔDV, ind = Colon();
-        degeneracy_atol::Real = default_pullback_rank_atol(DV[1]),
-        gauge_atol::Real = default_pullback_gauge_atol(ΔDV[2])
-    )
-    ΔA_full = zero!(similar(ΔA, size(ΔA)))
-    ΔA_full = eig_pullback!(ΔA_full, A, DV, ΔDV, ind; degeneracy_atol, gauge_atol)
-    diagview(ΔA) .+= diagview(ΔA_full)
-    return ΔA
-end
 
 """
     eig_trunc_pullback!(
@@ -150,7 +140,7 @@ function eig_trunc_pullback!(
     # add contribution from orthogonal complement
     PA = A - (A * V) / V
     Y = mul!(ΔVperp, PA', Z, 1, 1)
-    X = sylvester(PA', -Dmat', Y)
+    X = _sylvester(PA', -Dmat', Y)
     Z .+= X
 
     if eltype(ΔA) <: Real
@@ -161,16 +151,6 @@ function eig_trunc_pullback!(
     end
     return ΔA
 end
-function eig_trunc_pullback!(
-        ΔA::Diagonal, A, DV, ΔDV;
-        degeneracy_atol::Real = default_pullback_rank_atol(DV[1]),
-        gauge_atol::Real = default_pullback_gauge_atol(ΔDV[2])
-    )
-    ΔA_full = zero!(similar(ΔA, size(ΔA)))
-    ΔA_full = eig_trunc_pullback!(ΔA_full, A, DV, ΔDV; degeneracy_atol, gauge_atol)
-    diagview(ΔA) .+= diagview(ΔA_full)
-    return ΔA
-end
 
 """
     eig_vals_pullback!(
@@ -195,3 +175,25 @@ function eig_vals_pullback!(
     ΔDV = (diagonal(ΔD), nothing)
     return eig_pullback!(ΔA, A, DV, ΔDV, ind; degeneracy_atol)
 end
+
+function eig_pullback!(
+        ΔA::Diagonal, A, DV, ΔDV, ind = Colon();
+        degeneracy_atol::Real = default_pullback_rank_atol(DV[1]),
+        gauge_atol::Real = default_pullback_gauge_atol(ΔDV[2])
+    ) # method for a `Diagonal` ΔA: only its diagonal is updated
+    ΔA_full = zero!(similar(ΔA, size(ΔA))) # zeroed scratch array of the same size (presumably not a Diagonal)
+    eig_pullback!(ΔA_full, A, DV, ΔDV, ind; degeneracy_atol, gauge_atol) # result is written into the scratch array
+    diagview(ΔA) .+= diagview(ΔA_full) # accumulate the diagonal, discard the rest of the scratch array
+    return ΔA
+end
+
+function eig_trunc_pullback!(
+        ΔA::Diagonal, A, DV, ΔDV;
+        degeneracy_atol::Real = default_pullback_rank_atol(DV[1]),
+        gauge_atol::Real = default_pullback_gauge_atol(ΔDV[2])
+    ) # method for a `Diagonal` ΔA: only its diagonal is updated
+    ΔA_full = zero!(similar(ΔA, size(ΔA))) # zeroed scratch array of the same size (presumably not a Diagonal)
+    eig_trunc_pullback!(ΔA_full, A, DV, ΔDV; degeneracy_atol, gauge_atol) # result is written into the scratch array
+    diagview(ΔA) .+= diagview(ΔA_full) # accumulate the diagonal, discard the rest of the scratch array
+    return ΔA
+end
diff --git a/src/pullbacks/eigh.jl b/src/pullbacks/eigh.jl
@@ -1,3 +1,15 @@
+function check_eigh_cotangents(
+        D, aVᴴΔV;
+        degeneracy_atol::Real = default_pullback_rank_atol(D),
+        gauge_atol::Real = default_pullback_gauge_atol(aVᴴΔV)
+    ) # diagnostic only: emits a warning and returns `nothing`
+    mask = abs.(D' .- D) .< degeneracy_atol # entries where |D' .- D| is below the degeneracy tolerance
+    Δgauge = norm(view(aVᴴΔV, mask)) # norm of aVᴴΔV restricted to the masked entries
+    Δgauge ≤ gauge_atol ||
+        @warn "`eigh` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
+    return
+end
+
 """
     eigh_pullback!(
         ΔA::AbstractMatrix, A, DV, ΔDV, [ind];
@@ -41,12 +53,7 @@ function eigh_pullback!(
         length(indV) == pV || throw(DimensionMismatch())
         mul!(view(VᴴΔV, :, indV), V', ΔV)
         aVᴴΔV = project_antihermitian(VᴴΔV) # can't use in-place or recycling doesn't work
-
-        mask = abs.(D' .- D) .< degeneracy_atol
-        Δgauge = norm(view(aVᴴΔV, mask))
-        Δgauge ≤ gauge_atol ||
-            @warn "`eigh` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
-
+        check_eigh_cotangents(D, aVᴴΔV; degeneracy_atol, gauge_atol)
         aVᴴΔV .*= inv_safe.(D' .- D, degeneracy_atol)
 
         if !iszerotangent(ΔDmat)
@@ -120,10 +127,7 @@ function eigh_trunc_pullback!(
         VᴴΔV = V' * ΔV
         aVᴴΔV = project_antihermitian!(VᴴΔV)
 
-        mask = abs.(D' .- D) .< degeneracy_atol
-        Δgauge = norm(view(aVᴴΔV, mask))
-        Δgauge ≤ gauge_atol ||
-            @warn "`eigh` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
+        check_eigh_cotangents(D, aVᴴΔV; degeneracy_atol, gauge_atol)
 
         aVᴴΔV .*= inv_safe.(D' .- D, degeneracy_atol)
 
@@ -138,7 +142,7 @@ function eigh_trunc_pullback!(
         # add contribution from orthogonal complement
         W = qr_null(V)
         WᴴΔV = W' * ΔV
-        X = sylvester(W' * A * W, -Dmat, WᴴΔV)
+        X = _sylvester(W' * A * W, -Dmat, WᴴΔV)
         Z = mul!(Z, W, X, 1, 1)
 
         # put everything together: symmetrize for hermitian case

diff --git a/src/pullbacks/lq.jl b/src/pullbacks/lq.jl
@@ -1,3 +1,42 @@
+# Warn if the cotangents of an LQ with p < minmn (rank-deficient case) are gauge dependent.
+function check_lq_cotangents(
+        L, Q, ΔL, ΔQ, minmn::Int, p::Int;
+        gauge_atol::Real = default_pullback_gauge_atol(ΔQ)
+    )
+    if minmn > p # case where A is rank-deficient
+        Δgauge = abs(zero(eltype(Q)))
+        if !iszerotangent(ΔQ)
+            # in this case the number of Householder reflections will change upon
+            # small variations, and all of the remaining rows of ΔQ should be zero
+            # for a gauge-invariant cost function
+            ΔQ2 = view(ΔQ, (p + 1):size(Q, 1), :)
+            Δgauge = max(Δgauge, norm(ΔQ2, Inf))
+        end
+        if !iszerotangent(ΔL)
+            ΔL22 = view(ΔL, (p + 1):size(L, 1), (p + 1):minmn)
+            Δgauge = max(Δgauge, norm(ΔL22, Inf))
+        end
+        Δgauge ≤ gauge_atol ||
+            @warn "`lq` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
+    end
+    return
+end
+
+function check_lq_full_cotangents(Q1, ΔQ2, ΔQ2Q1ᴴ; gauge_atol::Real = default_pullback_gauge_atol(Q1))
+    # Warn when ΔQ2 - ΔQ2Q1ᴴ * Q1 exceeds `gauge_atol` in the Inf-norm.
+    # In the case where A is full rank, but there are more rows in Q than in A (the
+    # case of `lq_full`), there is gauge-invariant information in the projection of
+    # ΔQ2 onto the row space of Q1, by virtue of Q being a unitary matrix. As the
+    # number of Householder reflections is fixed in the full rank case, Q is expected
+    # to rotate smoothly (we might even be able to predict how the full Q2 will
+    # change), but this we omit for now and treat the remainder as gauge dependent.
+    Δgauge = norm(mul!(copy(ΔQ2), ΔQ2Q1ᴴ, Q1, -1, 1), Inf)
+    Δgauge ≤ gauge_atol ||
+        @warn "`lq` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
+    return
+end
+
+
 """
     lq_pullback!(
         ΔA, A, LQ, ΔLQ;
@@ -36,28 +75,12 @@ function lq_pullback!(
     ΔA1 = view(ΔA, 1:p, :)
     ΔA2 = view(ΔA, (p + 1):m, :)
 
-    if minmn > p # case where A is rank-deficient
-        Δgauge = abs(zero(eltype(Q)))
-        if !iszerotangent(ΔQ)
-            # in this case the number Householder reflections will
-            # change upon small variations, and all of the remaining
-            # columns of ΔQ should be zero for a gauge-invariant
-            # cost function
-            ΔQ2 = view(ΔQ, (p + 1):size(Q, 1), :)
-            Δgauge = max(Δgauge, norm(ΔQ2, Inf))
-        end
-        if !iszerotangent(ΔL)
-            ΔL22 = view(ΔL, (p + 1):m, (p + 1):minmn)
-            Δgauge = max(Δgauge, norm(ΔL22, Inf))
-        end
-        Δgauge ≤ gauge_atol ||
-            @warn "`lq` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
-    end
+    check_lq_cotangents(L, Q, ΔL, ΔQ, minmn, p; gauge_atol)
 
     ΔQ̃ = zero!(similar(Q, (p, n)))
     if !iszerotangent(ΔQ)
         ΔQ1 = view(ΔQ, 1:p, :)
-        copy!(ΔQ̃, ΔQ1)
+        ΔQ̃ .= ΔQ1
         if p < size(Q, 1)
             Q2 = view(Q, (p + 1):size(Q, 1), :)
             ΔQ2 = view(ΔQ, (p + 1):size(Q, 1), :)
@@ -69,9 +92,7 @@ function lq_pullback!(
             # how the full Q2 will change, but this we omit for now, and we consider
             # Q2' * ΔQ2 as a gauge dependent quantity.
             ΔQ2Q1ᴴ = ΔQ2 * Q1'
-            Δgauge = norm(mul!(copy(ΔQ2), ΔQ2Q1ᴴ, Q1, -1, 1), Inf)
-            Δgauge ≤ gauge_atol ||
-                @warn "`lq` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)"
+            check_lq_full_cotangents(Q1, ΔQ2, ΔQ2Q1ᴴ; gauge_atol)
             ΔQ̃ = mul!(ΔQ̃, ΔQ2Q1ᴴ', Q2, -1, 1)
         end
     end
@@ -95,12 +116,32 @@ function lq_pullback!(
         Md = diagview(M)
         Md .= real.(Md)
     end
-    ldiv!(LowerTriangular(L11)', M)
-    ldiv!(LowerTriangular(L11)', ΔQ̃)
+    # not GPU friendly...
+    L11arr = typeof(L)(L11)
+    ldiv!(LowerTriangular(L11arr)', M)
+    ldiv!(LowerTriangular(L11arr)', ΔQ̃)
     ΔA1 = mul!(ΔA1, M, Q1, +1, 1)
     ΔA1 .+= ΔQ̃
     return ΔA
 end
+function lq_pullback!(
+        ΔA::Diagonal, A, LQ, ΔLQ;
+        rank_atol::Real = default_pullback_rank_atol(LQ[1]),
+        gauge_atol::Real = default_pullback_gauge_atol(ΔLQ[2])
+    ) # method for a `Diagonal` ΔA: only its diagonal is updated
+    ΔA_full = zero!(similar(ΔA, size(ΔA))) # zeroed scratch array of the same size (presumably not a Diagonal)
+    ΔA_full = lq_pullback!(ΔA_full, A, LQ, ΔLQ; rank_atol, gauge_atol) # rebinds to the pullback's return value
+    diagview(ΔA) .+= diagview(ΔA_full) # accumulate the diagonal, discard the rest of the scratch array
+    return ΔA
+end
+
+function check_lq_null_cotangents(Nᴴ, ΔNᴴ; gauge_atol::Real = default_pullback_gauge_atol(ΔNᴴ))
+    aNᴴΔN = project_antihermitian!(Nᴴ * ΔNᴴ') # projection applied in place to the freshly allocated product
+    Δgauge = norm(aNᴴΔN) # default `norm`, compared against gauge_atol below
+    Δgauge ≤ gauge_atol ||
+        @warn "`lq_null` cotangent sensitive to gauge choice: (|Δgauge| = $Δgauge)"
+    return
+end
 
 """
     lq_null_pullback!(
@@ -118,10 +159,7 @@ function lq_null_pullback!(
         gauge_atol::Real = default_pullback_gauge_atol(ΔNᴴ)
     )
     if !iszerotangent(ΔNᴴ) && size(Nᴴ, 1) > 0
-        aNᴴΔN = project_antihermitian!(Nᴴ * ΔNᴴ')
-        Δgauge = norm(aNᴴΔN)
-        Δgauge ≤ gauge_atol ||
-            @warn "`lq_null` cotangent sensitive to gauge choice: (|Δgauge| = $Δgauge)"
+        check_lq_null_cotangents(Nᴴ, ΔNᴴ; gauge_atol)
         L, Q = lq_compact(A; positive = true) # should we be able to provide algorithm here?
         X = ldiv!(LowerTriangular(L)', Q * ΔNᴴ')
         ΔA = mul!(ΔA, X, Nᴴ, -1, 1)