diff --git a/src/bidiag.jl b/src/bidiag.jl index 88b5a7634..fe20ab420 100644 --- a/src/bidiag.jl +++ b/src/bidiag.jl @@ -1292,7 +1292,7 @@ function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector) nx, ny = length(x), length(y) (nx == size(B, 1) == ny) || throw(DimensionMismatch()) if nx ≤ 1 - nx == 0 && return dot(zero(eltype(x)), zero(eltype(B)), zero(eltype(y))) + nx == 0 && return zero(dot(zero(eltype(x)), zero(eltype(B)), zero(eltype(y)))) return dot(x[1], B.dv[1], y[1]) end ev, dv = B.ev, B.dv diff --git a/src/blas.jl b/src/blas.jl index 71e28e014..642a5f751 100644 --- a/src/blas.jl +++ b/src/blas.jl @@ -2102,7 +2102,7 @@ end her2k!(uplo, trans, alpha, A, B, beta, C) Rank-2k update of the Hermitian matrix `C` as -`alpha*A*B' + alpha*B*A' + beta*C` or `alpha*A'*B + alpha*B'*A + beta*C` +`alpha*A*B' + alpha'*B*A' + beta*C` or `alpha*A'*B + alpha'*B'*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). The scalar `beta` has to be real. Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. """ @@ -2111,8 +2111,8 @@ function her2k! end """ her2k(uplo, trans, alpha, A, B) -Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'` -or `alpha*A'*B + alpha*B'*A`, according to [`trans`](@ref stdlib-blas-trans). +Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha'*B*A'` +or `alpha*A'*B + alpha'*B'*A`, according to [`trans`](@ref stdlib-blas-trans). """ her2k(uplo, trans, alpha, A, B) diff --git a/src/generic.jl b/src/generic.jl index e030f22e7..45c1936ff 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -993,7 +993,8 @@ function dot(x::AbstractArray, y::AbstractArray) throw(DimensionMismatch(lazy"first array has length $(lx) which does not match the length of the second, $(length(y)).")) end if lx == 0 - return dot(zero(eltype(x)), zero(eltype(y))) + # make sure the returned result equals exactly the zero element + return zero(dot(zero(eltype(x)), zero(eltype(y)))) end s = zero(dot(first(x), first(y))) for (Ix, Iy) in zip(eachindex(x), eachindex(y)) @@ -1034,6 +1035,8 @@ dot(x, A, y) = dot(x, A*y) # generic fallback for cases that are not covered by function dot(x::AbstractVector, A::AbstractMatrix, y::AbstractVector) (axes(x)..., axes(y)...) == axes(A) || throw(DimensionMismatch()) + # outermost zero call to avoid spurious sign ambiguity (like 0.0 - 0.0im) + any(isempty, (x, y)) && return zero(dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))) T = typeof(dot(first(x), first(A), first(y))) s = zero(T) i₁ = first(eachindex(x)) diff --git a/src/hessenberg.jl b/src/hessenberg.jl index 364501609..9c5e2e40f 100644 --- a/src/hessenberg.jl +++ b/src/hessenberg.jl @@ -404,7 +404,7 @@ function dot(x::AbstractVector, H::UpperHessenberg, y::AbstractVector) m = size(H, 1) (length(x) == m == length(y)) || throw(DimensionMismatch()) if iszero(m) - return dot(zero(eltype(x)), zero(eltype(H)), zero(eltype(y))) + return zero(dot(zero(eltype(x)), zero(eltype(H)), zero(eltype(y)))) end x₁ = x[1] r = dot(x₁, H[1,1], y[1]) diff --git a/src/matmul.jl b/src/matmul.jl index d618bcfe9..229e1972e 100644 --- a/src/matmul.jl +++ b/src/matmul.jl @@ -317,7 +317,7 @@ end BlasFlag.SYRK elseif (tA_uc == 'C' && tB_uc == 'N') || (tA_uc == 'N' && tB_uc == 'C') BlasFlag.HERK - else isntc + else BlasFlag.GEMM end else @@ -465,7 +465,7 @@ end throw(DimensionMismatch( LazyString( "incompatible destination size: ", - lazy"the destination $strC of $size_or_len_str_C $C_size_len is incomatible with the product of a $strA of size $sizeA and a $strB of $size_or_len_str_B $B_size_len. ", + lazy"the destination $strC of $size_or_len_str_C $C_size_len is incompatible with the product of a $strA of size $sizeA and a $strB of $size_or_len_str_B $B_size_len. ", lazy"The destination must be of $size_or_len_str_dest $destsize." ) ) @@ -499,7 +499,7 @@ function matmul2x2or3x3_nonzeroalpha!(C, tA, tB, A, B, α::Bool, β) return false end -# THE one big BLAS dispatch. This is split into two methods to improve latency +# THE one big BLAS dispatch. This is split into syrk/herk/gemm and symm/hemm/none methods to improve latency Base.@constprop :aggressive function generic_matmatmul_wrapper!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, α::Number, β::Number, val::BlasFlag.SyrkHerkGemm) where {T<:Number} mA, nA = lapack_size(tA, A) @@ -511,6 +511,12 @@ Base.@constprop :aggressive function generic_matmatmul_wrapper!(C::StridedMatrix _syrk_herk_gemm_wrapper!(C, tA, tB, A, B, α, β, val) return C end + +function generic_matmatmul_wrapper!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, + α::Number, β::Number, ::Val{BlasFlag.GEMM}) where {T<:BlasReal} + gemm_wrapper!(C, tA, tB, A, B, α, β) +end + Base.@constprop :aggressive function _syrk_herk_gemm_wrapper!(C, tA, tB, A, B, α, β, ::Val{BlasFlag.SYRK}) if A === B tA_uc = uppercase(tA) # potentially strip a WrapperChar @@ -651,14 +657,6 @@ Base.@constprop :aggressive generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::S _add::MulAddMul = MulAddMul()) where {T<:BlasFloat} = generic_matmatmul!(C, tA, tB, A, B, _add.alpha, _add.beta) -function generic_matmatmul_wrapper!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, - α::Number, β::Number, ::Val{true}) where {T<:BlasReal} - gemm_wrapper!(C, tA, tB, A, B, α, β) -end -Base.@constprop :aggressive function generic_matmatmul_wrapper!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, - alpha::Number, beta::Number, ::Val{false}) where {T<:BlasReal} - _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), alpha, beta) -end # legacy method Base.@constprop :aggressive generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, _add::MulAddMul = MulAddMul()) where {T<:BlasReal} = diff --git a/src/symmetric.jl b/src/symmetric.jl index 50f340225..8af786b91 100644 --- a/src/symmetric.jl +++ b/src/symmetric.jl @@ -573,7 +573,7 @@ for (T, trans, real) in [(:Symmetric, :transpose, :identity), (:(Hermitian{<:Uni if n != size(B, 2) throw(DimensionMismatch(lazy"A has dimensions $(size(A)) but B has dimensions $(size(B))")) end - + iszero(n) && return $real(zero(dot(zero(eltype(A)), zero(eltype(B))))) dotprod = $real(zero(dot(first(A), first(B)))) @inbounds if A.uplo == 'U' && B.uplo == 'U' for j in 1:n @@ -774,6 +774,7 @@ function dot(x::AbstractVector, A::HermOrSym, y::AbstractVector) require_one_based_indexing(x, y) n = length(x) (n == length(y) == size(A, 1)) || throw(DimensionMismatch()) + iszero(n) && return zero(dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))) data = A.data s = dot(first(x), first(A), first(y)) r = zero(s+s) diff --git a/src/triangular.jl b/src/triangular.jl index d82ddd870..75c721bca 100644 --- a/src/triangular.jl +++ b/src/triangular.jl @@ -893,7 +893,7 @@ function dot(x::AbstractVector, A::UpperTriangular, y::AbstractVector) m = size(A, 1) (length(x) == m == length(y)) || throw(DimensionMismatch()) if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) + return zero(dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))) end x₁ = x[1] r = dot(x₁, A[1,1], y[1]) @@ -914,7 +914,7 @@ function dot(x::AbstractVector, A::UnitUpperTriangular, y::AbstractVector) m = size(A, 1) (length(x) == m == length(y)) || throw(DimensionMismatch()) if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) + return zero(dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))) end x₁ = first(x) r = dot(x₁, y[1]) @@ -936,7 +936,7 @@ function dot(x::AbstractVector, A::LowerTriangular, y::AbstractVector) m = size(A, 1) (length(x) == m == length(y)) || throw(DimensionMismatch()) if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) + return zero(dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))) end r = zero(typeof(dot(first(x), first(A), first(y)))) @inbounds for j in axes(A, 2) @@ -956,7 +956,7 @@ function dot(x::AbstractVector, A::UnitLowerTriangular, y::AbstractVector) m = size(A, 1) (length(x) == m == length(y)) || throw(DimensionMismatch()) if iszero(m) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) + return zero(dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))) end r = zero(typeof(dot(first(x), first(y)))) @inbounds for j in axes(A, 2) diff --git a/src/tridiag.jl b/src/tridiag.jl index a0e3d821c..2357ecc12 100644 --- a/src/tridiag.jl +++ b/src/tridiag.jl @@ -253,7 +253,7 @@ function dot(x::AbstractVector, S::SymTridiagonal, y::AbstractVector) nx, ny = length(x), length(y) (nx == size(S, 1) == ny) || throw(DimensionMismatch("dot")) if nx ≤ 1 - nx == 0 && return dot(zero(eltype(x)), zero(eltype(S)), zero(eltype(y))) + nx == 0 && return zero(dot(zero(eltype(x)), zero(eltype(S)), zero(eltype(y)))) return dot(x[1], S.dv[1], y[1]) end dv, ev = S.dv, S.ev @@ -1022,7 +1022,7 @@ function dot(x::AbstractVector, A::Tridiagonal, y::AbstractVector) nx, ny = length(x), length(y) (nx == size(A, 1) == ny) || throw(DimensionMismatch()) if nx ≤ 1 - nx == 0 && return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) + nx == 0 && return zero(dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))) return dot(x[1], A.d[1], y[1]) end @inbounds begin diff --git a/test/blas.jl b/test/blas.jl index b7f4a03af..6cda7fbe5 100644 --- a/test/blas.jl +++ b/test/blas.jl @@ -721,15 +721,18 @@ end @test BLAS.her!('L', real(elty(2)), x, A) isa WrappedArray{elty,2} @test A == WrappedArray(elty[5 2+2im; 11+3im 20]) # Level 3 - A = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im]) + # Hermitian matrices require real diagonal elements + A = WrappedArray(elty[1 2+2im; 2-2im 4]) B = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im]) - C = WrappedArray(elty[1+im 2+2im; 3+3im 4+4im]) + C = WrappedArray(elty[1 2+2im; 2-2im 4]) @test BLAS.hemm!('L', 'U', elty(2), A, B, elty(1), C) isa WrappedArray{elty,2} - @test C == WrappedArray([3+27im 6+38im; 35+27im 52+36im]) + @test C == WrappedArray([3+26im 6+38im; 34+22im 52+32im]) + C = WrappedArray(elty[1 2+2im; 2-2im 4]) # reset C to Hermitian @test BLAS.herk!('U', 'N', real(elty(2)), A, real(elty(1)), C) isa WrappedArray{elty,2} - @test C == WrappedArray([23 50+38im; 35+27im 152]) + @test C == WrappedArray([19 22+22im; 2-2im 52]) + C = WrappedArray(elty[1 2+2im; 2-2im 4]) # reset C to Hermitian @test BLAS.her2k!('U', 'N', elty(2), A, B, real(elty(1)), C) isa WrappedArray{elty,2} - @test C == WrappedArray([63 138+38im; 35+27im 352]) + @test C == WrappedArray([37 56+20im; 2-2im 68]) end end diff --git a/test/generic.jl b/test/generic.jl index a56c1006f..d2bf2ffb9 100644 --- a/test/generic.jl +++ b/test/generic.jl @@ -784,6 +784,10 @@ end @test dot(x, B', y) ≈ dot(B*x, y) elty <: Real && @test dot(x, transpose(B), y) ≈ dot(x, transpose(B)*y) end + for (m, n) in ((0, 0), (1, 0), (0, 1)) + v = zeros(ComplexF64, m); a = zeros(ComplexF64, m, n); w = zeros(Float64, n) + @test dot(v, a, w) === zero(ComplexF64) + end end @testset "condskeel #34512" begin diff --git a/test/matmul.jl b/test/matmul.jl index 1fcf20094..d908a155e 100644 --- a/test/matmul.jl +++ b/test/matmul.jl @@ -696,23 +696,24 @@ end @test dot(Z, Z) == convert(elty, 34.0) end -dot1(x, y) = invoke(dot, Tuple{Any,Any}, x, y) -dot2(x, y) = invoke(dot, Tuple{AbstractArray,AbstractArray}, x, y) @testset "generic dot" begin + dot1(x, y) = invoke(dot, Tuple{Any,Any}, x, y) + dot2(x, y) = invoke(dot, Tuple{AbstractArray,AbstractArray}, x, y) AA = [1+2im 3+4im; 5+6im 7+8im] BB = [2+7im 4+1im; 3+8im 6+5im] for A in (copy(AA), view(AA, 1:2, 1:2)), B in (copy(BB), view(BB, 1:2, 1:2)) @test dot(A, B) == dot(vec(A), vec(B)) == dot1(A, B) == dot2(A, B) == dot(float.(A), float.(B)) - @test dot(Int[], Int[]) == 0 == dot1(Int[], Int[]) == dot2(Int[], Int[]) - @test_throws MethodError dot(Any[], Any[]) - @test_throws MethodError dot1(Any[], Any[]) - @test_throws MethodError dot2(Any[], Any[]) - for n1 = 0:2, n2 = 0:2, d in (dot, dot1, dot2) - if n1 != n2 - @test_throws DimensionMismatch d(1:n1, 1:n2) - else - @test d(1:n1, 1:n2) ≈ norm(1:n1)^2 - end + end + @test dot(Int[], Int[]) == 0 == dot1(Int[], Int[]) == dot2(Int[], Int[]) + @test dot(ComplexF64[], Float64[]) === dot(ComplexF64[;;], Float64[;;]) === zero(ComplexF64) + @test_throws MethodError dot(Any[], Any[]) + @test_throws MethodError dot1(Any[], Any[]) + @test_throws MethodError dot2(Any[], Any[]) + for n1 = 0:2, n2 = 0:2, d in (dot, dot1, dot2) + if n1 != n2 + @test_throws DimensionMismatch d(1:n1, 1:n2) + else + @test d(1:n1, 1:n2) ≈ norm(1:n1)^2 end end end diff --git a/test/symmetric.jl b/test/symmetric.jl index 031dac27c..581c6c4ff 100644 --- a/test/symmetric.jl +++ b/test/symmetric.jl @@ -470,6 +470,10 @@ end @test dot(symblockmu, symblockml) ≈ dot(msymblockmu, msymblockml) @test dot(symblockml, symblockmu) ≈ dot(msymblockml, msymblockmu) @test dot(symblockml, symblockml) ≈ dot(msymblockml, msymblockml) + + # empty matrices + @test dot(mtype(ComplexF64[;;], :U), mtype(Float64[;;], :U)) === zero(mtype == Hermitian ? Float64 : ComplexF64) + @test dot(mtype(ComplexF64[;;], :L), mtype(Float64[;;], :L)) === zero(mtype == Hermitian ? Float64 : ComplexF64) end end