Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use the macro kcopy! instead of broadcast #899

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/bicgstab.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,14 @@ kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose,
mul!(r₀, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r₀)
else
r₀ .= b
@kcopy!(n, r₀, b) # r₀ ← b
end

@kfill!(x, zero(FC)) # x₀
@kfill!(s, zero(FC)) # s₀
@kfill!(v, zero(FC)) # v₀
MisI || mulorldiv!(r, M, r₀, ldiv) # r₀
p .= r # p₁
@kcopy!(n, p, r) # p₁

α = one(FC) # α₀
ω = one(FC) # ω₀
Expand Down
12 changes: 6 additions & 6 deletions src/car.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,29 +127,29 @@ kwargs_car = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :ca
mul!(r, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r)
else
r .= b
@kcopy!(n, r, b) # r ← b
end

# p₀ = r₀ = M(b - Ax₀)
if MisI
p .= r
@kcopy!(n, p, r) # p ← r
else
mulorldiv!(p, M, r, ldiv)
r .= p
@kcopy!(n, r, p) # r ← p
end

mul!(s, A, r) # s₀ = Ar₀

# q₀ = MAp₀ and s₀ = MAr₀
if MisI
q .= s
@kcopy!(n, q, s) # q ← s
else
mulorldiv!(q, M, s, ldiv)
s .= q
@kcopy!(n, s, q) # s ← q
end

mul!(t, A, s) # t₀ = As₀
u .= t # u₀ = Aq₀
@kcopy!(n, u, t) # u₀ = Aq₀
ρ = @kdotr(n, t, s) # ρ₀ = ⟨t₀ , s₀⟩

# Compute ‖r₀‖
Expand Down
6 changes: 3 additions & 3 deletions src/cg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v
mul!(r, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r)
else
r .= b
@kcopy!(n, r, b) # r ← b
end
MisI || mulorldiv!(z, M, r, ldiv)
p .= z
@kcopy!(n, p, z) # p ← z
γ = @kdotr(n, r, z)
rNorm = sqrt(γ)
history && push!(rNorms, rNorm)
Expand Down Expand Up @@ -182,7 +182,7 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v
inconsistent = !linesearch
end
if linesearch
iter == 0 && (x .= b)
iter == 0 && @kcopy!(n, x, b) # x ← b
solved = true
end
end
Expand Down
6 changes: 3 additions & 3 deletions src/cg_lanczos.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax
mul!(Mv, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), Mv)
else
Mv .= b
@kcopy!(n, Mv, b) # Mv ← b
end
MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀
β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁
Expand All @@ -152,13 +152,13 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax
solver.warm_start = false
return solver
end
p .= v
@kcopy!(n, p, v) # p ← v

# Initialize Lanczos process.
# β₁Mv₁ = b
@kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
Mv_prev .= Mv
@kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv

iter = 0
itmax == 0 && (itmax = 2 * n)
Expand Down
8 changes: 4 additions & 4 deletions src/cg_lanczos_shift.jl
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,10 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t
for i = 1 : nshifts
@kfill!(x[i], zero(FC)) # x₀
end
Mv .= b # Mv₁ ← b
@kcopy!(n, Mv, b) # Mv₁ ← b
MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁
β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁
rNorms .= β
@kfill!(rNorms, β)
if history
for i = 1 : nshifts
push!(rNorms_history[i], rNorms[i])
Expand All @@ -155,14 +155,14 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t

# Initialize each p to v.
for i = 1 : nshifts
p[i] .= v
@kcopy!(n, p[i], v) # pᵢ ← v
end

# Initialize Lanczos process.
# β₁Mv₁ = b
@kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁
MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁
Mv_prev .= Mv
@kcopy!(n, Mv_prev, Mv) # Mv_prev ← Mv

# Initialize some constants used in recursions below.
ρ = one(T)
Expand Down
4 changes: 2 additions & 2 deletions src/cgls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose
Mq = MisI ? q : solver.Mr

@kfill!(x, zero(FC))
r .= b
@kcopy!(m, r, b) # r ← b
bNorm = @knrm2(m, r) # Marginally faster than norm(b)
if bNorm == 0
stats.niter = 0
Expand All @@ -160,7 +160,7 @@ kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose
end
MisI || mulorldiv!(Mr, M, r, ldiv)
mul!(s, Aᴴ, Mr)
p .= s
@kcopy!(n, p, s) # p ← s
γ = @kdotr(n, s, s) # γ = sᴴs
iter = 0
itmax == 0 && (itmax = m + n)
Expand Down
18 changes: 9 additions & 9 deletions src/cgls_lanczos_shift.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,11 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose
@kfill!(x[i], zero(FC)) # x₀
end

u .= b
@kcopy!(m, u, b) # u ← b
@kfill!(u_prev, zero(FC))
mul!(v, Aᴴ, u) # v₁ ← Aᴴ * b
β = sqrt(@kdotr(n, v, v)) # β₁ = v₁ᵀ M v₁
rNorms .= β
mul!(v, Aᴴ, u) # v₁ ← Aᴴ * b
β = sqrt(@kdotr(n, v, v)) # β₁ = v₁ᵀ M v₁
@kfill!(rNorms, β)
if history
for i = 1 : nshifts
push!(rNorms_history[i], rNorms[i])
Expand All @@ -158,17 +158,17 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose

# Initialize each p to v.
for i = 1 : nshifts
p[i] .= v
@kcopy!(n, p[i], v) # pᵢ ← v
end

# Initialize Lanczos process.
# β₁v₁ = b
@kscal!(n, one(FC) / β, v) # v₁ v₁ / β₁
@kscal!(n, one(FC) / β, v) # v₁ v₁ / β₁
@kscal!(m, one(FC) / β, u)

# Initialize some constants used in recursions below.
ρ = one(T)
σ .= β
@kfill!(σ, β)
@kfill!(δhat, zero(T))
@kfill!(ω, zero(T))
@kfill!(γ, one(T))
Expand Down Expand Up @@ -206,8 +206,8 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose
β = sqrt(@kdotr(n, v, v)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
@kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
@kscal!(m, one(FC) / β, utilde) # uₖ₊₁ = uₖ₊₁ / βₖ₊₁
u_prev .= u
u .= utilde
@kcopy!(m, u_prev, u) # u_prev ← u
@kcopy!(m, u, utilde) # u ← utilde

MisI || (ρ = @kdotr(n, v, v))
for i = 1 : nshifts
Expand Down
4 changes: 2 additions & 2 deletions src/cgne.jl
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor
z = NisI ? r : solver.z

@kfill!(x, zero(FC))
r .= b
@kcopy!(m, r, b) # r ← b
NisI || mulorldiv!(z, N, r, ldiv)
rNorm = @knrm2(m, r) # Marginally faster than norm(r)
history && push!(rNorms, rNorm)
Expand All @@ -163,7 +163,7 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor
stats.status = "x = 0 is a zero-residual solution"
return solver
end
λ > 0 && (s .= r)
λ > 0 && @kcopy!(m, s, r) # s ← r
mul!(p, Aᴴ, z)

# Use ‖p‖ to detect inconsistent system.
Expand Down
8 changes: 4 additions & 4 deletions src/cgs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist
mul!(r₀, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r₀)
else
r₀ .= b
@kcopy!(n, r₀, b) # r₀ ← b
end

@kfill!(x, zero(FC)) # x₀
Expand Down Expand Up @@ -189,9 +189,9 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist
(verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))

u .= r # u₀
p .= r # p₀
@kfill!(q, zero(FC)) # q₋₁
@kcopy!(n, u, r) # u₀
@kcopy!(n, p, r) # p₀
@kfill!(q, zero(FC)) # q₋₁

# Stopping criterion.
solved = rNorm ≤ ε
Expand Down
8 changes: 4 additions & 4 deletions src/cr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,9 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema
mul!(p, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), p)
else
p .= b
@kcopy!(n, p, b) # p ← b
end
MisI && (r .= p)
MisI && @kcopy!(n, r, p) # r ← p
MisI || mulorldiv!(r, M, p, ldiv)
mul!(Ar, A, r)
ρ = @kdotr(n, r, Ar)
Expand All @@ -165,8 +165,8 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema
solver.warm_start = false
return solver
end
p .= r
q .= Ar
@kcopy!(n, p, r) # p ← r
@kcopy!(n, q, Ar) # q ← Ar
(verbose > 0) && (m = zero(T)) # quadratic model

iter = 0
Expand Down
2 changes: 1 addition & 1 deletion src/craig.jl
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at
@kfill!(x, zero(FC))
@kfill!(y, zero(FC))

Mu .= b
@kcopy!(m, Mu, b) # Mu ← b
MisI || mulorldiv!(u, M, Mu, ldiv)
β₁ = sqrt(@kdotr(m, u, Mu))
rNorm = β₁
Expand Down
14 changes: 7 additions & 7 deletions src/craigmr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver
# Compute y such that AAᴴy = b. Then recover x = Aᴴy.
@kfill!(x, zero(FC))
@kfill!(y, zero(FC))
Mu .= b
@kcopy!(m, Mu, b) # Mu ← b
MisI || mulorldiv!(u, M, Mu, ldiv)
β = sqrt(@kdotr(m, u, Mu))
if β == 0
Expand All @@ -208,7 +208,7 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver
MisI || @kscal!(m, one(FC)/β, Mu)
# α₁Nv₁ = Aᴴu₁.
mul!(Aᴴu, Aᴴ, u)
Nv .= Aᴴu
@kcopy!(n, Nv, Aᴴu) # Nv ← Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
Anorm² = α * α
Expand All @@ -233,10 +233,10 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver
NisI || @kscal!(n, one(FC)/α, Nv)

# Regularization.
λₖ = λ # λ₁ = λ
cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ
cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁
λ > 0 && (q .= v) # Additional vector needed to update x, by definition q₀ = 0
λₖ = λ # λ₁ = λ
cpₖ = spₖ = one(T) # Givens sines and cosines used to zero out λₖ
cdₖ = sdₖ = one(T) # Givens sines and cosines used to define λₖ₊₁
λ > 0 && @kcopy!(n, q, v) # Additional vector needed to update x, by definition q₀ = 0

if λ > 0
(cpₖ, spₖ, αhat) = sym_givens(α, λₖ)
Expand All @@ -257,7 +257,7 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver
ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems.

wbar .= u
@kcopy!(m, wbar, u)
@kscal!(m, one(FC)/αhat, wbar)
@kfill!(w, zero(FC))
@kfill!(d, zero(FC))
Expand Down
6 changes: 3 additions & 3 deletions src/crls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose
MAp = MisI ? Ap : solver.Ms

@kfill!(x, zero(FC))
r .= b
@kcopy!(m, r, b) # r ← b
bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
history && push!(rNorms, rNorm)
Expand All @@ -158,8 +158,8 @@ kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose
mul!(s, A, Ar)
MisI || mulorldiv!(Ms, M, s, ldiv)

p .= Ar
Ap .= s
@kcopy!(n, p, Ar) # p ← Ar
@kcopy!(m, Ap, s) # Ap ← s
mul!(q, Aᴴ, Ms) # Ap
λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms)
Expand Down
4 changes: 2 additions & 2 deletions src/crmr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor
history && push!(ArNorms, zero(T))
return solver
end
λ > 0 && (s .= r)
λ > 0 && @kcopy!(m, s, r) # s ← r
mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0.
p .= Aᴴr
@kcopy!(n, p, Aᴴr) # p ← Aᴴr
γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr)
λ > 0 && (γ += λ * rNorm * rNorm)
iter = 0
Expand Down
2 changes: 1 addition & 1 deletion src/diom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem
mul!(t, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), t)
else
t .= b
@kcopy!(n, t, b) # t ← b
end
MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
Expand Down
2 changes: 1 addition & 1 deletion src/dqgmres.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ kwargs_dqgmres = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :ti
mul!(t, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), t)
else
t .= b
@kcopy!(n, t, b) # t ← b
end
MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
Expand Down
2 changes: 1 addition & 1 deletion src/fgmres.jl
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ kwargs_fgmres = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :i
@kaxpby!(n, one(FC), b, -one(FC), w)
restart && @kaxpy!(n, one(FC), Δx, x)
else
w .= b
@kcopy!(n, w, b) # w ← b
end
MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
β = @knrm2(n, r₀) # β = ‖r₀‖₂
Expand Down
4 changes: 2 additions & 2 deletions src/fom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itma
@kaxpby!(n, one(FC), b, -one(FC), w)
restart && @kaxpy!(n, one(FC), Δx, x)
else
w .= b
@kcopy!(n, w, b) # w ← b
end
MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
β = @knrm2(n, r₀) # β = ‖r₀‖₂
Expand Down Expand Up @@ -314,7 +314,7 @@ kwargs_fom = (:M, :N, :ldiv, :restart, :reorthogonalization, :atol, :rtol, :itma
@kaxpy!(n, y[i], V[i], xr)
end
if !NisI
solver.p .= xr
@kcopy!(n, solver.p, xr) # p ← xr
mulorldiv!(xr, N, solver.p, ldiv)
end
restart && @kaxpy!(n, one(FC), xr, x)
Expand Down
Loading
Loading