Skip to content

Commit

Permalink
Merge pull request #75 from QuantumBFS/upgrade-yao
Browse files Browse the repository at this point in the history
Upgrade yao
  • Loading branch information
GiggleLiu authored Jun 1, 2022
2 parents 762e45d + 53b34dd commit 74f74ab
Show file tree
Hide file tree
Showing 14 changed files with 269 additions and 121 deletions.
1 change: 0 additions & 1 deletion .codecov.yml

This file was deleted.

32 changes: 0 additions & 32 deletions .gitlab-ci.yml

This file was deleted.

8 changes: 4 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "CuYao"
uuid = "b48ca7a8-dd42-11e8-2b8e-1b7706800275"
version = "0.3.3"
version = "0.3.4"

[deps]
BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf"
Expand All @@ -15,14 +15,14 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
Yao = "5872b779-8223-5990-8dd0-5abbb0748c8c"

[compat]
BitBasis = "0.7"
CUDA = "3.8"
BitBasis = "0.8"
CUDA = "3.10"
LuxurySparse = "0.6"
Reexport = "0.2, 1"
StaticArrays = "0.12, 1"
StatsBase = "0.33"
TupleTools = "1"
Yao = "0.7"
Yao = "0.8"
julia = "1"

[extras]
Expand Down
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

CUDA support for [Yao.jl](https://github.com/QuantumBFS/Yao.jl).

**We are in an early-release beta. Expect some adventures and rough edges.**
**Only tested locally; expect some adventures and rough edges.**

## Installation

Expand Down Expand Up @@ -44,12 +44,15 @@ To start, see the following example
```julia
using CuYao

cureg = rand_state(9; nbatch=1000) |> cu
cureg = rand_state(9; nbatch=1000) |> cu # or `curand_state(9; nbatch=1000)`.
cureg |> put(9, 2=>Z)
measure!(cureg |> addbits!(1) |> focus!(4,1,3))
measure!(cureg |> append_qubits!(1) |> focus!(4,1,3))
cureg |> relax!(4,1,3) |> cpu
```

Constructors `curand_state`, `cuzero_state`, `cuproduct_state`, `cuuniform_state` and `cughz_state` are tailored for GPU;
they are faster than uploading a CPU register to GPU.

## Features
### Supported Gates

Expand All @@ -63,15 +66,16 @@ cureg |> relax!(4,1,3) |> cpu

### Supported Register Operations
- measure!, measure_reset!, measure_remove!, select
- addbit!
- insert_qubit!
- append_qudits!, append_qubits!
- insert_qudit!, insert_qubits!
- focus!, relax!
- join
- density_matrix
- fidelity (not including density matrix)
- expect

### Other Operations
- statistic functional diff blocks
- expect for statistic functional
- autodiff is supported when the only parameterized gates in the circuit are rotation gates.

## The Team

Expand Down
11 changes: 11 additions & 0 deletions benchmarks/paralleldot.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# NOTE(review): this benchmark helper looks unfinished — confirm the intended
# computation before relying on it. As written:
#   * the parameters `matrices`, `ptrA` and `ptrB` are never read in the body;
#   * `state`, `a`, `d` and `mask` are used but are not defined in this function;
#   * `kernel` is declared as `(ctx, matrices)` while `gpu_call` passes it four
#     arguments (`state, a, d, mask`).
function paralleldot(matrices::CuVector, ptrA, ptrB)
# Per-element kernel launched through `gpu_call` below.
@inline function kernel(ctx, matrices)
# Cartesian index of the element handled by this invocation; `i` is its
# first component.
inds = @cartesianidx state
i = inds[1]
# Scale the addressed entry by `d` when `anyone(i-1, mask)` is true, else by `a`.
# presumably `anyone` tests whether any masked bit of the zero-based index is
# set — TODO confirm against its definition.
piecewise(state, inds)[i] *= anyone(i-1, mask) ? d : a
return
end
# Request one kernel element per entry of `state`.
gpu_call(kernel, state, a, d, mask; elements=length(state))
return state
end

4 changes: 0 additions & 4 deletions bors.toml

This file was deleted.

8 changes: 1 addition & 7 deletions src/CUDApatch.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
using CUDA.GPUArrays: gpu_call, @linearidx, @cartesianidx, linear_index

# TODO
# support norm(view(reshape(A, m, n), :, 1))
using LinearAlgebra
import LinearAlgebra: norm
# Square root of the sum of `abs2` over `A`, reduced along `dims` (default: 1).
function norm2(A::DenseCuArray; dims=1)
    sumsq = mapreduce(abs2, +, A, dims=dims)
    return sumsq .|> CUDA.sqrt
end

export piecewise
# Select the part of `state` addressed by the cartesian index `inds`.
# A vector is returned unchanged (`inds` is ignored).
function piecewise(state::AbstractVector, inds)
    return state
end

# For a matrix, return a view of column `inds[2]`; writes through the view
# modify `state`. Bounds checking on the view construction is disabled.
function piecewise(state::AbstractMatrix, inds)
    return @inbounds view(state, :, inds[2])
end

import Base: kron, getindex
function kron(A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2}
res = CUDA.zeros(promote_type(T1,T2), (size(A).*size(B))...)
@inline function kernel(ctx, res, A, B)
Expand All @@ -32,7 +26,7 @@ end
Computes Kronecker products in-place on the GPU.
The results are stored in 'C', overwriting the existing values of 'C'.
"""
function Yao.YaoBase.kron!(C::CuArray{T3}, A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2, T3}
function Yao.YaoArrayRegister.kron!(C::CuArray{T3}, A::DenseCuArray{T1}, B::DenseCuArray{T2}) where {T1, T2, T3}
@boundscheck (size(C) == (size(A,1)*size(B,1), size(A,2)*size(B,2))) || throw(DimensionMismatch())
CI = Base.CartesianIndices(C)
@inline function kernel(ctx, C, A, B)
Expand Down
22 changes: 20 additions & 2 deletions src/CuYao.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,31 @@ using Random

using Yao.YaoArrayRegister
using CUDA
using CUDA.GPUArrays: gpu_call, @linearidx, @cartesianidx, linear_index
@reexport using Yao
using Yao.YaoArrayRegister
using Yao.YaoBlocks
using Yao.ConstGate: SWAPGate
using Yao.ConstGate: S, T, Sdag, Tdag

import Yao.YaoArrayRegister: insert_qudits!, join
import CUDA: cu
import Yao.YaoArrayRegister: _measure, measure, measure!
import Yao.YaoArrayRegister: batch_normalize!
import Yao.YaoBlocks: BlockedBasis, nblocks, subblock
import Yao: expect
import Yao.YaoArrayRegister: u1rows!, unrows!, autostatic, instruct!, swaprows!
import LinearAlgebra: norm
import Base: kron, getindex

export cpu, cu, AbstractCuArrayReg, CuArrayReg, CuBatchedArrayReg, CuDensityMatrix,
cuzero_state, cuuniform_state, curand_state, cuproduct_state, cughz_state

const Ints = NTuple{<:Any, Int}

include("CUDApatch.jl")
include("GPUReg.jl")
include("gpuapplys.jl")
include("register.jl")
include("instructs.jl")

function __init__()
CUDA.allowscalar(false)
Expand Down
44 changes: 19 additions & 25 deletions src/gpuapplys.jl → src/instructs.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
using Yao.YaoBase
import Yao.YaoArrayRegister: u1rows!, unrows!, autostatic, instruct!, swaprows!

# get index
macro idx(shape, grididx=1, ctxsym=:ctx)
quote
Expand Down Expand Up @@ -45,15 +42,15 @@ instruct!(::Val{2}, state::DenseCuVecOrMat, U0::IMatrix, locs::NTuple{M, Int}, c
# Controlled application of a sparse operator: convert `U0` to a dense `Matrix`
# and re-dispatch to the `instruct!` method that accepts it.
function instruct!(::Val{2}, state::DenseCuVecOrMat, U0::SDSparseMatrixCSC, locs::NTuple{M, Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {C, M}
    dense = Matrix(U0)
    return instruct!(Val(2), state, dense, locs, clocs, cvals)
end

################## General U1 apply! ###################
function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::SDSparseMatrixCSC, locs::Tuple{Int})
instruct!(Val(2), state, Matrix(U1), locs, clocs, cval)
# Apply the sparse single-qubit operator `U1` to qubit `loc` of `state`.
#
# `U1` is converted to a dense `Matrix` and the call is forwarded to the
# `single_qubit_instruct!` method for `AbstractMatrix`. This method takes no
# control locations or values, so none are forwarded.
# Returns whatever the dense method returns (the mutated `state`).
function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::SDSparseMatrixCSC, loc::Int)
    return YaoArrayRegister.single_qubit_instruct!(state, Matrix(U1), loc)
end
function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::AbstractMatrix, locs::Tuple{Int})
@debug "The generic U(2) matrix of size ($(size(U1))), on: GPU, locations: $(locs)."
function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::AbstractMatrix, loc::Int)
@debug "The generic U(2) matrix of size ($(size(U1))), on: GPU, locations: $(loc)."
a, c, b, d = U1
nbit = log2dim1(state)
step = 1<<(locs[1]-1)
configs = itercontrol(nbit, [locs[1]], [0])
step = 1<<(loc-1)
configs = itercontrol(nbit, [loc], [0])

len = length(configs)
@inline function kernel(ctx, state, a, b, c, d, len)
Expand All @@ -66,12 +63,12 @@ function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::AbstractMatrix, locs::T
return state
end

function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::SDPermMatrix, locs::Tuple{Int})
@debug "The single qubit permutation matrix of size ($(size(U1))), on: GPU, locations: $(locs)."
function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::SDPermMatrix, loc::Int)
@debug "The single qubit permutation matrix of size ($(size(U1))), on: GPU, locations: $(loc)."
nbit = log2dim1(state)
b, c = U1.vals
step = 1<<(locs[1]-1)
configs = itercontrol(nbit, [locs[1]], [0])
step = 1<<(loc-1)
configs = itercontrol(nbit, [loc], [0])

len = length(configs)
function kernel(ctx, state, b, c, step, len, configs)
Expand All @@ -84,11 +81,11 @@ function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::SDPermMatrix, locs::Tup
return state
end

function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::SDDiagonal, locs::Tuple{Int})
@debug "The single qubit diagonal matrix of size ($(size(U1))), on: GPU, locations: $(locs)."
function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U1::SDDiagonal, loc::Int)
@debug "The single qubit diagonal matrix of size ($(size(U1))), on: GPU, locations: $(loc)."
a, d = U1.diag
nbit = log2dim1(state)
mask = bmask(locs...)
mask = bmask(loc)
@inline function kernel(ctx, state, a, d, mask)
inds = @cartesianidx state
i = inds[1]
Expand All @@ -99,10 +96,9 @@ function instruct!(::Val{2}, state::DenseCuVecOrMat, U1::SDDiagonal, locs::Tuple
return state
end

instruct!(::Val{2}, state::DenseCuVecOrMat, U::IMatrix, locs::Tuple{Int}) = state
# An identity-matrix operator is a no-op: return `state` untouched.
function YaoArrayRegister.single_qubit_instruct!(state::DenseCuVecOrMat, U::IMatrix, loc::Int)
    return state
end

################## XYZ #############
using Yao.ConstGate: S, T, Sdag, Tdag

# Uncontrolled X: forward to the controlled method with empty control
# locations and empty control values.
function _instruct!(state::DenseCuVecOrMat, ::Val{:X}, locs::NTuple{L,Int}) where {L}
    return _instruct!(state, Val(:X), locs, (), ())
end
function _instruct!(state::DenseCuVecOrMat, ::Val{:X}, locs::NTuple{L,Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where {L,C}
Expand Down Expand Up @@ -210,19 +206,19 @@ end

for G in [:X, :Y, :Z, :S, :T, :Sdag, :Tdag]
@eval begin
function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::NTuple{C,Int}) where C
function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::NTuple{C,Int}) where C
_instruct!(state, g, locs)
end

function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int})
function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int})
_instruct!(state, g, locs)
end

function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where C
function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, g::Val{$(QuoteNode(G))}, locs::Tuple{Int}, clocs::NTuple{C, Int}, cvals::NTuple{C, Int}) where C
_instruct!(state, g, locs, clocs, cvals)
end

function YaoBase.instruct!(::Val{2}, state::DenseCuVecOrMat, vg::Val{$(QuoteNode(G))}, locs::Tuple{Int}, cloc::Tuple{Int}, cval::Tuple{Int})
function YaoArrayRegister.instruct!(::Val{2}, state::DenseCuVecOrMat, vg::Val{$(QuoteNode(G))}, locs::Tuple{Int}, cloc::Tuple{Int}, cval::Tuple{Int})
_instruct!(state, vg, locs, cloc, cval)
end
end
Expand Down Expand Up @@ -251,7 +247,6 @@ end

############## other gates ################
# parametrized swap gate
using Yao.ConstGate: SWAPGate

function instruct!(::Val{2}, state::DenseCuVecOrMat, ::Val{:PSWAP}, locs::Tuple{Int, Int}, θ::Real)
@debug "The PSWAP gate, on: GPU, locations: $(locs)."
Expand All @@ -276,8 +271,7 @@ function instruct!(::Val{2}, state::DenseCuVecOrMat, ::Val{:PSWAP}, locs::Tuple{
state
end

using Yao.YaoBlocks
function YaoBlocks._apply_fallback!(r::GPUReg{B,T}, b::AbstractBlock) where {B,T}
function YaoBlocks._apply_fallback!(r::AbstractCuArrayReg{B,T}, b::AbstractBlock) where {B,T}
YaoBlocks._check_size(r, b)
r.state .= CUDA.adapt(CuArray{T}, mat(T, b)) * r.state
return r
Expand Down
Loading

0 comments on commit 74f74ab

Please sign in to comment.