diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index 1269e4c..24be933 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -6,7 +6,7 @@ model_32 = include("$(root_dir)/misc/rbc_float32.jl") model32 = Dolo.convert_precision(Float32, model_32) -dm32 = Dolo.discretize(model32, Dict(:endo=>[1000]) ) +dm32 = Dolo.discretize(model32, Dict(:endo=>[100000]) ) typeof(dm32) @@ -23,6 +23,7 @@ wk0 = Dolo.time_iteration_workspace(dm32) wk = Dolo.time_iteration_workspace(dm32, dest=CuArray) # wk = adapt(oneArray, wk0) + wk = adapt(CuArray, wk0) using KernelAbstractions: get_backend @@ -39,50 +40,6 @@ t_e = get_backend(wk.x0) using KernelAbstractions using StaticArrays -@kernel function ggg(r, dm, x, φ) -# @kernel function fun(dm, r, x) - - - n = @index(Global, Linear) - ind = @index(Global, Cartesian) - (i,j) = ind.I - - - # k = length(dm.grid.grids[1]) - # i_ = (n-1)÷k - # j_ = (n-1) - (i)*κ - # i = i_+1 - # j = j_+1 - - # TODO: special function here - s_ = dm.grid[n] - s = Dolo.QP((i,j), s_) - - xx = x[n] - - # (i,j) = @inline Dolo.from_linear(model.grid, n) - - rr = Dolo.F(dm, s, xx, φ) - - r[n] = rr - -end - -using BenchmarkTools - -fun_ = ggg(t_e) - -@time fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) - - -@benchmark fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) - - -@benchmark Dolo.F!(wk0.r0, dm32, wk0.x0, wk0.φ) - - -Dolo.distance(wk0.r0, adapt(Array,wk.r0)) - # try @@ -91,16 +48,13 @@ Dolo.distance(wk0.r0, adapt(Array,wk.r0)) # nothing # end +wk0 = Dolo.time_iteration_workspace(dm32) +wk = adapt(CuArray, wk0) +@time Dolo.time_iteration(dm32, wk, verbose=false; engine=:gpu); - -Dolo.time_iteration(dm32, wk; engine=:gpu) - - -s_0 = dm32.grid[10] -s0 = Dolo.QP((2,2), s_0) -xx0 = wk0.x0[10] +@time Dolo.time_iteration(dm32, wk0, verbose=false; engine=:gpu); -fon(dm32, s0, xx0) = sum(w*S.val for (w,S) in Dolo.τ(dm32, s0, xx0)) +Dolo.distance(adapt(Array,wk.x0) , wk0.x0) -@code_warntype fon(dm32, s0, xx0) \ No newline at end of file +# 357 times faster ! \ No newline at end of file diff --git a/src/adapt.jl b/src/adapt.jl index 10362ef..42ca3d1 100644 --- a/src/adapt.jl +++ b/src/adapt.jl @@ -34,3 +34,6 @@ end import KernelAbstractions: get_backend get_backend(g::GArray) = get_backend(g.data) + +import CUDA: CuArray +distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(u), max, x.data-y.data) \ No newline at end of file diff --git a/src/algos/time_iteration_accelerated.jl b/src/algos/time_iteration_accelerated.jl index a4ca41f..e1d9f12 100644 --- a/src/algos/time_iteration_accelerated.jl +++ b/src/algos/time_iteration_accelerated.jl @@ -39,23 +39,30 @@ using KernelAbstractions # This is for a PGrid model only function F!(r, model, x, φ, engine) - @kernel function FF_(r, @Const(model), @Const(x), @Const(φ)) - + @kernel function FF_(r, @Const(dm), @Const(x), @Const(φ)) n = @index(Global, Linear) - (i,j) = Dolo.from_linear(model.grid, n) - - s_ = model.grid[n] - s = QP((i,j), s_) - xx = x[n] - - rr = sum( - w*Dolo.arbitrage(model,s,xx,S,φ(S)) - for (w,S) in Dolo.τ(model, s, xx) - ) + ind = @index(Global, Cartesian) + (i,j) = ind.I + + # k = length(dm.grid.grids[1]) + # i_ = (n-1)÷k + # j_ = (n-1) - (i)*κ + # i = i_+1 + # j = j_+1 - r[i,j] = rr - + # TODO: special function here + s_ = dm.grid[n] + s = Dolo.QP((i,j), s_) + + xx = x[n] + + # (i,j) = @inline Dolo.from_linear(model.grid, n) + + rr = Dolo.F(dm, s, xx, φ) + + r[n] = rr + end fun_cpu = FF_(engine) @@ -79,28 +86,36 @@ end -function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, ::CPU) +function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, engine) - @kernel function FF_(r,@Const(model), @Const(x),@Const(φ) ) + @kernel function FF_(r,@Const(dm), @Const(x),@Const(φ) ) - # c = @index(Global, Cartesian) - # i,j = c.I n = @index(Global, Linear) - # i,j = c.I - (i,j) = Dolo.from_linear(model.grid, n) - - s_ = model.grid[n] - s = QP((i,j), s_) - xx = x[i,j] + ind = @index(Global, Cartesian) + (i,j) = ind.I + + # k = length(dm.grid.grids[1]) + # i_ = (n-1)÷k + # j_ = (n-1) - (i)*κ + # i = i_+1 + # j = j_+1 + # TODO: special function here + s_ = dm.grid[n] + s = Dolo.QP((i,j), s_) + + xx = x[n] + + # (i,j) = @inline Dolo.from_linear(model.grid, n) + rr = ForwardDiff.jacobian(u->F(model, s, u, φ), xx) - - r[i,j] = rr + + r[n] = rr end - fun_cpu = FF_(CPU()) + fun_cpu = FF_(engine) if typeof(model.grid)<:CGrid p = model.grid.ranges[1][3]