diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl
index 1269e4c..24be933 100644
--- a/misc/dev_float32.jl
+++ b/misc/dev_float32.jl
@@ -6,7 +6,7 @@ model_32 = include("$(root_dir)/misc/rbc_float32.jl")
 
 model32 = Dolo.convert_precision(Float32, model_32)
 
-dm32 = Dolo.discretize(model32, Dict(:endo=>[1000]) )
+dm32 = Dolo.discretize(model32, Dict(:endo=>[100000]) )
 
 
 typeof(dm32)
@@ -23,6 +23,7 @@ wk0 = Dolo.time_iteration_workspace(dm32)
 wk = Dolo.time_iteration_workspace(dm32, dest=CuArray)
 
 # wk = adapt(oneArray, wk0)
+
 wk = adapt(CuArray, wk0)
 using KernelAbstractions: get_backend
 
@@ -39,50 +40,6 @@ t_e = get_backend(wk.x0)
 using KernelAbstractions
 using StaticArrays
 
-@kernel function ggg(r, dm, x, φ)
-# @kernel function fun(dm, r, x)
-
-
-    n = @index(Global, Linear)
-    ind = @index(Global, Cartesian)
-    (i,j) = ind.I
-
-
-    # k = length(dm.grid.grids[1])
-    # i_ = (n-1)÷k
-    # j_ = (n-1) - (i)*κ
-    # i = i_+1
-    # j = j_+1
-    
-    # TODO: special function here
-    s_ = dm.grid[n]
-    s = Dolo.QP((i,j), s_)
-
-    xx = x[n]
-
-    # (i,j) = @inline Dolo.from_linear(model.grid, n)    
-
-    rr = Dolo.F(dm, s, xx, φ)
-
-    r[n] = rr
-
-end
-
-using BenchmarkTools
-
-fun_ = ggg(t_e)
-
-@time fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q))
-
-
-@benchmark fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q))
-
-
-@benchmark Dolo.F!(wk0.r0, dm32, wk0.x0, wk0.φ)
-
-
-Dolo.distance(wk0.r0, adapt(Array,wk.r0))
-
 
 # try
 
@@ -91,16 +48,13 @@ Dolo.distance(wk0.r0, adapt(Array,wk.r0))
     # nothing
 # end
 
+wk0 = Dolo.time_iteration_workspace(dm32)
+wk = adapt(CuArray, wk0)
+@time Dolo.time_iteration(dm32, wk, verbose=false; engine=:gpu);
 
-
-Dolo.time_iteration(dm32, wk; engine=:gpu)
-
-
-s_0 = dm32.grid[10]
-s0 = Dolo.QP((2,2), s_0)
-xx0 = wk0.x0[10]
+@time Dolo.time_iteration(dm32, wk0, verbose=false; engine=:gpu);
 
 
-fon(dm32, s0, xx0) = sum(w*S.val for (w,S) in Dolo.τ(dm32, s0, xx0))
+Dolo.distance(adapt(Array,wk.x0) , wk0.x0)
 
-@code_warntype fon(dm32, s0, xx0)
\ No newline at end of file
+# 357 times faster !
\ No newline at end of file
diff --git a/src/adapt.jl b/src/adapt.jl
index 10362ef..42ca3d1 100644
--- a/src/adapt.jl
+++ b/src/adapt.jl
@@ -34,3 +34,6 @@ end
 import KernelAbstractions: get_backend
 
 get_backend(g::GArray) = get_backend(g.data)
+
+import CUDA: CuArray
+distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(u), max, x.data-y.data)
\ No newline at end of file
diff --git a/src/algos/time_iteration_accelerated.jl b/src/algos/time_iteration_accelerated.jl
index a4ca41f..e1d9f12 100644
--- a/src/algos/time_iteration_accelerated.jl
+++ b/src/algos/time_iteration_accelerated.jl
@@ -39,23 +39,30 @@ using KernelAbstractions
 # This is for a PGrid model only 
 function F!(r, model, x, φ, engine)
 
-    @kernel function FF_(r, @Const(model), @Const(x), @Const(φ))
-
+    @kernel function FF_(r, @Const(dm), @Const(x), @Const(φ))
 
         n = @index(Global, Linear)
-        (i,j) = Dolo.from_linear(model.grid, n)
-
-        s_ = model.grid[n]
-        s = QP((i,j), s_)
-        xx = x[n]
-        
-        rr = sum(
-            w*Dolo.arbitrage(model,s,xx,S,φ(S)) 
-            for (w,S) in Dolo.τ(model, s, xx)
-        )      
+        ind = @index(Global, Cartesian)
+        (i,j) = ind.I
+    
+        # k = length(dm.grid.grids[1])
+        # i_ = (n-1)÷k
+        # j_ = (n-1) - (i)*κ
+        # i = i_+1
+        # j = j_+1
         
-        r[i,j] = rr
-
+        # TODO: special function here
+        s_ = dm.grid[n]
+        s = Dolo.QP((i,j), s_)
+    
+        xx = x[n]
+    
+        # (i,j) = @inline Dolo.from_linear(model.grid, n)    
+    
+        rr = Dolo.F(dm, s, xx, φ)
+    
+        r[n] = rr
+    
     end
 
     fun_cpu = FF_(engine)
@@ -79,28 +86,36 @@ end
 
 
 
-function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, ::CPU)
+function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, engine)
 
-    @kernel function FF_(r,@Const(model), @Const(x),@Const(φ) )
+    @kernel function FF_(r,@Const(dm), @Const(x),@Const(φ) )
 
-        # c = @index(Global, Cartesian)
-        # i,j = c.I
 
         n = @index(Global, Linear)
-        # i,j = c.I
-        (i,j) = Dolo.from_linear(model.grid, n)
-
-        s_ = model.grid[n]
-        s = QP((i,j), s_)
-        xx = x[i,j]
+        ind = @index(Global, Cartesian)
+        (i,j) = ind.I
+    
+        # k = length(dm.grid.grids[1])
+        # i_ = (n-1)÷k
+        # j_ = (n-1) - (i)*κ
+        # i = i_+1
+        # j = j_+1
         
+        # TODO: special function here
+        s_ = dm.grid[n]
+        s = Dolo.QP((i,j), s_)
+    
+        xx = x[n]
+    
+        # (i,j) = @inline Dolo.from_linear(model.grid, n)    
+    
         rr = ForwardDiff.jacobian(u->F(model, s, u, φ), xx)
-        
-        r[i,j] = rr
+    
+        r[n] = rr
 
     end
 
-    fun_cpu = FF_(CPU())
+    fun_cpu = FF_(engine)
 
     if typeof(model.grid)<:CGrid
         p = model.grid.ranges[1][3]