From 3f9f2362a196cc995148c5160f06a42f4435efcd Mon Sep 17 00:00:00 2001 From: Mason Protter Date: Sat, 11 Nov 2023 23:03:10 +0100 Subject: [PATCH] Major cleanup. Remove `compile`, `compile_wasm`, and `MixTape` (#146) * Remove `compile`, do some cleanup * update README * note about `compile_shlib` * note about compile_shlib * remove compile_wasm and MixTape; allow specifying a method_table * Forgot v1.8 doesn't have package extensions * oops * don't export `compile_wasm` --- .github/workflows/ci-integration.yml | 1 + .github/workflows/ci.yml | 1 + Project.toml | 4 +- README.md | 86 +++---- src/StaticCompiler.jl | 289 ++++------------------- src/code_loading.jl | 84 ------- src/interpreter.jl | 44 ++-- src/optimize.jl | 328 --------------------------- src/pointer_patching.jl | 185 --------------- src/pointer_warning.jl | 72 ++++++ src/target.jl | 49 ++-- test/Project.toml | 5 +- test/runtests.jl | 6 +- test/testcore.jl | 257 +++------------------ test/testintegration.jl | 168 +++++--------- 15 files changed, 294 insertions(+), 1285 deletions(-) delete mode 100644 src/code_loading.jl delete mode 100644 src/optimize.jl delete mode 100644 src/pointer_patching.jl create mode 100644 src/pointer_warning.jl diff --git a/.github/workflows/ci-integration.yml b/.github/workflows/ci-integration.yml index be2fadde..beb16c17 100644 --- a/.github/workflows/ci-integration.yml +++ b/.github/workflows/ci-integration.yml @@ -20,6 +20,7 @@ jobs: version: - '1.8' - '1.9' + - '1.10.0-rc1' os: - ubuntu-latest - macOS-latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eb6a3fa6..3bbb9fd2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,7 @@ jobs: version: - '1.8' - '1.9' + - '1.10.0-rc1' os: - ubuntu-latest - macOS-latest diff --git a/Project.toml b/Project.toml index afb31dc7..4b32fd7b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "StaticCompiler" uuid = "81625895-6c0f-48fc-b932-11a18313743c" authors = ["Tom Short and contributors"] -version = "0.5.3" +version = "0.6" [deps] Clang_jll = "0ee61d77-7f21-5576-8119-9fcc46b10100" @@ -17,7 +17,7 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" [compat] CodeInfoTools = "0.3" -GPUCompiler = "0.21" +GPUCompiler = "0.21, 0.22, 0.23, 0.24" LLVM = "6" MacroTools = "0.5" StaticTools = "0.8" diff --git a/README.md b/README.md index 4b0779bf..d4816cb9 100644 --- a/README.md +++ b/README.md @@ -15,39 +15,8 @@ using Pkg Pkg.add("StaticCompiler") ``` -There are two main ways to use this package: - -### Linked compilation -The first option is via the `compile` function, which can be used when you want to compile a Julia function for later use from within Julia: -```julia -julia> using StaticCompiler - -julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -fib (generic function with 1 method) - -julia> fib_compiled, path = compile(fib, Tuple{Int}, "fib") -(f = fib(::Int64) :: Int64, path = "fib") - -julia> fib_compiled(10) -55 -``` -Now we can quit this session and load a new one where `fib` is not defined: -```julia -julia> using StaticCompiler - -julia> fib -ERROR: UndefVarError: fib not defined - -julia> fib_compiled = load_function("fib") -fib(::Int64) :: Int64 - -julia> fib_compiled(10) -55 -``` -See the file `tests/runtests.jl` for some examples of functions that work with `compile` (and some that don't, marked with `@test_skip`). 
-
### Standalone compilation
-The second way to use this package is via the `compile_executable` and `compile_shlib` functions, for when you want to compile a Julia function to a native executable or shared library for use from outside of Julia:
+StaticCompiler.jl provides the functions `compile_executable` and `compile_shlib` for compiling a Julia function to a native executable or shared library for use from outside of Julia:
```julia
julia> using StaticCompiler, StaticTools

julia> hello() = println(c"Hello, world!")
hello (generic function with 1 method)

julia> compile_executable(hello, (), "./")
"/Users/user/hello"

shell> ls -alh hello
-rwxr-xr-x  1 user  staff   8.4K Oct 20 20:40 hello

shell> ./hello
Hello, world!
```
-This latter approach comes with substantially more limitations, as you cannot rely on `libjulia` (see, e.g., [StaticTools.jl](https://github.com/brenhinkeller/StaticTools.jl) for some ways to work around these limitations).
+This approach comes with substantial limitations compared to regular Julia code, as you cannot rely on Julia's runtime, `libjulia` (see, e.g., [StaticTools.jl](https://github.com/brenhinkeller/StaticTools.jl) for some ways to work around these limitations).
+
+The low-level function `StaticCompiler.generate_obj` (not exported) generates object files. It gives finer control over compilation and can be used, for example, to cross-compile to other targets.
+
+### Method overlays
-The low-level function `StaticCompiler.generate_obj` (not exported) generates object files. This can be used for more control of compilation. This can be used to cross-compile to other targets.
+Sometimes, a Julia function you want to statically compile will do things (such as throwing errors) that aren't supported natively by StaticCompiler. One tool provided for working around this is the `@device_override` macro, which lets you swap out a method, but only inside a StaticCompiler.jl compilation context. For example:

-### Mixtape
+```julia
+julia> using Libdl, StaticCompiler
+
+julia> f(x) = g(x) + 1;
+
+julia> g(x) = 2x
+
+julia> @device_override g(x::Int) = x - 10

-This feature allows one to change functionality when statically compiling. This uses code and API from [Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) to transform lowered code much like [Cassette](https://github.com/JuliaLabs/Cassette.jl).
+julia> f(1) # Gives the expected answer in regular Julia
+3
+
+julia> dlopen(compile_shlib(f, (Int,), "./")) do lib
+           fptr = dlsym(lib, "f")
+           # Now use the compiled version where + is replaced with -
+           @ccall $fptr(1::Int)::Int
+       end
+-8
+```
+Typically, errors should be overridden and replaced with `@print_and_throw`, which is StaticCompiler-friendly; i.e., we define overrides such as
+```julia
+@device_override @noinline Base.Math.throw_complex_domainerror(f::Symbol, x) =
+    @print_and_throw c"This operation requires a complex input to return a complex result"
+```

-To use the Mixtape feature, define a `CompilationContext` struct and pass this to any of the compilation functions with the `mixtape` keyword. Define `transform` and `allow` functions for this `CompilationContext` to define the transformation to be done.
+If, for some reason, you wish to use a different method table (defined with `Base.Experimental.@MethodTable` and `Base.Experimental.@overlay`) than the default one provided by StaticCompiler.jl, you can provide it to `compile_executable` and `compile_shlib` via the keyword argument `method_table`.

-See [here](https://github.com/tshort/StaticCompiler.jl/blob/master/test/testintegration.jl#L329) for an example.
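+For example, here is a minimal sketch of plugging in a custom table (the names `MyTable`, `g1`, and `use_g1` are illustrative only, not part of the API; the pattern mirrors the `Overlays` testset added in `test/testcore.jl` below):
+
+```julia
+julia> using Libdl, StaticCompiler
+
+julia> g1() = 1;
+
+julia> use_g1() = g1() + 1;
+
+julia> Base.Experimental.@MethodTable MyTable
+
+julia> Base.Experimental.@overlay MyTable g1() = 100
+
+julia> dlopen(compile_shlib(use_g1, (), "./"; method_table=MyTable)) do lib
+           fptr = dlsym(lib, "use_g1")
+           # the overlayed g1 is used in the compiled code: 100 + 1
+           @ccall $fptr()::Int
+       end
+101
+```
+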
## Approach

@@ -81,7 +75,6 @@ This package uses the [GPUCompiler package](https://github.com/JuliaGPU/GPUCompi

## Limitations

-* GC-tracked allocations and global variables do work with `compile`, but the way they are implemented is brittle and can be dangerous. Allocate with care.
* GC-tracked allocations and global variables do *not* work with `compile_executable` or `compile_shlib`. This has some interesting consequences, including that all functions _within_ the function you want to compile must either be inlined or return only native types (otherwise Julia would have to allocate a place to put the results, which will fail).
* Since error handling relies on libjulia, you can only throw errors from standalone-compiled (`compile_executable` / `compile_shlib`) code if an explicit overload has been defined for that particular error with `@device_override` (see [quirks.jl](src/quirks.jl)).
* Type instability. Type-unstable code cannot currently be statically compiled via this package.
@@ -97,19 +90,19 @@ To enable code to be statically compiled, consider the following:

* Avoid Julia's internal allocations. That means don't bake in use of Arrays or Strings or Dicts. Types from StaticTools can help, like StaticStrings and MallocArrays.
-* If need be, manage memory manually, using `malloc` and `free`. This works with StaticTools.MallocString and StaticTools.MallocArray.
+* If need be, manage memory manually, using `malloc` and `free` from StaticTools.jl. This works with `StaticTools.MallocString` and `StaticTools.MallocArray`; alternatively, use [Bumper.jl](https://github.com/MasonProtter/Bumper.jl).
* Don't use global variables that need to be allocated and initialized. Instead of global variables, use context structures that have an initialization function. It is okay to use global Tuples or NamedTuples as the use of these should be baked into compiled code.
* Use context variables to store program state, inputs, and outputs. Parameterize these types as needed, so your code can handle normal types (Arrays) and static-friendly types (StaticArrays, MallocArrays, or StrideArrays). The SciML ecosystem does this well ([example](https://github.com/SciML/OrdinaryDiffEq.jl/blob/e7f045950615352ddfcb126d13d92afd2bad05e4/src/integrators/type.jl#L82)). Use of these context variables also enables allocations and initialization to be centralized, so these could be managed by the calling routines in Julia, Python, JavaScript, or other languages.
-* If your code needs an array as a workspace, instead of directly creating it, create it as a function argument (where it could default to a standard array creation). That code could be statically compiled if that function argument is changed to a MallocArray or another static-friendly alternative.
+* Arguments and returned values from `compile_shlib` must be native objects such as `Int`, `Float64`, or `Ptr`. They cannot be things like `Tuple{Int, Int}` because that is not natively sized. Such objects need to be passed by reference instead of by value (see the sketch after this list).
-* Use [Bumper.jl](https://github.com/MasonProtter/Bumper.jl) to avoid allocations in some loops.
+* If your code needs an array as a workspace, instead of directly creating it, create it as a function argument (where it could default to a standard array creation). That code could be statically compiled if that function argument is changed to a MallocArray or another static-friendly alternative.
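+
+As a rough sketch of the pass-by-reference pattern mentioned above (the function `minmax_pair` is a hypothetical example, not part of the package):
+
+```julia
+using StaticCompiler, Libdl
+
+# A Tuple{Float64, Float64} return value would not be natively sized, so
+# instead the caller supplies a pointer and both results are written through it.
+function minmax_pair(out::Ptr{Float64}, a::Float64, b::Float64)
+    lo, hi = a < b ? (a, b) : (b, a)
+    unsafe_store!(out, lo, 1)
+    unsafe_store!(out, hi, 2)
+    return 0
+end
+
+lib = dlopen(compile_shlib(minmax_pair, (Ptr{Float64}, Float64, Float64), "./"))
+fptr = dlsym(lib, "minmax_pair")
+out = Vector{Float64}(undef, 2)   # caller-owned storage for the two results
+GC.@preserve out @ccall $fptr(pointer(out)::Ptr{Float64}, 3.0::Float64, 1.0::Float64)::Int
+out   # == [1.0, 3.0]
+```
+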
## Guide for Statically Compiling Code

If you're trying to statically compile generic code, you may run into issues if that code uses features not supported by StaticCompiler. One option is to change the code you're calling using the tips above. If that is not easy, you may be able to compile it anyway. Another option is to use method overlays to change what methods are called.

[Cthulhu](https://github.com/JuliaDebug/Cthulhu.jl) is a great help in digging into code, finding type instabilities, and finding other sources of code that may break static compilation.

@@ -117,9 +110,4 @@ If you're trying to statically compile generic code, you may run into issues if

Because Julia objects follow C memory layouts, compiled libraries should be usable from most languages that can interface with C. For example, results should be usable with Python's [CFFI](https://cffi.readthedocs.io/en/latest/) package.

-For WebAssembly, interface helpers are available at [WebAssemblyInterfaces](https://github.com/tshort/WebAssemblyInterfaces.jl).
-
-
-
-
-
+For WebAssembly, interface helpers are available at [WebAssemblyInterfaces](https://github.com/tshort/WebAssemblyInterfaces.jl), and users should also see [WebAssemblyCompiler](https://github.com/tshort/WebAssemblyCompiler.jl) for a package more focused on compilation of WebAssembly in general.
diff --git a/src/StaticCompiler.jl b/src/StaticCompiler.jl
index cc9ecf66..a44af51d 100644
--- a/src/StaticCompiler.jl
+++ b/src/StaticCompiler.jl
@@ -11,192 +11,20 @@ using Clang_jll: clang
using LLD_jll: lld
using StaticTools
using StaticTools: @symbolcall, @c_str, println
+using Core: MethodTable
-
-export compile, load_function, compile_shlib, compile_executable, compile_wasm
+export load_function, compile_shlib, compile_executable
export native_code_llvm, native_code_typed, native_llvm_module, native_code_native
export @device_override, @print_and_throw

-include("mixtape.jl")
include("interpreter.jl")
include("target.jl")
-include("pointer_patching.jl")
-include("code_loading.jl")
-include("optimize.jl")
+include("pointer_warning.jl")
include("quirks.jl")

fix_name(f::Function) = fix_name(string(nameof(f)))
fix_name(s) = String(GPUCompiler.safe_name(s))

-"""
-    compile(f, types, path::String = tempname()) --> (compiled_f, path)
-
-   !!! Warning: this will fail on programs that have dynamic dispatch !!!
-
-Statically compile the method of a function `f` specialized to arguments of the type given by `types`.
-
-This will create a directory at the specified path (or in a temporary directory if you exclude that argument)
-that contains the files needed for your static compiled function. `compile` will return a
-`StaticCompiledFunction` object and `obj_path` which is the absolute path of the directory containing the
-compilation artifacts. The `StaticCompiledFunction` can be treated as if it is a function with a single
-method corresponding to the types you specified when it was compiled.
- -To deserialize and instantiate a previously compiled function, simply execute `load_function(path)`, which -returns a callable `StaticCompiledFunction`. - -### Example: - -Define and compile a `fib` function: -```julia -julia> using StaticCompiler - -julia> fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) -fib (generic function with 1 method) - -julia> fib_compiled, path = compile(fib, Tuple{Int}, "fib") -(f = fib(::Int64) :: Int64, path = "fib") - -julia> fib_compiled(10) -55 -``` -Now we can quit this session and load a new one where `fib` is not defined: -```julia -julia> fib -ERROR: UndefVarError: fib not defined - -julia> using StaticCompiler - -julia> fib_compiled = load_function("fib.cjl") -fib(::Int64) :: Int64 - -julia> fib_compiled(10) -55 -``` -Tada! - -### Details: - -Here is the structure of the directory created by `compile` in the above example: -```julia -shell> tree fib -path -├── obj.cjl -└── obj.o - -0 directories, 3 files -```` -* `obj.o` contains statically compiled code in the form of an LLVM generated object file. -* `obj.cjl` is a serialized `LazyStaticCompiledFunction` object which will be deserialized and instantiated -with `load_function(path)`. `LazyStaticcompiledfunction`s contain the requisite information needed to link to the -`obj.o` inside a julia session. Once it is instantiated in a julia session (i.e. by -`instantiate(::LazyStaticCompiledFunction)`, this happens automatically in `load_function`), it will be of type -`StaticCompiledFunction` and may be called with arguments of type `types` as if it were a function with a -single method (the method determined by `types`). -""" -function compile(f, _tt, path::String = tempname(); - mixtape = NoContext(), - name = fix_name(f), - filename = "obj", - strip_llvm = false, - strip_asm = true, - opt_level=3, - kwargs...) - - tt = Base.to_tuple_type(_tt) - isconcretetype(tt) || error("input type signature $_tt is not concrete") - - rt = last(only(native_code_typed(f, tt, mixtape = mixtape))) - isconcretetype(rt) || error("$f on $_tt did not infer to a concrete type. Got $rt") - f_wrap!(out::Ref, args::Ref{<:Tuple}) = (out[] = f(args[]...); nothing) - _, _, table = generate_obj_for_compile(f_wrap!, Tuple{RefValue{rt}, RefValue{tt}}, false, path, name; mixtape = mixtape, opt_level, strip_llvm, strip_asm, filename, kwargs...) - - lf = LazyStaticCompiledFunction{rt, tt}(Symbol(f), path, name, filename, table) - cjl_path = joinpath(path, "$filename.cjl") - serialize(cjl_path, lf) - - (; f = instantiate(lf), path=abspath(path)) -end - -""" -```julia -generate_obj_for_compile(f, tt, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; - \tmixtape = NoContext(), - \tstrip_llvm = false, - \tstrip_asm = true, - \ttarget = (), - \topt_level = 3, - \tkwargs...) -``` -Low level interface for compiling object code (`.o`) for for function `f` given -a tuple type `tt` characterizing the types of the arguments for which the -function will be compiled. - -`mixtape` defines a context that can be used to transform IR prior to compilation using -[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. - -`target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. -This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). -The defaults compile to the native target. - -### Examples -```julia -julia> fib(n) = n <= 1 ? 
n : fib(n - 1) + fib(n - 2) -fib (generic function with 1 method) - -julia> path, name, table = StaticCompiler.generate_obj_for_compile(fib, Tuple{Int64}, "./test") -("./test", "fib", IdDict{Any, String}()) - -shell> tree \$path -./test -└── obj.o - -0 directories, 1 file -``` -""" -function generate_obj_for_compile(f, tt, external = true, path::String = tempname(), name = fix_name(f), filenamebase::String="obj"; - mixtape = NoContext(), - strip_llvm = false, - strip_asm = true, - opt_level = 3, - remove_julia_addrspaces = false, - target = (), - kwargs...) - mkpath(path) - obj_path = joinpath(path, "$filenamebase.o") - #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. - params = StaticCompilerParams(opt = true, mixtape = mixtape, optlevel = Base.JLOptions().opt_level) - config = GPUCompiler.CompilerConfig(NativeCompilerTarget(target...), params, name = name, kernel = false) - job = GPUCompiler.CompilerJob(GPUCompiler.methodinstance(typeof(f), tt), config) - - table = GPUCompiler.JuliaContext() do context - mod, meta = GPUCompiler.codegen(:llvm, job; strip=strip_llvm, only_entry=false, validate=false, optimize=false) - # Use Enzyme's annotation and optimization pipeline - annotate!(mod) - tm = GPUCompiler.llvm_machine(external ? ExternalNativeCompilerTarget(target...) : NativeCompilerTarget(target...)) - optimize!(mod, tm) - - # Scoop up all the pointers in the optimized module, and replace them with unitialized global variables. - # `table` is a dictionary where the keys are julia objects that are needed by the function, and the values - # of the dictionary are the names of their associated LLVM GlobalVariable names. - table = relocation_table!(mod) - - # Now that we've removed all the pointers from the code, we can (hopefully) safely lower all the instrinsics - # (again, using Enzyme's pipeline) - post_optimize!(mod, tm; remove_julia_addrspaces) - - # Make sure we didn't make any glaring errors - LLVM.verify(mod) - obj, _ = GPUCompiler.emit_asm(job, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) - # Compile the LLVM module to native code and save it to disk - open(obj_path, "w") do io - write(io, obj) - end - table - end - - - path, name, table -end """ ```julia @@ -204,6 +32,7 @@ compile_executable(f::Function, types::Tuple, path::String, [name::String=string filename::String=name, cflags=``, # Specify libraries you would like to link against, and other compiler options here also_expose=[], + method_table=StaticCompiler.method_table, kwargs... ) ``` @@ -295,11 +124,23 @@ end """ ```julia -compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; filename::String=name, cflags=``, kwargs...) -compile_shlib(funcs::Array, [path::String="./"]; filename="libfoo", demangle=true, cflags=``, kwargs...) +compile_shlib(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; + filename::String=name, + cflags=``, + method_table=StaticCompiler.method_table, + kwargs...) + +compile_shlib(funcs::Array, [path::String="./"]; + filename="libfoo", + demangle=true, + cflags=``, + method_table=StaticCompiler.method_table, + kwargs...) ``` As `compile_executable`, but compiling to a standalone `.dylib`/`.so` shared library. +Arguments and returned values from `compile_shlib` must be native objects such as `Int`, `Float64`, or `Ptr`. They cannot be things like `Tuple{Int, Int}` because that is not natively sized. Such objects need to be passed by reference instead of by value. 
+ If `demangle` is set to `false`, compiled function names are prepended with "julia_". ### Examples @@ -353,38 +194,7 @@ function compile_shlib(funcs::Union{Array,Tuple}, path::String="./"; joinpath(abspath(path), filename * "." * Libdl.dlext) end - -""" -```julia -compile_wasm(f::Function, types::Tuple, [path::String="./"], [name::String=string(nameof(f))]; filename::String=name, flags=``, kwargs...) -compile_wasm(funcs::Union{Array,Tuple}, [path::String="./"]; filename="libfoo", demangle=true, flags=``, kwargs...) -``` -As `compile_shlib`, but compiling to a WebAssembly library. - -If `demangle` is set to `false`, compiled function names are prepended with "julia_". -``` -""" -function compile_wasm(f::Function, types=(); - path::String = "./", - filename = fix_name(f), - flags = ``, - kwargs... - ) - tt = Base.to_tuple_type(types) - obj_path, name = generate_obj_for_compile(f, tt, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) - run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/obj.o -o $path/$name.wasm`) - joinpath(abspath(path), filename * ".wasm") -end -function compile_wasm(funcs::Union{Array,Tuple}; - path::String="./", - filename="libfoo", - flags=``, - kwargs... - ) - obj_path, name = generate_obj(funcs, true, path, filename; target = (triple = "wasm32-unknown-wasi", cpu = "", features = ""), remove_julia_addrspaces = true, kwargs...) - run(`$(lld()) -flavor wasm --no-entry --export-all $flags $obj_path/$filename.o -o $path/$filename.wasm`) - joinpath(abspath(path), filename * ".wasm") -end + """ ```julia @@ -429,6 +239,7 @@ function generate_shlib_fptr(path::String, name, filename::String=name) @assert fptr != C_NULL fptr end + # As above, but also compile (maybe remove this method in the future?) function generate_shlib_fptr(f, tt, path::String=tempname(), name=fix_name(f), filename::String=name; temp::Bool=true, @@ -586,8 +397,10 @@ function native_llvm_module(f, tt, name=fix_name(f); demangle, kwargs...) name = "julia_"*name end job, kwargs = native_job(f, tt, true; name, kwargs...) - m, _ = GPUCompiler.JuliaContext() do context - GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + m = GPUCompiler.JuliaContext() do context + m, _ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + locate_pointers_and_runtime_calls(m) + m end return m end @@ -596,25 +409,26 @@ end function native_llvm_module(funcs::Union{Array,Tuple}; demangle=true, kwargs...) f,tt = funcs[1] mod = GPUCompiler.JuliaContext() do context - name_f = fix_name(f) - if !demangle - name_f = "julia_"*name_f - end - job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) - mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) - if length(funcs) > 1 - for func in funcs[2:end] - f,tt = func - name_f = fix_name(f) - if !demangle - name_f = "julia_"*name_f - end - job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) - tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) - link!(mod,tmod) - end - end - mod + name_f = fix_name(f) + if !demangle + name_f = "julia_"*name_f + end + job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) 
+ mod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + if length(funcs) > 1 + for func in funcs[2:end] + f,tt = func + name_f = fix_name(f) + if !demangle + name_f = "julia_"*name_f + end + job, kwargs = native_job(f, tt, true; name = name_f, kwargs...) + tmod,_ = GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false) + link!(mod,tmod) + end + end + locate_pointers_and_runtime_calls(mod) + mod end # Just to be sure for (modfunc, func) in zip(functions(mod), funcs) @@ -644,7 +458,6 @@ end """ ```julia generate_obj(f, tt, external::Bool, path::String = tempname(), filenamebase::String="obj"; - mixtape = NoContext(), target = (), demangle = true, strip_llvm = false, @@ -656,9 +469,6 @@ Low level interface for compiling object code (`.o`) for for function `f` given a tuple type `tt` characterizing the types of the arguments for which the function will be compiled. -`mixtape` defines a context that can be used to transform IR prior to compilation using -[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. - `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. @@ -688,7 +498,6 @@ end """ ```julia generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = tempname(), filenamebase::String="obj"; - mixtape = NoContext(), target = (), demangle =false, strip_llvm = false, @@ -700,9 +509,6 @@ Low level interface for compiling object code (`.o`) for an array of Tuples (f, tt) where each function `f` and tuple type `tt` determine the set of methods which will be compiled. -`mixtape` defines a context that can be used to transform IR prior to compilation using -[Mixtape](https://github.com/JuliaCompilerPlugins/Mixtape.jl) features. - `target` can be used to change the output target. This is useful for compiling to WebAssembly and embedded targets. This is a named tuple with fields `triple`, `cpu`, and `features` (each of these are strings). The defaults compile to the native target. @@ -717,8 +523,11 @@ function generate_obj(funcs::Union{Array,Tuple}, external::Bool, path::String = mkpath(path) obj_path = joinpath(path, "$filenamebase.o") mod = native_llvm_module(funcs; demangle, kwargs...) - fakejob, _ = native_job(f, tt, external; kwargs...) - obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + obj = GPUCompiler.JuliaContext() do ctx + fakejob, _ = native_job(f, tt, external; kwargs...) + obj, _ = GPUCompiler.emit_asm(fakejob, mod; strip=strip_asm, validate=false, format=LLVM.API.LLVMObjectFile) + obj + end open(obj_path, "w") do io write(io, obj) end diff --git a/src/code_loading.jl b/src/code_loading.jl deleted file mode 100644 index 8fc1ae60..00000000 --- a/src/code_loading.jl +++ /dev/null @@ -1,84 +0,0 @@ -""" - load_function(path) --> compiled_f - -load a `StaticCompiledFunction` from a given path. This object is callable. 
-""" -function load_function(path; filename="obj") - instantiate(deserialize(joinpath(path, "$filename.cjl"))) -end - -struct LazyStaticCompiledFunction{rt, tt} - f::Symbol - path::String - name::String - filename::String - reloc::IdDict{Any,String} -end - -""" - unsafe_pointer_from_objref(x) - -Sometimes Julia embeds immutables like `Base.string` into code, and julia -will error if you call `pointer_from_objref(string)`, claiming that it -doesn't have a pointer even though that's a lie. -""" -unsafe_pointer_from_objref(x) = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), x) - -function instantiate(p::LazyStaticCompiledFunction{rt, tt}) where {rt, tt} - # LLVM.load_library_permantly(dirname(Libdl.dlpath(Libdl.dlopen("libjulia")))) - lljit = LLVM.LLJIT(;tm=LLVM.JITTargetMachine()) - jd = LLVM.JITDylib(lljit) - flags = LLVM.API.LLVMJITSymbolFlags(LLVM.API.LLVMJITSymbolGenericFlagsExported, 0) - ofile = LLVM.MemoryBufferFile(joinpath(p.path, "$(p.filename).o")) #$(Libdl.dlext) - - - # Set all the uninitialized global variables to point to julia values from the relocation table - for (val, name) ∈ p.reloc - address = LLVM.API.LLVMOrcJITTargetAddress(reinterpret(UInt, unsafe_pointer_from_objref(val))) - - symbol = LLVM.API.LLVMJITEvaluatedSymbol(address, flags) - gv = LLVM.API.LLVMJITCSymbolMapPair(LLVM.mangle(lljit, name), symbol) - mu = absolute_symbols(Ref(gv)) - LLVM.define(jd, mu) - end - # consider switching to one mu for all gvs instead of one per gv. - # I tried that already, but I got an error saying - # JIT session error: Symbols not found: [ __Type_Vector_Float64___274 ] - - # Link to libjulia - prefix = LLVM.get_prefix(lljit) - dg = LLVM.CreateDynamicLibrarySearchGeneratorForProcess(prefix) - LLVM.add!(jd, dg) - LLVM.add!(lljit, jd, ofile) - fptr = pointer(LLVM.lookup(lljit, p.name)) - - StaticCompiledFunction{rt, tt}(p.f, fptr, lljit, p.reloc) -end - -function absolute_symbols(symbols) - ref = LLVM.API.LLVMOrcAbsoluteSymbols(symbols, length(symbols)) - LLVM.MaterializationUnit(ref) -end - - -struct StaticCompiledFunction{rt, tt} - f::Symbol - ptr::Ptr{Nothing} - jit::LLVM.LLJIT - reloc::IdDict{Any, String} -end - -function Base.show(io::IO, f::StaticCompiledFunction{rt, tt}) where {rt, tt} - types = [tt.parameters...] - print(io, String(f.f), "(", join(("::$T" for T ∈ tt.parameters), ',') ,") :: $rt") -end - -function (f::StaticCompiledFunction{rt, tt})(args...) where {rt, tt} - Tuple{typeof.(args)...} == tt || error("Input types don't match compiled target $((tt.parameters...,)). 
Got arguments of type $(typeof.(args))") - out = RefValue{rt}() - refargs = Ref(args) - ccall(f.ptr, Nothing, (Ptr{rt}, Ref{tt}), pointer_from_objref(out), refargs) - out[] -end - -instantiate(f::StaticCompiledFunction) = f diff --git a/src/interpreter.jl b/src/interpreter.jl index c0e00c37..344cc53d 100644 --- a/src/interpreter.jl +++ b/src/interpreter.jl @@ -7,7 +7,7 @@ using GPUCompiler: using CodeInfoTools using CodeInfoTools: resolve -struct StaticInterpreter{M} <: AbstractInterpreter +struct StaticInterpreter <: AbstractInterpreter global_cache::CodeCache method_table::Union{Nothing,Core.MethodTable} @@ -20,13 +20,10 @@ struct StaticInterpreter{M} <: AbstractInterpreter inf_params::InferenceParams opt_params::OptimizationParams - # Mixtape context - mixtape::M - - function StaticInterpreter(cache::CodeCache, mt::Union{Nothing,Core.MethodTable}, world::UInt, ip::InferenceParams, op::OptimizationParams, mixtape::CompilationContext) + function StaticInterpreter(cache::CodeCache, mt::Union{Nothing,Core.MethodTable}, world::UInt, ip::InferenceParams, op::OptimizationParams) @assert world <= Base.get_world_counter() - return new{typeof(mixtape)}( + return new( cache, mt, @@ -38,10 +35,7 @@ struct StaticInterpreter{M} <: AbstractInterpreter # parameters for inference and optimization ip, - op, - - # Mixtape context - mixtape + op ) end end @@ -79,9 +73,6 @@ function custom_pass!(interp::StaticInterpreter, result::InferenceResult, mi::Co mi.specTypes isa UnionAll && return src sig = Tuple(mi.specTypes.parameters) as = map(resolve_generic, sig) - if allow(interp.mixtape, mi.def.module, as...) - src = transform(interp.mixtape, src, sig) - end return src end @@ -102,22 +93,21 @@ end Core.Compiler.may_optimize(interp::StaticInterpreter) = true Core.Compiler.may_compress(interp::StaticInterpreter) = true Core.Compiler.may_discard_trees(interp::StaticInterpreter) = true -if VERSION >= v"1.7.0-DEV.577" Core.Compiler.verbose_stmt_info(interp::StaticInterpreter) = false -end + if isdefined(Base.Experimental, Symbol("@overlay")) -using Core.Compiler: OverlayMethodTable -if v"1.8-beta2" <= VERSION < v"1.9-" || VERSION >= v"1.9.0-DEV.120" -Core.Compiler.method_table(interp::StaticInterpreter) = - OverlayMethodTable(interp.world, interp.method_table) -else -Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = - OverlayMethodTable(interp.world, interp.method_table) -end + using Core.Compiler: OverlayMethodTable + if v"1.8-beta2" <= VERSION < v"1.9-" || VERSION >= v"1.9.0-DEV.120" + Core.Compiler.method_table(interp::StaticInterpreter) = + OverlayMethodTable(interp.world, interp.method_table) + else + Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = + OverlayMethodTable(interp.world, interp.method_table) + end else -Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = - WorldOverlayMethodTable(interp.world) + Core.Compiler.method_table(interp::StaticInterpreter, sv::InferenceState) = + WorldOverlayMethodTable(interp.world) end # semi-concrete interepretation is broken with overlays (JuliaLang/julia#47349) @@ -134,13 +124,11 @@ end struct StaticCompilerParams <: AbstractCompilerParams opt::Bool optlevel::Int - mixtape::CompilationContext cache::CodeCache end function StaticCompilerParams(; opt = false, optlevel = Base.JLOptions().opt_level, - mixtape = NoContext(), cache = CodeCache()) - return StaticCompilerParams(opt, optlevel, mixtape, cache) + return StaticCompilerParams(opt, optlevel, cache) end diff --git a/src/optimize.jl 
b/src/optimize.jl deleted file mode 100644 index b781517b..00000000 --- a/src/optimize.jl +++ /dev/null @@ -1,328 +0,0 @@ -# stolen from https://github.com/EnzymeAD/Enzyme.jl/blob/1b187cc16953727cab26b64bc6a6dcf106c29a57/src/compiler/optimize.jl#L213 - -function optimize!(mod::LLVM.Module, tm) - # everying except unroll, slpvec, loop-vec - # then finish Julia GC - ModulePassManager() do pm - add_library_info!(pm, triple(mod)) - add_transform_info!(pm, tm) - - propagate_julia_addrsp!(pm) - scoped_no_alias_aa!(pm) - type_based_alias_analysis!(pm) - basic_alias_analysis!(pm) - cfgsimplification!(pm) - dce!(pm) -@static if isdefined(GPUCompiler, :cpu_features!) - GPUCompiler.cpu_features!(pm) -end - scalar_repl_aggregates_ssa!(pm) # SSA variant? - mem_cpy_opt!(pm) - always_inliner!(pm) - alloc_opt!(pm) - instruction_combining!(pm) - cfgsimplification!(pm) - scalar_repl_aggregates_ssa!(pm) # SSA variant? - instruction_combining!(pm) - jump_threading!(pm) - correlated_value_propagation!(pm) - instruction_combining!(pm) - reassociate!(pm) - early_cse!(pm) - alloc_opt!(pm) - loop_idiom!(pm) - loop_rotate!(pm) - lower_simdloop!(pm) - licm!(pm) - loop_unswitch!(pm) - instruction_combining!(pm) - ind_var_simplify!(pm) - loop_deletion!(pm) - loop_unroll!(pm) - alloc_opt!(pm) - scalar_repl_aggregates_ssa!(pm) # SSA variant? - gvn!(pm) - # This InstCombine needs to be after GVN - # Otherwise it will generate load chains in GPU code... - instruction_combining!(pm) - mem_cpy_opt!(pm) - sccp!(pm) - instruction_combining!(pm) - jump_threading!(pm) - dead_store_elimination!(pm) - alloc_opt!(pm) - cfgsimplification!(pm) - loop_idiom!(pm) - loop_deletion!(pm) - jump_threading!(pm) - correlated_value_propagation!(pm) - # SLP_Vectorizer -- not for Enzyme - aggressive_dce!(pm) - instruction_combining!(pm) - # Loop Vectorize -- not for Enzyme - # InstCombine - - # GC passes - barrier_noop!(pm) - gc_invariant_verifier!(pm, false) - - # FIXME: Currently crashes printing - cfgsimplification!(pm) - instruction_combining!(pm) # Extra for Enzyme - #API.EnzymeAddAttributorLegacyPass(pm) - run!(pm, mod) - end - # @show "omod", mod - # flush(stdout) - # flush(stderr) -end - -# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L603 -function addTargetPasses!(pm, tm) - add_library_info!(pm, LLVM.triple(tm)) - add_transform_info!(pm, tm) -end - -# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L620 -function addOptimizationPasses!(pm) - constant_merge!(pm) - - propagate_julia_addrsp!(pm) - scoped_no_alias_aa!(pm) - type_based_alias_analysis!(pm) - basic_alias_analysis!(pm) - cfgsimplification!(pm) - dce!(pm) - scalar_repl_aggregates!(pm) - - # mem_cpy_opt!(pm) - - always_inliner!(pm) # Respect always_inline - - # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time - # merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt` - # pass. - - alloc_opt!(pm) - # consider AggressiveInstCombinePass at optlevel > 2 - - instruction_combining!(pm) - cfgsimplification!(pm) - scalar_repl_aggregates!(pm) - instruction_combining!(pm) # TODO: createInstSimplifyLegacy - jump_threading!(pm) - correlated_value_propagation!(pm) - - reassociate!(pm) - - early_cse!(pm) - - # Load forwarding above can expose allocations that aren't actually used - # remove those before optimizing loops. 
- alloc_opt!(pm) - loop_rotate!(pm) - # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1) - loop_idiom!(pm) - - # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards - lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop - licm!(pm) - julia_licm!(pm) - # Subsequent passes not stripping metadata from terminator - instruction_combining!(pm) # TODO: createInstSimplifyLegacy - ind_var_simplify!(pm) - loop_deletion!(pm) - loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll - - # Run our own SROA on heap objects before LLVM's - alloc_opt!(pm) - # Re-run SROA after loop-unrolling (useful for small loops that operate, - # over the structure of an aggregate) - scalar_repl_aggregates!(pm) - instruction_combining!(pm) # TODO: createInstSimplifyLegacy - - gvn!(pm) - mem_cpy_opt!(pm) - sccp!(pm) - - # Run instcombine after redundancy elimination to exploit opportunities - # opened up by them. - # This needs to be InstCombine instead of InstSimplify to allow - # loops over Union-typed arrays to vectorize. - instruction_combining!(pm) - jump_threading!(pm) - dead_store_elimination!(pm) - - # More dead allocation (store) deletion before loop optimization - # consider removing this: - alloc_opt!(pm) - - # see if all of the constant folding has exposed more loops - # to simplification and deletion - # this helps significantly with cleaning up iteration - cfgsimplification!(pm) - loop_deletion!(pm) - instruction_combining!(pm) - loop_vectorize!(pm) - # TODO: createLoopLoadEliminationPass - cfgsimplification!(pm) - slpvectorize!(pm) - # might need this after LLVM 11: - # TODO: createVectorCombinePass() - - aggressive_dce!(pm) -end - -function addMachinePasses!(pm) - combine_mul_add!(pm) - # TODO: createDivRemPairs[] - - demote_float16!(pm) - gvn!(pm) -end - -function addJuliaLegalizationPasses!(pm, lower_intrinsics=true) - if lower_intrinsics - # LowerPTLS removes an indirect call. As a result, it is likely to trigger - # LLVM's devirtualization heuristics, which would result in the entire - # pass pipeline being re-exectuted. Prevent this by inserting a barrier. - barrier_noop!(pm) - lower_exc_handlers!(pm) - gc_invariant_verifier!(pm, false) - - # Needed **before** LateLowerGCFrame on LLVM < 12 - # due to bug in `CreateAlignmentAssumption`. - remove_ni!(pm) - late_lower_gc_frame!(pm) - final_lower_gc!(pm) - # We need these two passes and the instcombine below - # after GC lowering to let LLVM do some constant propagation on the tags. - # and remove some unnecessary write barrier checks. 
- gvn!(pm) - sccp!(pm) - # Remove dead use of ptls - dce!(pm) - lower_ptls!(pm, #=dump_native=# false) - instruction_combining!(pm) - # Clean up write barrier and ptls lowering - cfgsimplification!(pm) - else - barrier_noop!(pm) - remove_ni!(pm) - end -end - -function post_optimize!(mod, tm; remove_julia_addrspaces = false) - # @show "pre_post", mod - # flush(stdout) - # flush(stderr) - LLVM.ModulePassManager() do pm - addTargetPasses!(pm, tm) - addOptimizationPasses!(pm) - run!(pm, mod) - end - LLVM.ModulePassManager() do pm - addJuliaLegalizationPasses!(pm, true) - addMachinePasses!(pm) - if remove_julia_addrspaces - remove_julia_addrspaces!(pm) - end - run!(pm, mod) - end - # @show "post_mod", mod - # flush(stdout) - # flush(stderr) -end - - - - -const inactivefns = Set{String}(( - "jl_gc_queue_root", "gpu_report_exception", "gpu_signal_exception", - "julia.ptls_states", "julia.write_barrier", "julia.typeof", "jl_box_int64", "jl_box_int32", - "jl_subtype", "julia.get_pgcstack", "jl_in_threaded_region", "jl_object_id_", "jl_object_id", - "jl_breakpoint", - "llvm.julia.gc_preserve_begin","llvm.julia.gc_preserve_end", "jl_get_ptls_states", - "jl_f_fieldtype", - "jl_symbol_n", - # BIG TODO - "jl_gc_add_finalizer_th", - # "jl_" -)) - -const activefns = Set{String}(( - "jl_", -)) - -function annotate!(mod) - inactive = LLVM.StringAttribute("enzyme_inactive", "") - active = LLVM.StringAttribute("enzyme_active", "") - fns = functions(mod) - - for inactivefn in inactivefns - if haskey(fns, inactivefn) - fn = fns[inactivefn] - push!(function_attributes(fn), inactive) - end - end - - for activefn in activefns - if haskey(fns, activefn) - fn = fns[activefn] - push!(function_attributes(fn), active) - end - end - - for fname in ("julia.typeof",) - if haskey(fns, fname) - fn = fns[fname] - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) - push!(function_attributes(fn), LLVM.StringAttribute("enzyme_shouldrecompute")) - end - end - - for fname in ("julia.get_pgcstack", "julia.ptls_states", "jl_get_ptls_states") - if haskey(fns, fname) - fn = fns[fname] - # TODO per discussion w keno perhaps this should change to readonly / inaccessiblememonly - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) - end - end - - for fname in ("julia.pointer_from_objref",) - if haskey(fns, fname) - fn = fns[fname] - push!(function_attributes(fn), LLVM.EnumAttribute("readnone", 0)) - end - end - - for boxfn in ("jl_box_float32", "jl_box_float64", "jl_box_int32", "jl_box_int64", "julia.gc_alloc_obj", "jl_alloc_array_1d", "jl_alloc_array_2d", "jl_alloc_array_3d") - if haskey(fns, boxfn) - fn = fns[boxfn] - push!(return_attributes(fn), LLVM.EnumAttribute("noalias", 0)) - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) - end - end - - for gc in ("llvm.julia.gc_preserve_begin", "llvm.julia.gc_preserve_end") - if haskey(fns, gc) - fn = fns[gc] - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) - end - end - - for rfn in ("jl_object_id_", "jl_object_id") - if haskey(fns, rfn) - fn = fns[rfn] - push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0)) - end - end - - for rfn in ("jl_in_threaded_region_", "jl_in_threaded_region") - if haskey(fns, rfn) - fn = fns[rfn] - push!(function_attributes(fn), LLVM.EnumAttribute("readonly", 0)) - push!(function_attributes(fn), LLVM.EnumAttribute("inaccessiblememonly", 0)) - end - end -end diff --git a/src/pointer_patching.jl b/src/pointer_patching.jl deleted file mode 100644 index 
27a44a08..00000000 --- a/src/pointer_patching.jl +++ /dev/null @@ -1,185 +0,0 @@ -function relocation_table!(mod) - i64 = LLVM.IntType(64) - d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() - - for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) - if isa(inst, LLVM.LoadInst) && occursin("inttoptr", string(inst)) - get_pointers!(d, mod, inst) - elseif isa(inst, LLVM.StoreInst) && occursin("inttoptr", string(inst)) - @debug "Relocating StoreInst" inst - get_pointers!(d, mod, inst) - elseif inst isa LLVM.RetInst && occursin("inttoptr", string(inst)) - @debug "Relocating RetInst" inst LLVM.operands(inst) - get_pointers!(d, mod, inst) - elseif isa(inst, LLVM.BitCastInst) && occursin("inttoptr", string(inst)) - @debug "Relocating BitCastInst" inst LLVM.operands(inst) - get_pointers!(d, mod, inst) - elseif isa(inst, LLVM.CallInst) - @debug "Relocating CallInst" inst LLVM.operands(inst) - dest = LLVM.called_value(inst) - if occursin("inttoptr", string(dest)) && length(LLVM.operands(dest)) > 0 - @debug "Relocating CallInst inttoptr" dest LLVM.operands(dest) LLVM.operands(inst) - ptr_arg = first(LLVM.operands(dest)) - ptr_val = convert(Int, ptr_arg) - ptr = Ptr{Cvoid}(ptr_val) - - frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) - if length(frames) >= 1 - fn, file, line, linfo, fromC, inlined = last(frames) - fn = string(fn) - if ptr == cglobal(:jl_alloc_array_1d) - fn = "jl_alloc_array_1d" - end - if ptr == cglobal(:jl_alloc_array_2d) - fn = "jl_alloc_array_2d" - end - if ptr == cglobal(:jl_alloc_array_3d) - fn = "jl_alloc_array_3d" - end - if ptr == cglobal(:jl_new_array) - fn = "jl_new_array" - end - if ptr == cglobal(:jl_array_copy) - fn = "jl_array_copy" - end - if ptr == cglobal(:jl_alloc_string) - fn = "jl_alloc_string" - end - if ptr == cglobal(:jl_in_threaded_region) - fn = "jl_in_threaded_region" - end - if ptr == cglobal(:jl_enter_threaded_region) - fn = "jl_enter_threaded_region" - end - if ptr == cglobal(:jl_exit_threaded_region) - fn = "jl_exit_threaded_region" - end - if ptr == cglobal(:jl_set_task_tid) - fn = "jl_set_task_tid" - end - if ptr == cglobal(:jl_new_task) - fn = "jl_new_task" - end - if ptr == cglobal(:malloc) - fn = "malloc" - end - if ptr == cglobal(:memmove) - fn = "memmove" - end - if ptr == cglobal(:jl_array_grow_beg) - fn = "jl_array_grow_beg" - end - if ptr == cglobal(:jl_array_grow_end) - fn = "jl_array_grow_end" - end - if ptr == cglobal(:jl_array_grow_at) - fn = "jl_array_grow_at" - end - if ptr == cglobal(:jl_array_del_beg) - fn = "jl_array_del_beg" - end - if ptr == cglobal(:jl_array_del_end) - fn = "jl_array_del_end" - end - if ptr == cglobal(:jl_array_del_at) - fn = "jl_array_del_at" - end - if ptr == cglobal(:jl_array_ptr) - fn = "jl_array_ptr" - end - if ptr == cglobal(:jl_value_ptr) - fn = "jl_value_ptr" - end - if ptr == cglobal(:jl_get_ptls_states) - fn = "jl_get_ptls_states" - end - if ptr == cglobal(:jl_gc_add_finalizer_th) - fn = "jl_gc_add_finalizer_th" - end - if ptr == cglobal(:jl_symbol_n) - fn = "jl_symbol_n" - end - end - - if length(fn) > 1 && fromC - mod = LLVM.parent(LLVM.parent(LLVM.parent(inst))) - lfn = LLVM.API.LLVMGetNamedFunction(mod, fn) - - if lfn == C_NULL - lfn = LLVM.API.LLVMAddFunction(mod, fn, LLVM.API.LLVMGetCalledFunctionType(inst)) - else - lfn = LLVM.API.LLVMConstBitCast(lfn, LLVM.PointerType(LLVM.FunctionType(LLVM.API.LLVMGetCalledFunctionType(inst)))) - end - LLVM.API.LLVMSetOperand(inst, LLVM.API.LLVMGetNumOperands(inst)-1, lfn) - end - end - get_pointers!(d, 
mod, inst) - end - end - IdDict{Any, String}(val => name for (val, (name, _)) ∈ d) -end - -function get_pointers!(d, mod, inst) - jl_t = (LLVM.StructType(LLVM.LLVMType[])) - for (i, arg) ∈ enumerate(LLVM.operands(inst)) - if occursin("inttoptr", string(arg)) && arg isa LLVM.ConstantExpr - op1 = LLVM.Value(LLVM.API.LLVMGetOperand(arg, 0)) - if op1 isa LLVM.ConstantExpr - op1 = LLVM.Value(LLVM.API.LLVMGetOperand(op1, 0)) - end - ptr = Ptr{Cvoid}(convert(Int, op1)) - frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) - if length(frames) >= 1 - fn, file, line, linfo, fromC, inlined = last(frames) - if (isempty(String(fn)) && isempty(String(file))) || fn == :jl_system_image_data - val = unsafe_pointer_to_objref(ptr) - if val ∈ keys(d) - _, gv = d[val] - LLVM.API.LLVMSetOperand(inst, i-1, gv) - else - gv_name = fix_name(String(gensym(repr(Core.Typeof(val))))) - gv = LLVM.GlobalVariable(mod, llvmeltype(arg), gv_name, LLVM.addrspace(value_type(arg))) - - LLVM.extinit!(gv, true) - LLVM.API.LLVMSetOperand(inst, i-1, gv) - - d[val] = (gv_name, gv) - end - else - @warn "Found data we don't know how to relocate." frames - end - end - end - end -end - -llvmeltype(x::LLVM.Value) = eltype(LLVM.value_type(x)) - -function pointer_patching_diff(f, tt, path1=tempname(), path2=tempname(); show_reloc_table=false) - tm = GPUCompiler.llvm_machine(NativeCompilerTarget()) - job, kwargs = native_job(f, tt, false; name=fix_name(string(nameof(f)))) - #Get LLVM to generated a module of code for us. We don't want GPUCompiler's optimization passes. - mod, meta = GPUCompiler.JuliaContext() do context - GPUCompiler.codegen(:llvm, job; strip=true, only_entry=false, validate=false, optimize=false) - end - # Use Enzyme's annotation and optimization pipeline - annotate!(mod) - optimize!(mod, tm) - - s1 = string(mod) - write(path1, s1) - - d = StaticCompiler.relocation_table!(mod) - if show_reloc_table - @show d - end - - s2 = string(mod) - write(path2, s2) - - pdiff = run(Cmd(`diff $path1 $path2`, ignorestatus=true)) - pdiff.exitcode == 2 && error("Showing diff caused an error") - nothing -end - - diff --git a/src/pointer_warning.jl b/src/pointer_warning.jl new file mode 100644 index 00000000..9f8f30c8 --- /dev/null +++ b/src/pointer_warning.jl @@ -0,0 +1,72 @@ +function locate_pointers_and_runtime_calls(mod) + i64 = LLVM.IntType(64) + # d = IdDict{Any, Tuple{String, LLVM.GlobalVariable}}() + for func ∈ LLVM.functions(mod), bb ∈ LLVM.blocks(func), inst ∈ LLVM.instructions(bb) + warned = false + if isa(inst, LLVM.LoadInst) && occursin("inttoptr", string(inst)) + warned = inspect_pointers(mod, inst) + elseif isa(inst, LLVM.StoreInst) && occursin("inttoptr", string(inst)) + @debug "Inspecting StoreInst" inst + warned = inspect_pointers(mod, inst) + elseif inst isa LLVM.RetInst && occursin("inttoptr", string(inst)) + @debug "Inspecting RetInst" inst LLVM.operands(inst) + warned = inspect_pointers(mod, inst) + elseif isa(inst, LLVM.BitCastInst) && occursin("inttoptr", string(inst)) + @debug "Inspecting BitCastInst" inst LLVM.operands(inst) + warned = inspect_pointers(mod, inst) + elseif isa(inst, LLVM.CallInst) + @debug "Inspecting CallInst" inst LLVM.operands(inst) + dest = LLVM.called_operand(inst) + if occursin("inttoptr", string(dest)) && length(LLVM.operands(dest)) > 0 + @debug "Inspecting CallInst inttoptr" dest LLVM.operands(dest) LLVM.operands(inst) + ptr_arg = first(LLVM.operands(dest)) + ptr_val = convert(Int, ptr_arg) + ptr = Ptr{Cvoid}(ptr_val) + + frames = ccall(:jl_lookup_code_address, Any, 
(Ptr{Cvoid}, Cint,), ptr, 0) + + data_warnings(inst, frames) + warned = true + end + end + if warned + @warn("LLVM function generated warnings due to raw pointers embedded in the code. This will likely cause errors or undefined behaviour.", + func = func) + end + end +end + +function inspect_pointers(mod, inst) + warned = false + jl_t = (LLVM.StructType(LLVM.LLVMType[])) + for (i, arg) ∈ enumerate(LLVM.operands(inst)) + if occursin("inttoptr", string(arg)) && arg isa LLVM.ConstantExpr + op1 = LLVM.Value(LLVM.API.LLVMGetOperand(arg, 0)) + if op1 isa LLVM.ConstantExpr + op1 = LLVM.Value(LLVM.API.LLVMGetOperand(op1, 0)) + end + ptr = Ptr{Cvoid}(convert(Int, op1)) + frames = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint,), ptr, 0) + data_warnings(inst, frames) + warned = true + end + end + warned +end + +data_warnings(inst, frames) = for frame ∈ frames + fn, file, line, linfo, fromC, inlined = frame + @warn("Found pointer references to julia data", + "llvm instruction" = inst, + name = fn, + file = file, + line = line, + fromC = fromC, + inlined = inlined) +end + +llvmeltype(x::LLVM.Value) = eltype(LLVM.value_type(x)) + + + + diff --git a/src/target.jl b/src/target.jl index d9647b4d..777960a3 100644 --- a/src/target.jl +++ b/src/target.jl @@ -4,7 +4,6 @@ else const method_table = nothing end -const overrides = quote end """ ```julia @@ -25,26 +24,23 @@ macro device_override(ex) error() end code = quote - $GPUCompiler.@override(StaticCompiler.method_table, $ex) - end - if isdefined(Base.Experimental, Symbol("@overlay")) - return esc(code) - else - push!(overrides, code) - return + $Base.Experimental.@overlay($StaticCompiler.method_table, $ex) end + return esc(code) end -Base.@kwdef struct NativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget +Base.@kwdef struct NativeCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget triple::String=Sys.MACHINE cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) + method_table::MT = method_table end -Base.@kwdef struct ExternalNativeCompilerTarget <: GPUCompiler.AbstractCompilerTarget +Base.@kwdef struct ExternalNativeCompilerTarget{MT} <: GPUCompiler.AbstractCompilerTarget triple::String=Sys.MACHINE cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? 
"" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) + method_table::MT = method_table end module StaticRuntime @@ -72,44 +68,43 @@ for target in (:NativeCompilerTarget, :ExternalNativeCompilerTarget) return tm end - GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{$target}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))" + GPUCompiler.runtime_slug(job::GPUCompiler.CompilerJob{<:$target}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))" - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target}) = StaticRuntime - GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = StaticRuntime + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:$target}) = StaticRuntime + GPUCompiler.runtime_module(::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = StaticRuntime - GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = true - GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{$target}) = true + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = true + GPUCompiler.can_throw(job::GPUCompiler.CompilerJob{<:$target}) = true - GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = + GPUCompiler.get_interpreter(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = StaticInterpreter(job.config.params.cache, GPUCompiler.method_table(job), job.world, - GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job), - job.config.params.mixtape) - GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{$target, StaticCompilerParams}) = job.config.params.cache + GPUCompiler.inference_params(job), GPUCompiler.optimization_params(job)) + GPUCompiler.ci_cache(job::GPUCompiler.CompilerJob{<:$target, StaticCompilerParams}) = job.config.params.cache + GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{<:$target})) = job.config.target.method_table end end -GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget})) = method_table -GPUCompiler.method_table(@nospecialize(job::GPUCompiler.CompilerJob{ExternalNativeCompilerTarget, StaticCompilerParams})) = method_table - function native_job(@nospecialize(func::Function), @nospecialize(types::Type), external::Bool; - mixtape = NoContext(), name = fix_name(func), kernel::Bool = false, - target = (), + target = (;), + method_table=method_table, kwargs... ) + target = merge(target, (;method_table)) source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) - params = StaticCompilerParams(mixtape = mixtape) + params = StaticCompilerParams() config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) StaticCompiler.CompilerJob(source, config), kwargs end -function native_job(@nospecialize(func), @nospecialize(types), external; mixtape = NoContext(), kernel::Bool=false, name=fix_name(repr(func)), target = (), kwargs...) +function native_job(@nospecialize(func), @nospecialize(types), external; kernel::Bool=false, name=fix_name(repr(func)), target = (;), method_table=method_table, kwargs...) + target = merge(target, (; method_table)) source = methodinstance(typeof(func), Base.to_tuple_type(types)) target = external ? ExternalNativeCompilerTarget(;target...) : NativeCompilerTarget(;target...) 
- params = StaticCompilerParams(mixtape = mixtape) + params = StaticCompilerParams() config = GPUCompiler.CompilerConfig(target, params, name = name, kernel = kernel) GPUCompiler.CompilerJob(source, config), kwargs end diff --git a/test/Project.toml b/test/Project.toml index cc88d4e8..5498846c 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,4 @@ [deps] -CodeInfoTools = "bc773b8a-8374-437a-b9f2-0e9785855863" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55" @@ -14,3 +13,7 @@ StaticTools = "86c06d3c-3f03-46de-9781-57580aa96d0a" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" + +[compat] +Bumper = "0.5.1" \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 697ef7e8..542659c2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,19 +7,19 @@ using ManualMemory using Distributed using StaticTools using StrideArraysCore -using CodeInfoTools using MacroTools using LLD_jll +using Bumper addprocs(1) @everywhere using StaticCompiler, StrideArraysCore const GROUP = get(ENV, "GROUP", "All") -@static if GROUP == "Core" || GROUP == "All" +if GROUP == "Core" || GROUP == "All" include("testcore.jl") end -@static if GROUP == "Integration" || GROUP == "All" +if GROUP == "Integration" || GROUP == "All" include("testintegration.jl") end diff --git a/test/testcore.jl b/test/testcore.jl index 823729cb..e705ed64 100644 --- a/test/testcore.jl +++ b/test/testcore.jl @@ -1,234 +1,9 @@ workdir = tempdir() -# workdir = "./" # For debugging -remote_load_call(path, args...) = fetch(@spawnat 2 load_function(path)(args...)) - -@testset "Basics" begin - - simple_sum(x) = x + one(typeof(x)) - - # This probably needs a macro - for T ∈ (Int, Float64, Int32, Float32, Int16, Float16) - _, path, = compile(simple_sum, (T,)) - @test remote_load_call(path, T(1)) == T(2) - end -end fib(n) = n <= 1 ? n : fib(n - 1) + fib(n - 2) # This needs to be defined globally due to https://github.com/JuliaLang/julia/issues/40990 -@testset "Recursion" begin - _, path = compile(fib, (Int,)) - @test remote_load_call(path, 10) == fib(10) - - # Trick to work around #40990 - _fib2(_fib2, n) = n <= 1 ? n : _fib2(_fib2, n-1) + _fib2(_fib2, n-2) - fib2(n) = _fib2(_fib2, n) - - _, path = compile(fib2, (Int,)) - @test remote_load_call(path, 20) == fib(20) - #@test compile(fib2, (Int,))[1](20) == fib(20) -end - - -@testset "Loops" begin - function sum_first_N_int(N) - s = 0 - for a in 1:N - s += a - end - s - end - _, path = compile(sum_first_N_int, (Int,)) - @test remote_load_call(path, 10) == 55 - - function sum_first_N_float64(N) - s = Float64(0) - for a in 1:N - s += Float64(a) - end - s - end - _, path = compile(sum_first_N_float64, (Int,)) - @test remote_load_call(path, 10) == 55. - - function sum_first_N_int_inbounds(N) - s = 0 - @inbounds for a in 1:N - s += a - end - s - end - _, path = compile(sum_first_N_int_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55 - - function sum_first_N_float64_inbounds(N) - s = Float64(0) - @inbounds for a in 1:N - s += Float64(a) - end - s - end - _, path = compile(sum_first_N_float64_inbounds, (Int,)) - @test remote_load_call(path, 10) == 55. -end - -# Arrays with different input types Int32, Int64, Float32, Float64, Complex? 
-@testset "Arrays" begin - function array_sum(n, A) - s = zero(eltype(A)) - for i in 1:n - s += A[i] - end - s - end - for T ∈ (Int, Complex{Float32}, Complex{Float64}) - _, path = compile(array_sum, (Int, Vector{T})) - @test remote_load_call(path, 10, T.(1:10)) == T(55) - end -end - -@testset "Array allocations" begin - function f(N) - v = Vector{Float64}(undef, N) - for i ∈ eachindex(v) - v[i] = i*i - end - v - end - _, path = compile(f, (Int,)) - @test remote_load_call(path, 5) == [1.0, 4.0, 9.0, 16.0, 25.0] -end - -# This is also a good test of loading and storing from the same object -@testset "Load & Store Same object" begin - global const x = Ref(0) - counter() = x[] += 1 - _, path = compile(counter, ()) - @spawnat 2 global counter = load_function(path) - @test fetch(@spawnat 2 counter()) == 1 - @test fetch(@spawnat 2 counter()) == 2 -end - -# This is also a good test of loading and storing from the same object -counter = let x = Ref(0) - () -> x[] += 1 -end -@testset "Closures" begin - #this currently segfaults during compilation - @test_skip begin - _, path = compile(counter, ()) - @spawnat 2 global counter_comp = load_function(path) - @test fetch(@spawnat 2 counter_comp()) == 1 - @test fetch(@spawnat 2 counter_comp()) == 2 - end -end - - -@testset "Error handling" begin - _, path = compile(sqrt, (Int,)) - tsk = @spawnat 2 begin - try - load_function(path)(-1) - catch e; - e - end - end - @test fetch(tsk) isa DomainError -end - -# Julia wants to treat Tuple (and other things like it) as plain bits, but LLVM wants to treat it as something with a pointer. -# We need to be careful to not send, nor receive an unwrapped Tuple to a compiled function. -# The interface made in `compile` should handle this fine. -@testset "Send and receive Tuple" begin - foo(u::Tuple) = 2 .* reverse(u) .- 1 - - _, path = compile(foo, (NTuple{3, Int},)) - @test remote_load_call(path, (1, 2, 3)) == (5, 3, 1) -end - - -# Just to call external libraries -@testset "BLAS" begin - function mydot(a::Vector{Float64}) - N = length(a) - BLAS.dot(N, a, 1, a, 1) - end - a = [1.0, 2.0] - - mydot_compiled, path = compile(mydot, (Vector{Float64},)) - # Works locally for me, but not on CI. Need some improvements to pointer relocation to be robust. - @test_skip remote_load_call(path, a) == 5.0 - @test mydot_compiled(a) ≈ 5.0 - - # This will need some more work apparently - @test_skip begin - _, path = compile((*), (Matrix{Float64}, Matrix{Float64})) - A, B = rand(10, 11), rand(11, 12) - @test remote_load_call(path, A, B) ≈ A * B - end -end - - -@testset "Strings" begin - function hello(name) - "Hello, " * name * "!" - end - hello_compiled, path = compile(hello, (String,)) - @test remote_load_call(path, "world") == "Hello, world!" - - # We'll need to be able to relocate a bunch of UV stuff for this, and deal with dynamic dispatch. - @test_skip begin - function hello(N) - println("Hello World $N") - N - end - - hello_compiled, path = compile(hello, (Int,)) - @test_skip remote_load_call(path, 1) == 1 - end -end - -# This is a trick to get stack allocated arrays inside a function body (so long as they don't escape). 
-# This lets us have intermediate, mutable stack allocated arrays inside our
-@testset "Alloca" begin
-    function f(N)
-        # this can hold at most 100 Int values, if you use it for more, you'll segfault
-        buf = ManualMemory.MemoryBuffer{100, Int}(undef)
-        GC.@preserve buf begin
-            # wrap the first N values in a PtrArray
-            arr = PtrArray(pointer(buf), (N,))
-            arr .= 1 # mutate the array to be all 1s
-            sum(arr) # compute the sum. It is very imporatant that no references to arr escape the function body
-        end
-    end
-    _, path = compile(f, (Int,))
-    @test remote_load_call(path, 20) == 20
-end
-
-# I can't beleive this works.
-@testset "LoopVectorization" begin
-    function mul!(C, A, B)
-        # note: @tturbo does NOT work
-        @turbo for n ∈ indices((C,B), 2), m ∈ indices((C,A), 1)
-            Cmn = zero(eltype(C))
-            for k ∈ indices((A,B), (2,1))
-                Cmn += A[m,k] * B[k,n]
-            end
-            C[m,n] = Cmn
-        end
-    end
-
-    C = Array{Float64}(undef, 10, 12)
-    A = rand(10, 11)
-    B = rand(11, 12)
-
-    _, path = compile(mul!, (Matrix{Float64}, Matrix{Float64}, Matrix{Float64},))
-    # remote_load_call(path, C, A, B) This won't work because @spawnat copies C
-    C .= fetch(@spawnat 2 (load_function(path)(C, A, B); C))
-    @test C ≈ A*B
-end
-
 @testset "Standalone Dylibs" begin
     # Test function
     # (already defined)
@@ -349,3 +124,37 @@ end
     @test ccall(fptr, Float64, (Float64,), 10.) == squaresquaresquare(10.) #Compile dylib
 end
+
+
+# Overlays
+
+module SubFoo
+
+rand(args...) = Base.rand(args...)
+
+function f()
+    x = rand()
+    y = rand()
+    return x + y
+end
+
+end
+
+@device_override SubFoo.rand() = 2
+
+# Let's test having another method table around
+Base.Experimental.@MethodTable AnotherTable
+Base.Experimental.@overlay AnotherTable SubFoo.rand() = 3
+
+@testset "Overlays" begin
+    # Default table: the `@device_override` above makes SubFoo.rand() return 2, so f() == 4
+    Libdl.dlopen(compile_shlib(SubFoo.f, (), workdir)) do lib
+        fptr = Libdl.dlsym(lib, "f")
+        @test @ccall($fptr()::Int) == 4
+    end
+    # Custom table: the overlay in AnotherTable makes SubFoo.rand() return 3, so f() == 6
+    Libdl.dlopen(compile_shlib(SubFoo.f, (), workdir; method_table=AnotherTable)) do lib
+        fptr = Libdl.dlsym(lib, "f")
+        @test @ccall($fptr()::Int) == 6
+    end
+end
diff --git a/test/testintegration.jl b/test/testintegration.jl
index ba4d7851..3422e63b 100644
--- a/test/testintegration.jl
+++ b/test/testintegration.jl
@@ -3,6 +3,34 @@ testpath = pwd()
 scratch = tempdir()
 cd(scratch)
 
+if VERSION >= v"1.9"
+    # Bumper uses package extensions to work with StaticCompiler, so skip this test on 1.8
+    function bumper_test(N::Int)
+        buf = AllocBuffer(MallocVector, sizeof(Float64) * N)
+        s = 0.0
+        for i ∈ 1:N
+            # an excuse to reuse the same memory many times
+            @no_escape buf begin
+                v = @alloc(Float64, N)
+                v .= i
+                s += sum(v)
+            end
+        end
+        free(buf)
+        s
+    end
+
+    @testset "Bumper.jl integration" begin
+
+        path = compile_shlib(bumper_test, (Int,), "./")
+        ptr = Libdl.dlopen(path, Libdl.RTLD_LOCAL)
+
+        fptr = Libdl.dlsym(ptr, "bumper_test")
+
+        @test bumper_test(8) == @ccall($fptr(8::Int)::Float64)
+    end
+end
+
 @testset "Standalone Executable Integration" begin
 
     jlpath = joinpath(Sys.BINDIR, Base.julia_exename()) # Get path to julia executable
@@ -105,7 +133,7 @@ cd(scratch)
             @warn "Could not compile $testpath/scripts/randn_matrix.jl"
             println(e)
         end
-        @static if Sys.isbsd()
+        if Sys.isbsd()
             @test isa(status, Base.Process)
             @test isa(status, Base.Process) && status.exitcode == 0
         end
@@ -119,14 +147,14 @@ cd(scratch)
             @warn "Could not run $(scratch)/randn_matrix"
             println(e)
         end
-        @static if Sys.isbsd()
+        if Sys.isbsd()
             @test isa(status, Base.Process)
             @test isa(status, Base.Process) && status.exitcode == 0
         end
     end
 
     ## --- Test LoopVectorization integration
-    @static if LoopVectorization.VectorizationBase.has_feature(Val{:x86_64_avx2})
+    if Bool(LoopVectorization.VectorizationBase.has_feature(Val{:x86_64_avx2}))
         let
             # Compile...
             status = -1
@@ -280,119 +308,33 @@ cd(scratch)
 
     ## --- Test interop
 
-    @static if Sys.isbsd()
-        let
-            # Compile...
-            status = -1
-            try
-                isfile("interop") && rm("interop")
-                status = run(`$jlpath --startup=no --compile=min $testpath/scripts/interop.jl`)
-            catch e
-                @warn "Could not compile $testpath/scripts/interop.jl"
-                println(e)
-            end
-            @test isa(status, Base.Process)
-            @test isa(status, Base.Process) && status.exitcode == 0
-
-            # Run...
-            println("Interop:")
-            status = -1
-            try
-                status = run(`./interop`)
-            catch e
-                @warn "Could not run $(scratch)/interop"
-                println(e)
-            end
-            @test isa(status, Base.Process)
-            @test isa(status, Base.Process) && status.exitcode == 0
-        end
-    end
-
-end
-
-# Mixtape
-
-module SubFoo
-
-function f()
-    x = rand()
-    y = rand()
-    return x + y
-end
-
-function stringfun(s1, s2)
-    return s1 * s2
-end
-
-function teststring()
-    return stringfun("ab", "c") == "abc"
-end
-
-end
-
-struct MyMix <: CompilationContext end
-
-@testset "Mixtape" begin
-    # 101: How2Mix
-
-    # A few little utility functions for working with Expr instances.
-    swap(e) = e
-    function swap(e::Expr)
-        new = MacroTools.postwalk(e) do s
-            isexpr(s, :call) || return s
-            s.args[1] == Base.rand || return s
-            return 4
-        end
-        return new
-    end
-
-    # This is pre-inference - you get to see a CodeInfoTools.Builder instance.
-    function StaticCompiler.transform(::MyMix, src)
-        b = CodeInfoTools.Builder(src)
-        for (v, st) in b
-            b[v] = swap(st)
-        end
-        return CodeInfoTools.finish(b)
-    end
+    if Sys.isbsd()
+        let
+            # Compile...
+            status = -1
+            try
+                isfile("interop") && rm("interop")
+                status = run(`$jlpath --startup=no --compile=min $testpath/scripts/interop.jl`)
+            catch e
+                @warn "Could not compile $testpath/scripts/interop.jl"
+                println(e)
+            end
+            @test isa(status, Base.Process)
+            @test isa(status, Base.Process) && status.exitcode == 0
 
-    # MyMix will only transform functions which you explicitly allow.
-    # You can also greenlight modules.
-    StaticCompiler.allow(ctx::MyMix, m::Module) = m == SubFoo
-
-    _, path = compile(SubFoo.f, (), mixtape = MyMix())
-    @test load_function(path)() == 8
-    @test SubFoo.f() != 8
-
-    # redefine swap to test caching and add StaticString substitution
-    function swap(e::Expr)
-        new = MacroTools.postwalk(e) do s
-            s isa String && return StaticTools.StaticString(tuple(codeunits(s)..., 0x00))
-            isexpr(s, :call) || return s
-            s.args[1] == Base.rand || return s
-            return 2
+            # Run...
+            println("Interop:")
+            status = -1
+            try
+                status = run(`./interop`)
+            catch e
+                @warn "Could not run $(scratch)/interop"
+                println(e)
+            end
+            @test isa(status, Base.Process)
+            @test isa(status, Base.Process) && status.exitcode == 0
         end
-        return new
     end
-    _, path = compile(SubFoo.f, (), mixtape = MyMix())
-    @test load_function(path)() == 4
-
-    _, path = compile(SubFoo.teststring, (), mixtape = MyMix())
-    @test load_function(path)()
-
-end
-
-@testset "Cross compiling to WebAssembly" begin
-    testpath = pwd()
-    scratch = tempdir()
-    cd(scratch)
-
-    m2(x) = 2x
-    m3(x) = 3x
-    wasm_path = compile_wasm(m2, Tuple{Float64})
-    wasm_path2 = compile_wasm([(m2, Tuple{Float64}), (m3, Tuple{Float64})])
-
-    wasm_path = compile_wasm(m2, (Float64,))
-    wasm_path2 = compile_wasm([(m2, (Float64,)), (m3, (Float64,))])
 end
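
For reference, the `method_table` keyword exercised by the new "Overlays" test can
also be used on its own. Below is a minimal sketch; the names `g`, `h`, and
`MyTable` are hypothetical, chosen only for illustration:

```julia
using StaticCompiler, Libdl

g(x) = 2x
h(x) = g(x) + 1

# Methods placed in a custom overlay table take effect only when that table is
# passed to the compiler via the `method_table` keyword.
Base.Experimental.@MethodTable MyTable
Base.Experimental.@overlay MyTable g(x::Int) = 10x

# Compile `h` against the custom table and call it through a raw pointer.
lib = Libdl.dlopen(compile_shlib(h, (Int,), tempdir(); method_table=MyTable))
fptr = Libdl.dlsym(lib, "h")
@ccall($fptr(3::Int)::Int)  # returns 31 with the overlay; plain h(3) returns 7
```

Compiling without the keyword falls back to StaticCompiler's default method table,
so the same `h` would then see the ordinary definition of `g`.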