diff --git a/test/Project.toml b/test/Project.toml index 90670d48..1e877a7e 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -10,6 +10,7 @@ JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NEO_jll = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd" +ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -21,3 +22,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" libigc_jll = "94295238-5935-5bd7-bb0f-b00942e9bdd5" oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36" + +[compat] +ParallelTestRunner = "2.2" diff --git a/test/execution.jl b/test/execution.jl index cd3db014..1e2e1797 100644 --- a/test/execution.jl +++ b/test/execution.jl @@ -108,10 +108,10 @@ end export external_dummy external_dummy() = return end - import ...KernelModule + import .KernelModule @oneapi KernelModule.external_dummy() @eval begin - using ...KernelModule + using .KernelModule @oneapi external_dummy() end diff --git a/test/runtests.jl b/test/runtests.jl index 36773dc5..06d77db2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,390 +1,114 @@ -using Distributed -using Dates -import REPL -using Printf: @sprintf -using Base.Filesystem: path_separator +# using Distributed +# using Dates +# import REPL +# using Printf: @sprintf +# using Base.Filesystem: path_separator -# parse some command-line arguments -function extract_flag!(args, flag, default=nothing) - for f in args - if startswith(f, flag) - # Check if it's just `--flag` or if it's `--flag=foo` - if f != flag - val = split(f, '=')[2] - if default !== nothing && !(typeof(default) <: AbstractString) - val = parse(typeof(default), val) - end - else - val = default - end - - # Drop this value from our args - filter!(x -> x != 
f, args) - return (true, val) - end - end - return (false, default) -end -do_help, _ = extract_flag!(ARGS, "--help") -if do_help - println(""" - Usage: runtests.jl [--help] [--list] [--jobs=N] [TESTS...] - - --help Show this text. - --list List all available tests. - --quickfail Fail the entire run as soon as a single test errored. - --jobs=N Launch `N` processes to perform tests (default: Sys.CPU_THREADS). +using ParallelTestRunner +using oneAPI - Remaining arguments filter the tests that will be executed.""") - exit(0) -end -_, jobs = extract_flag!(ARGS, "--jobs", Sys.CPU_THREADS) -do_quickfail, _ = extract_flag!(ARGS, "--quickfail") +oneAPI.functional() || error("oneAPI.jl is not functional on this system") -include("setup.jl") # make sure everything is precompiled @info "System information:\n" * sprint(io->oneAPI.versioninfo(io)) if Sys.islinux() -@info "Using oneAPI support library at " * oneAPI.Support.liboneapi_support + @info "Using oneAPI support library at " * oneAPI.Support.liboneapi_support end -@info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." # choose tests -const tests = [] -const test_runners = Dict() -## files in the test folder -for (rootpath, dirs, files) in walkdir(@__DIR__) - # find Julia files - filter!(files) do file - endswith(file, ".jl") && file !== "setup.jl" && file !== "runtests.jl" - end - isempty(files) && continue - - # strip extension - files = map(files) do file - file[1:end-3] - end - - # prepend subdir - subdir = relpath(rootpath, @__DIR__) - if subdir != "." 
- files = map(files) do file - joinpath(subdir, file) - end - end - - # unify path separators - files = map(files) do file - replace(file, path_separator => '/') - end - - append!(tests, files) - for file in files - test_runners[file] = ()->include("$(@__DIR__)/$file.jl") - end -end -sort!(tests; by=(file)->stat("$(@__DIR__)/$file.jl").size, rev=true) -## GPUArrays testsuite +testsuite = find_tests(@__DIR__) +## GPUArrays test suite +import GPUArrays +gpuarrays = pathof(GPUArrays) +gpuarrays_root = dirname(dirname(gpuarrays)) +gpuarrays_testsuite = joinpath(gpuarrays_root, "test", "testsuite.jl") +include(gpuarrays_testsuite) for name in keys(TestSuite.tests) - pushfirst!(tests, "gpuarrays/$name") - test_runners["gpuarrays/$name"] = ()->TestSuite.tests[name](oneArray) + testsuite["gpuarrays/$name"] = :(TestSuite.tests[$name](oneArray)) end -## finalize -unique!(tests) -# parse some more command-line arguments -## --list to list all available tests -do_list, _ = extract_flag!(ARGS, "--list") -if do_list - println("Available tests:") - for test in sort(tests) - println(" - $test") - end - exit(0) -end -## no options should remain -optlike_args = filter(startswith("-"), ARGS) -if !isempty(optlike_args) - error("Unknown test options `$(join(optlike_args, " "))` (try `--help` for usage instructions)") -end -## the remaining args filter tests -if !isempty(ARGS) - filter!(tests) do test - any(arg->startswith(test, arg), ARGS) - end -end +args = parse_args(ARGS) -# add workers -const test_exeflags = Base.julia_cmd() -filter!(test_exeflags.exec) do c - return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds")) -end -push!(test_exeflags.exec, "--check-bounds=yes") -push!(test_exeflags.exec, "--startup-file=no") -push!(test_exeflags.exec, "--depwarn=yes") -push!(test_exeflags.exec, "--project=$(Base.active_project())") -const test_exename = popfirst!(test_exeflags.exec) -function addworker(X; kwargs...) 
- withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do - procs = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...) - @everywhere procs include($(joinpath(@__DIR__, "setup.jl"))) - procs - end -end -addworker(min(jobs, length(tests))) +init_worker_code = quote + using oneAPI, Adapt -# pretty print information about gc and mem usage -testgroupheader = "Test" -workerheader = "(Worker)" -name_align = maximum([textwidth(testgroupheader) + textwidth(" ") + - textwidth(workerheader); map(x -> textwidth(x) + - 3 + ndigits(nworkers()), tests)]) -elapsed_align = textwidth("Time (s)") -gc_align = textwidth("GC (s)") -percent_align = textwidth("GC %") -alloc_align = textwidth("Alloc (MB)") -rss_align = textwidth("RSS (MB)") -printstyled(" "^(name_align + textwidth(testgroupheader) - 3), " | ") -printstyled(" | ---------------- CPU ---------------- |\n", color=:white) -printstyled(testgroupheader, color=:white) -printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color=:white) -printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color=:white) -print_lock = stdout isa Base.LibuvStream ? stdout.lock : ReentrantLock() -if stderr isa Base.LibuvStream - stderr.lock = print_lock -end -function print_testworker_stats(test, wrkr, resp) - @nospecialize resp - lock(print_lock) - try - printstyled(test, color=:white) - printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color=:white) - time_str = @sprintf("%7.2f",resp[2]) - printstyled(lpad(time_str, elapsed_align, " "), " | ", color=:white) + import GPUArrays + include($gpuarrays_testsuite) + testf(f, xs...; kwargs...) = TestSuite.compare(f, oneArray, xs...; kwargs...) 
- cpu_gc_str = @sprintf("%5.2f", resp[4]) - printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color=:white) - # since there may be quite a few digits in the percentage, - # the left-padding here is less to make sure everything fits - cpu_percent_str = @sprintf("%4.1f", 100 * resp[4] / resp[2]) - printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color=:white) - cpu_alloc_str = @sprintf("%5.2f", resp[3] / 2^20) - printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color=:white) + const eltypes = [Int16, Int32, Int64, + Complex{Int16}, Complex{Int32}, Complex{Int64}, + Float16, Float32, + ComplexF32] - cpu_rss_str = @sprintf("%5.2f", resp[6] / 2^20) - printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color=:white) - finally - unlock(print_lock) + const float16_supported = oneL0.module_properties(device()).fp16flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP16 == oneL0.ZE_DEVICE_MODULE_FLAG_FP16 + if float16_supported + append!(eltypes, [#=Float16,=# ComplexF16]) end -end -global print_testworker_started = (name, wrkr)->begin -end -function print_testworker_errored(name, wrkr) - lock(print_lock) - try - printstyled(name, color=:red) - printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |", - " "^elapsed_align, " failed at $(now())\n", color=:red) - finally - unlock(print_lock) + const float64_supported = oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 == oneL0.ZE_DEVICE_MODULE_FLAG_FP64 + if float64_supported + append!(eltypes, [Float64, ComplexF64]) end -end + TestSuite.supported_eltypes(::Type{<:oneArray}) = eltypes + + + const validation_layer = parse(Bool, get(ENV, "ZE_ENABLE_VALIDATION_LAYER", "false")) + const parameter_validation = parse(Bool, get(ENV, "ZE_ENABLE_PARAMETER_VALIDATION", "false")) -# run tasks -t0 = now() -results = [] -all_tasks = Task[] -all_tests = copy(tests) -try - # Monitor stdin and kill this task on ^C - # but don't do this on Windows, because it may deadlock in the kernel - t 
= current_task() - running_tests = Dict{String, DateTime}() - if !Sys.iswindows() && isa(stdin, Base.TTY) - stdin_monitor = @async begin - term = REPL.Terminals.TTYTerminal("xterm", stdin, stdout, stderr) - try - REPL.Terminals.raw!(term, true) - while true - c = read(term, Char) - if c == '\x3' - Base.throwto(t, InterruptException()) - break - elseif c == '?' - println("Currently running: ") - tests = sort(collect(running_tests), by=x->x[2]) - foreach(tests) do (test, date) - println(test, " (running for ", round(now()-date, Minute), ")") - end + # NOTE: based on test/pkg.jl::capture_stdout, but doesn't discard exceptions + macro grab_output(ex) + quote + mktemp() do fname, fout + ret = nothing + open(fname, "w") do fout + redirect_stdout(fout) do + ret = $(esc(ex)) end end - catch e - isa(e, InterruptException) || rethrow() - finally - REPL.Terminals.raw!(term, false) + ret, read(fname, String) end end end - @sync begin - function recycle_worker(p) - rmprocs(p, waitfor=30) - return nothing - end - - for p in workers() - @async begin - push!(all_tasks, current_task()) - while length(tests) > 0 - test = popfirst!(tests) - - # sometimes a worker failed, and we need to spawn a new one - if p === nothing - p = addworker(1)[1] - end - wrkr = p - - local resp - - # run the test - running_tests[test] = now() - try - resp = remotecall_fetch(runtests, wrkr, test_runners[test], test) - catch e - isa(e, InterruptException) && return - resp = Any[e] - end - delete!(running_tests, test) - push!(results, (test, resp)) - # act on the results - if resp[1] isa Exception - print_testworker_errored(test, wrkr) - do_quickfail && Base.throwto(t, InterruptException()) - - # the worker encountered some failure, recycle it - # so future tests get a fresh environment - p = recycle_worker(p) - else - print_testworker_stats(test, wrkr, resp) - - cpu_rss = resp[6] - if haskey(ENV, "CI") && cpu_rss > 3*2^30 - # XXX: collecting garbage - # after each test, we are leaking CPU memory somewhere. 
- # this is a problem on CI, where we don't have much RAM. - work around this by periodically recycling the worker. - p = recycle_worker(p) - end - end + # Run some code on-device + macro on_device(ex...) + code = ex[end] + kwargs = ex[1:end-1] + + @gensym kernel + esc(quote + let + function $kernel() + $code + return end - if p !== nothing - recycle_worker(p) - end - end - end + oneAPI.@sync @oneapi $(kwargs...) $kernel() end - end + end) end -catch e - isa(e, InterruptException) || rethrow() - # If the test suite was merely interrupted, still print the - # summary, which can be useful to diagnose what's going on - foreach(task -> begin - istaskstarted(task) || return - istaskdone(task) && return - try - schedule(task, InterruptException(); error=true) - catch ex - @error "InterruptException" exception=ex,catch_backtrace() - end - end, all_tasks) - for t in all_tasks - # NOTE: we can't just wait, but need to discard the exception, - # because the throwto for --quickfail also kills the worker. - try - wait(t) - catch e - showerror(stderr, e) - end - end -finally - if @isdefined stdin_monitor - schedule(stdin_monitor, InterruptException(); error=true) - end -end -t1 = now() -elapsed = canonicalize(Dates.CompoundPeriod(t1-t0)) -println("Testing finished in $elapsed") -# construct a testset to render the test results -o_ts = Test.DefaultTestSet("Overall") -Test.push_testset(o_ts) -completed_tests = Set{String}() -for (testname, (resp,)) in results - push!(completed_tests, testname) - if isa(resp, Test.DefaultTestSet) - Test.push_testset(resp) - Test.record(o_ts, resp) - Test.pop_testset() - elseif isa(resp, Tuple{Int,Int}) - fake = Test.DefaultTestSet(testname) - for i in 1:resp[1] - Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing)) - end - for i in 1:resp[2] - Test.record(fake, Test.Broken(:test, nothing)) - end - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() - elseif isa(resp, RemoteException) && isa(resp.captured.ex, 
Test.TestSetException) - println("Worker $(resp.pid) failed running test $(testname):") - Base.showerror(stdout, resp.captured) - println() - fake = Test.DefaultTestSet(testname) - for i in 1:resp.captured.ex.pass - Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing)) - end - for i in 1:resp.captured.ex.broken - Test.record(fake, Test.Broken(:test, nothing)) - end - for t in resp.captured.ex.errors_and_fails - Test.record(fake, t) - end - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() - else - if !isa(resp, Exception) - resp = ErrorException(string("Unknown result type : ", typeof(resp))) - end - # If this test raised an exception that is not a remote testset exception, - # i.e. not a RemoteException capturing a TestSetException that means - # the test runner itself had some problem, so we may have hit a segfault, - # deserialization errors or something similar. Record this testset as Errored. - fake = Test.DefaultTestSet(testname) - Test.record(fake, Test.Error(:nontest_error, testname, nothing, Any[(resp, [])], LineNumberNode(1))) - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() - end + # helper function for sinking a value to prevent the callee from getting optimized away + @inline sink(i::Int32) = + Base.llvmcall("""%slot = alloca i32 + store volatile i32 %0, i32* %slot + %value = load volatile i32, i32* %slot + ret i32 %value""", Int32, Tuple{Int32}, i) + @inline sink(i::Int64) = + Base.llvmcall("""%slot = alloca i64 + store volatile i64 %0, i64* %slot + %value = load volatile i64, i64* %slot + ret i64 %value""", Int64, Tuple{Int64}, i) end -for test in all_tests - (test in completed_tests) && continue - fake = Test.DefaultTestSet(test) - Test.record(fake, Test.Error(:test_interrupted, test, nothing, - [("skipped", [])], LineNumberNode(1))) - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() -end -println() -Test.print_test_results(o_ts, 1) -if !o_ts.anynonpass - println(" 
\033[32;1mSUCCESS\033[0m") -else - println(" \033[31;1mFAILURE\033[0m\n") - Test.print_test_errors(o_ts) - throw(Test.FallbackTestSetException("Test run finished with errors")) + +init_code = quote + using oneAPI, Adapt + + import ..TestSuite, ..testf + import ..eltypes, ..float16_supported, ..float64_supported, + ..validation_layer, ..parameter_validation, + ..@grab_output, ..@on_device, ..sink end +runtests(oneAPI, args; testsuite, init_code, init_worker_code) diff --git a/test/setup.jl b/test/setup.jl deleted file mode 100644 index 269d5b9c..00000000 --- a/test/setup.jl +++ /dev/null @@ -1,136 +0,0 @@ -using Distributed, Test, oneAPI - -oneAPI.functional() || error("oneAPI.jl is not functional on this system") - -# GPUArrays has a testsuite that isn't part of the main package. -# Include it directly. -import GPUArrays -gpuarrays = pathof(GPUArrays) -gpuarrays_root = dirname(dirname(gpuarrays)) -include(joinpath(gpuarrays_root, "test", "testsuite.jl")) -testf(f, xs...; kwargs...) = TestSuite.compare(f, oneArray, xs...; kwargs...) 
- -const eltypes = [Int16, Int32, Int64, - Complex{Int16}, Complex{Int32}, Complex{Int64}, - Float16, Float32, - ComplexF32] -const float16_supported = oneL0.module_properties(device()).fp16flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP16 == oneL0.ZE_DEVICE_MODULE_FLAG_FP16 -if float16_supported - append!(eltypes, [#=Float16,=# ComplexF16]) -end -const float64_supported = oneL0.module_properties(device()).fp64flags & oneL0.ZE_DEVICE_MODULE_FLAG_FP64 == oneL0.ZE_DEVICE_MODULE_FLAG_FP64 -if float64_supported - append!(eltypes, [Float64, ComplexF64]) -end -TestSuite.supported_eltypes(::Type{<:oneArray}) = eltypes - -const validation_layer = parse(Bool, get(ENV, "ZE_ENABLE_VALIDATION_LAYER", "false")) -const parameter_validation = parse(Bool, get(ENV, "ZE_ENABLE_PARAMETER_VALIDATION", "false")) - -using Random - - -## entry point - -function runtests(f, name) - old_print_setting = Test.TESTSET_PRINT_ENABLE[] - Test.TESTSET_PRINT_ENABLE[] = false - - try - # generate a temporary module to execute the tests in - mod_name = Symbol("Test", rand(1:100), "Main_", replace(name, '/' => '_')) - mod = @eval(Main, module $mod_name end) - @eval(mod, using Test, Random, oneAPI) - - let id = myid() - wait(@spawnat 1 print_testworker_started(name, id)) - end - - ex = quote - GC.gc(true) - Random.seed!(1) - oneAPI.allowscalar(false) - - @timed @testset $"$name" begin - $f() - end - end - data = Core.eval(mod, ex) - #data[1] is the testset - - # process results - cpu_rss = Sys.maxrss() - if VERSION >= v"1.11.0-DEV.1529" - tc = Test.get_test_counts(data[1]) - passes,fails,error,broken,c_passes,c_fails,c_errors,c_broken = - tc.passes, tc.fails, tc.errors, tc.broken, tc.cumulative_passes, - tc.cumulative_fails, tc.cumulative_errors, tc.cumulative_broken - else - passes,fails,errors,broken,c_passes,c_fails,c_errors,c_broken = - Test.get_test_counts(data[1]) - end - if data[1].anynonpass == false - data = ((passes+c_passes,broken+c_broken), - data[2], - data[3], - data[4], - data[5]) - end - res = 
vcat(collect(data), cpu_rss) - - GC.gc(true) - res - finally - Test.TESTSET_PRINT_ENABLE[] = old_print_setting - end -end - - -## auxiliary stuff - -# NOTE: based on test/pkg.jl::capture_stdout, but doesn't discard exceptions -macro grab_output(ex) - quote - mktemp() do fname, fout - ret = nothing - open(fname, "w") do fout - redirect_stdout(fout) do - ret = $(esc(ex)) - end - end - ret, read(fname, String) - end - end -end - -# Run some code on-device -macro on_device(ex...) - code = ex[end] - kwargs = ex[1:end-1] - - @gensym kernel - esc(quote - let - function $kernel() - $code - return - end - - oneAPI.@sync @oneapi $(kwargs...) $kernel() - end - end) -end - -# helper function for sinking a value to prevent the callee from getting optimized away -@inline sink(i::Int32) = - Base.llvmcall("""%slot = alloca i32 - store volatile i32 %0, i32* %slot - %value = load volatile i32, i32* %slot - ret i32 %value""", Int32, Tuple{Int32}, i) -@inline sink(i::Int64) = - Base.llvmcall("""%slot = alloca i64 - store volatile i64 %0, i64* %slot - %value = load volatile i64, i64* %slot - ret i64 %value""", Int64, Tuple{Int64}, i) - -nothing # File is loaded via a remotecall to "include". Ensure it returns "nothing".