Aurora LTS 2523.40 #574
Open
michel2323 wants to merge 7 commits into
Open
Conversation
Contributor
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/deps/generate_interfaces.jl b/deps/generate_interfaces.jl
index 8c62b75..108a001 100644
--- a/deps/generate_interfaces.jl
+++ b/deps/generate_interfaces.jl
@@ -337,34 +337,34 @@ function generate_headers(library::String, filename::Vector{String}, output::Str
end
end
- # Dedup: when two signatures map to the same C function name (because MKL
- # added an overload), keep the one with more parameters — typically the
- # newer signature (e.g. set_csr_data gained an `nnz` arg in MKL 2025.3.1).
- # Without this the generated onemkl.cpp has duplicate function definitions
- # and won't compile.
- _fn_name(h) = (pos = findfirst('(', h); strip(split(strip(h[1:pos-1]))[end]))
- _param_cnt(h) = (pos = findfirst('(', h); ep = findnext(')', h, pos); count(==(','), h[pos+1:ep-1]) + 1)
- keep_idx = Dict{String,Int}()
- keep_pc = Dict{String,Int}()
- for (i, sig) in enumerate(signatures)
- (sig[2] in blacklist) && continue
- fn = _fn_name(sig[1])
- pc = _param_cnt(sig[1])
- if !haskey(keep_idx, fn) || pc > keep_pc[fn]
- keep_idx[fn] = i
- keep_pc[fn] = pc
+ # Dedup: when two signatures map to the same C function name (because MKL
+ # added an overload), keep the one with more parameters — typically the
+ # newer signature (e.g. set_csr_data gained an `nnz` arg in MKL 2025.3.1).
+ # Without this the generated onemkl.cpp has duplicate function definitions
+ # and won't compile.
+ _fn_name(h) = (pos = findfirst('(', h); strip(split(strip(h[1:(pos - 1)]))[end]))
+ _param_cnt(h) = (pos = findfirst('(', h); ep = findnext(')', h, pos); count(==(','), h[(pos + 1):(ep - 1)]) + 1)
+ keep_idx = Dict{String, Int}()
+ keep_pc = Dict{String, Int}()
+ for (i, sig) in enumerate(signatures)
+ (sig[2] in blacklist) && continue
+ fn = _fn_name(sig[1])
+ pc = _param_cnt(sig[1])
+ if !haskey(keep_idx, fn) || pc > keep_pc[fn]
+ keep_idx[fn] = i
+ keep_pc[fn] = pc
+ end
end
- end
- keep_set = Set(values(keep_idx))
+ keep_set = Set(values(keep_idx))
path_oneapi_headers = joinpath(@__DIR__, output)
oneapi_headers = open(path_oneapi_headers, "w")
- for (i, (header, name_routine, version, type_routine, template)) in enumerate(signatures)
+ for (i, (header, name_routine, version, type_routine, template)) in enumerate(signatures)
# Blacklist
(name_routine in blacklist) && continue
- # Dedup
- (i in keep_set) || continue
+ # Dedup
+ (i in keep_set) || continue
# Pass scalars (e.g. alpha/beta inputs) as references instead of values
for type in ("short", "float", "double", "float _Complex", "double _Complex")
diff --git a/lib/level-zero/cmdlist.jl b/lib/level-zero/cmdlist.jl
index 24d1d3a..19c9aa3 100644
--- a/lib/level-zero/cmdlist.jl
+++ b/lib/level-zero/cmdlist.jl
@@ -57,7 +57,7 @@ end
# oversubscription matters more than speed.
const sync_each_submission = Ref{Bool}(false)
-function execute!(queue::ZeCommandQueue, lists::Vector{ZeCommandList}, fence=nothing)
+function execute!(queue::ZeCommandQueue, lists::Vector{ZeCommandList}, fence = nothing)
r = zeCommandQueueExecuteCommandLists(queue, length(lists), lists, something(fence, C_NULL))
sync_each_submission[] && synchronize(queue)
return r
diff --git a/lib/level-zero/oneL0.jl b/lib/level-zero/oneL0.jl
index 87a5e83..d79c6d0 100644
--- a/lib/level-zero/oneL0.jl
+++ b/lib/level-zero/oneL0.jl
@@ -217,7 +217,7 @@ function __init__()
validation_layer[] = parse(Bool, get(ENV, "ZE_ENABLE_VALIDATION_LAYER", "false"))
parameter_validation[] = parse(Bool, get(ENV, "ZE_ENABLE_PARAMETER_VALIDATION", "false"))
- sync_each_submission[] = lowercase(get(ENV, "ONEAPI_SYNC_EACH_SUBMISSION", "")) in ("1", "true", "yes")
+ return sync_each_submission[] = lowercase(get(ENV, "ONEAPI_SYNC_EACH_SUBMISSION", "")) in ("1", "true", "yes")
end
end
diff --git a/lib/support/liboneapi_support.jl b/lib/support/liboneapi_support.jl
index e1b7327..c3d9958 100644
--- a/lib/support/liboneapi_support.jl
+++ b/lib/support/liboneapi_support.jl
@@ -6429,194 +6429,240 @@ function onemklZunmqr_batch_scratchpad_size(device_queue, side, trans, m, n, k,
end
function onemklXsparse_init_matrix_handle(p_spmat)
- @ccall liboneapi_support.onemklXsparse_init_matrix_handle(p_spmat::Ptr{matrix_handle_t})::Cint
+ return @ccall liboneapi_support.onemklXsparse_init_matrix_handle(p_spmat::Ptr{matrix_handle_t})::Cint
end
function onemklXsparse_release_matrix_handle(device_queue, p_spmat)
@ccall liboneapi_support.onemklXsparse_release_matrix_handle(device_queue::syclQueue_t,
- p_spmat::Ptr{matrix_handle_t})::Cint
+ p_spmat::Ptr{matrix_handle_t}
+ )::Cint
end
-function onemklSsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklSsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklSsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{Cfloat})::Cint
end
-function onemklSsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklSsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklSsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{Cfloat})::Cint
end
-function onemklDsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklDsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklDsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{Cdouble})::Cint
end
-function onemklDsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklDsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklDsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{Cdouble})::Cint
end
-function onemklCsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklCsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklCsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{ComplexF32})::Cint
end
-function onemklCsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklCsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklCsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{ComplexF32})::Cint
end
-function onemklZsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklZsparse_set_csr_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
col_ind, values)
@ccall liboneapi_support.onemklZsparse_set_csr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ spmat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int32},
col_ind::ZePtr{Int32},
values::ZePtr{ComplexF64})::Cint
end
-function onemklZsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
- row_ptr, col_ind, values)
+function onemklZsparse_set_csr_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
+ row_ptr, col_ind, values
+ )
@ccall liboneapi_support.onemklZsparse_set_csr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
+ nnz::Int64, index::onemklIndex,
row_ptr::ZePtr{Int64},
col_ind::ZePtr{Int64},
values::ZePtr{ComplexF64})::Cint
end
-function onemklSsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklSsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{Cfloat})::Cint
-end
-
-function onemklSsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklSsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{Cfloat})::Cint
-end
-
-function onemklDsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklDsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{Cdouble})::Cint
-end
-
-function onemklDsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklDsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{Cdouble})::Cint
-end
-
-function onemklCsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklCsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklCsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklCsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
- row_ind, values)
- @ccall liboneapi_support.onemklZsparse_set_csc_data(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int32},
- row_ind::Ptr{Int32},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
- col_ptr, row_ind, values)
- @ccall liboneapi_support.onemklZsparse_set_csc_data_64(device_queue::syclQueue_t,
- spMat::matrix_handle_t,
- nrows::Int64, ncols::Int64,
- nnz::Int64, index::onemklIndex,
- col_ptr::Ptr{Int64},
- row_ind::Ptr{Int64},
- values::Ptr{ComplexF32})::Cint
-end
-
-function onemklSsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklSsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklSsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklDsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklDsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklCsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklCsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_csc_data(
+ device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+ row_ind, values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_csc_data(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int32},
+ row_ind::Ptr{Int32},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_csc_data_64(
+ device_queue, spMat, nrows, ncols, nnz, index,
+ col_ptr, row_ind, values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_csc_data_64(
+ device_queue::syclQueue_t,
+ spMat::matrix_handle_t,
+ nrows::Int64, ncols::Int64,
+ nnz::Int64, index::onemklIndex,
+ col_ptr::Ptr{Int64},
+ row_ind::Ptr{Int64},
+ values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklSsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklSsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6624,10 +6670,11 @@ function onemklSsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{Cfloat})::Cint
end
-function onemklSsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklSsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklSsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6635,10 +6682,11 @@ function onemklSsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{Cfloat})::Cint
end
-function onemklDsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklDsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklDsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6646,10 +6694,11 @@ function onemklDsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{Cdouble})::Cint
end
-function onemklDsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklDsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklDsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6657,10 +6706,11 @@ function onemklDsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{Cdouble})::Cint
end
-function onemklCsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklCsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklCsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6668,10 +6718,11 @@ function onemklCsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{ComplexF32})::Cint
end
-function onemklCsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklCsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklCsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6679,10 +6730,11 @@ function onemklCsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{ComplexF32})::Cint
end
-function onemklZsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklZsparse_set_coo_data(
+ device_queue, spmat, nrows, ncols, nnz, index, row_ind,
col_ind, values)
@ccall liboneapi_support.onemklZsparse_set_coo_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int32, ncols::Int32,
nnz::Int32, index::onemklIndex,
row_ind::ZePtr{Int32},
@@ -6690,10 +6742,11 @@ function onemklZsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
values::ZePtr{ComplexF64})::Cint
end
-function onemklZsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklZsparse_set_coo_data_64(
+ device_queue, spmat, nrows, ncols, nnz, index,
row_ind, col_ind, values)
@ccall liboneapi_support.onemklZsparse_set_coo_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
+ spmat::matrix_handle_t,
nrows::Int64, ncols::Int64,
nnz::Int64, index::onemklIndex,
row_ind::ZePtr{Int64},
@@ -6701,128 +6754,160 @@ function onemklZsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
values::ZePtr{ComplexF64})::Cint
end
-function onemklSsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklSsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{Cfloat})::Cint
-end
-
-function onemklSsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklSsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{Cfloat})::Cint
-end
-
-function onemklDsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklDsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{Cdouble})::Cint
-end
-
-function onemklDsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklDsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{Cdouble})::Cint
-end
-
-function onemklCsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklCsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklCsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklCsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklZsparse_set_bsr_data(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64, blk_ncols::Int64,
- blk_nnz::Int64, row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int32},
- bsr_col_ind::Ptr{Int32},
- bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
- row_blk_size, col_blk_size, blk_layout, index,
- bsr_row_ptr, bsr_col_ind, bsr_values)
- @ccall liboneapi_support.onemklZsparse_set_bsr_data_64(device_queue::syclQueue_t,
- spmat::matrix_handle_t,
- blk_nrows::Int64,
- blk_ncols::Int64, blk_nnz::Int64,
- row_blk_size::Int64,
- col_blk_size::Int64,
- blk_layout::onemklLayout,
- index::onemklIndex,
- bsr_row_ptr::Ptr{Int64},
- bsr_col_ind::Ptr{Int64},
- bsr_values::Ptr{ComplexF32})::Cint
+function onemklSsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklSsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklSsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{Cfloat}
+ )::Cint
+end
+
+function onemklDsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklDsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklDsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{Cdouble}
+ )::Cint
+end
+
+function onemklCsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklCsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklCsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_bsr_data(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_bsr_data(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64, blk_ncols::Int64,
+ blk_nnz::Int64, row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int32},
+ bsr_col_ind::Ptr{Int32},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
+end
+
+function onemklZsparse_set_bsr_data_64(
+ device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+ row_blk_size, col_blk_size, blk_layout, index,
+ bsr_row_ptr, bsr_col_ind, bsr_values
+ )
+ return @ccall liboneapi_support.onemklZsparse_set_bsr_data_64(
+ device_queue::syclQueue_t,
+ spmat::matrix_handle_t,
+ blk_nrows::Int64,
+ blk_ncols::Int64, blk_nnz::Int64,
+ row_blk_size::Int64,
+ col_blk_size::Int64,
+ blk_layout::onemklLayout,
+ index::onemklIndex,
+ bsr_row_ptr::Ptr{Int64},
+ bsr_col_ind::Ptr{Int64},
+ bsr_values::Ptr{ComplexF32}
+ )::Cint
end
function onemklXsparse_init_matmat_descr(p_desc)
diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl
index adc17d6..e7088ab 100644
--- a/src/compiler/compilation.jl
+++ b/src/compiler/compilation.jl
@@ -197,7 +197,7 @@ end
# TODO: emit printf format strings in constant memory
extensions = String[
"SPV_EXT_relaxed_printf_string_address_space",
- "SPV_EXT_shader_atomic_float_add"
+ "SPV_EXT_shader_atomic_float_add",
]
else
backend = :llvm
diff --git a/src/context.jl b/src/context.jl
index bc5ac70..a84e8c4 100644
--- a/src/context.jl
+++ b/src/context.jl
@@ -232,8 +232,10 @@ function global_queue(ctx::ZeContext, dev::ZeDevice)
GC.enable_finalizers(false)
try
@lock queue_registry_lock begin
- push!(get!(Vector{Tuple{WeakRef,ZeCommandQueue}}, queue_registry, (ctx, dev)),
- (WeakRef(current_task()), queue))
+ push!(
+ get!(Vector{Tuple{WeakRef, ZeCommandQueue}}, queue_registry, (ctx, dev)),
+ (WeakRef(current_task()), queue)
+ )
end
finally
GC.enable_finalizers(true)
@@ -255,17 +257,17 @@ end
# task-local, so once their task is dead no new work can reach them, and the entry can
# be dropped (allowing the queue to be finalized) after a final synchronize.
const queue_registry_lock = ReentrantLock()
-const queue_registry = Dict{Tuple{ZeContext,ZeDevice},Vector{Tuple{WeakRef,ZeCommandQueue}}}()
+const queue_registry = Dict{Tuple{ZeContext, ZeDevice}, Vector{Tuple{WeakRef, ZeCommandQueue}}}()
# synchronize all known queues that target the given context (and device, if specified),
# i.e., all queues whose in-flight work could possibly reference an allocation that is
# about to be freed.
-function synchronize_all_queues(ctx::ZeContext, dev::Union{ZeDevice,Nothing})
+function synchronize_all_queues(ctx::ZeContext, dev::Union{ZeDevice, Nothing})
# only the LTS stack populates the queue registry (see `global_queue`); on the
# rolling stack this is a no-op and `release` frees directly.
oneL0.LTS[] || return
queues = ZeCommandQueue[]
- stale = Tuple{WeakRef,ZeCommandQueue}[]
+ stale = Tuple{WeakRef, ZeCommandQueue}[]
GC.enable_finalizers(false)
try
@lock queue_registry_lock begin
diff --git a/src/mapreduce.jl b/src/mapreduce.jl
index d03d72f..fca02a7 100644
--- a/src/mapreduce.jl
+++ b/src/mapreduce.jl
@@ -160,7 +160,7 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::oneWrappedArray{T},
# dense array first so every global read in the reduction kernel is coalesced.
if oneL0.LTS[] && !_dense_reduce_input(A)
Acontig = Broadcast.materialize(Broadcast.broadcasted(f, A))
- return GPUArrays.mapreducedim!(identity, op, R, Acontig; init=init)
+ return GPUArrays.mapreducedim!(identity, op, R, Acontig; init = init)
end
# add singleton dimensions to the output container, if needed
@@ -192,8 +192,9 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::oneWrappedArray{T},
if oneL0.LTS[] && size(Rreduce, 1) == 1
items = clamp(length(Rother), 1, 256)
groups = min(cld(length(Rother), items), 1024)
- @oneapi items=items groups=groups coalesced_mapreduce_device(
- f, op, init, Rreduce, Rother, R′, A)
+ @oneapi items = items groups = groups coalesced_mapreduce_device(
+ f, op, init, Rreduce, Rother, R′, A
+ )
return R
end
diff --git a/src/utils.jl b/src/utils.jl
index e7d232e..551ff49 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -2,8 +2,10 @@
function versioninfo(io::IO=stdout)
if Sys.islinux()
println(io, "Binary dependencies:")
- for jll in [oneL0.NEO_jll, oneL0.NEO_jll.libigc_LTS_jll, oneL0.NEO_jll.gmmlib_jll,
- SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll, oneAPI_Support_jll]
+ for jll in [
+ oneL0.NEO_jll, oneL0.NEO_jll.libigc_LTS_jll, oneL0.NEO_jll.gmmlib_jll,
+ SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll, oneAPI_Support_jll,
+ ]
name = string(jll)
print(io, "- $(name[1:end-4]): $(Base.pkgversion(jll))")
if jll.host_platform !== nothing
diff --git a/test/onemkl.jl b/test/onemkl.jl
index 4a4c7fc..518c179 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -14,7 +14,7 @@ k = 13
@testset "Version" begin
version_onemkl = oneMKL.version()
- @test version_onemkl ≥ v"2025.2.0"
+ @test version_onemkl ≥ v"2025.2.0"
end
############################################################################################ |
eec2785 to
991d29e
Compare
Codecov Report❌ Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## main #574 +/- ##
==========================================
+ Coverage 79.02% 80.96% +1.94%
==========================================
Files 49 49
Lines 3333 3321 -12
==========================================
+ Hits 2634 2689 +55
+ Misses 699 632 -67 ☔ View full report in Codecov by Harness. 🚀 New features to boost your workflow:
|
Aurora ships the Intel "LTS" GPU software stack rather than the rolling
release that upstream oneAPI.jl targets. Pin the whole toolchain to it:
- JLLs: NEO_LTS_jll =25.18.33578, oneAPI_Level_Zero_Loader_LTS_jll
=1.24, oneAPI_Level_Zero_Headers_LTS_jll, libigc_LTS_jll, and the
oneMKL 2025.3.1 support library. lib/level-zero/oneL0.jl aliases the
*_LTS_jll packages back to their canonical names so the rest of the
code is unchanged.
- Keep the SPIR-V Translator (SPIRV_LLVM_Translator_jll 21,
SPIRVIntrinsics 0.5). The LTS NEO/IGC runtime does not accept the
output of upstream's LLVM SPIR-V back-end (#491), so revert the
back-end switch in src/compiler/compilation.jl, src/oneAPI.jl and
src/utils.jl and restore the explicit SPIR-V extension list.
- Regenerate the oneMKL C bindings against 2025.3.1 (deps/src/onemkl.*,
lib/support/liboneapi_support.jl, deps/generate_interfaces.jl) and
fix the sparse CSR argument types (ZePtr/onemklIndex) in the
regenerated bindings and res/support.toml. Stay on oneMKL 2025.3.1,
declining the upstream oneAPI 2026.0.0 migration (#582).
- Prepend NEO's libze_intel_gpu directory to LD_LIBRARY_PATH in
__init__ so libsycl's bundled ze_lib finds the driver by path when
no system NEO is installed.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
The LTS IGC silently miscompiles strided memory accesses inside the
reduction kernels, producing wrong results with no error. Add two
complementary guards in src/mapreduce.jl:
- A coalesced reduction kernel for the case where the contiguous
leading dimension is reduced (size(Rreduce, 1) == 1, e.g.
sum(A; dims=2)), avoiding the strided per-thread loads.
- Materialize strided / non-dense inputs (Transpose, Adjoint,
PermutedDimsArray, SubArray, ...) to a dense array before reducing,
via _dense_reduce_input; this fixes e.g. `a == transpose(b)`.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
On the LTS NEO stack (25.18), freeing a buffer does not drain queues that
still have work in flight referencing it: the in-flight kernel then
faults and the context is banned, which surfaces later as a
ZE_RESULT_ERROR_UNKNOWN at an unrelated operation. global_queue is task-local,
so a test file's task can also die with work still queued.
- Register every global queue in a per-(context, device) registry that
holds the queue strongly, paired with a weak reference to the owning
task (src/context.jl). A WeakRef to the queue itself would be cleared in
the same GC cycle that schedules its finalizer, hiding the queue from release
exactly when its in-flight work still references buffers being freed.
- Before any BLOCKING_FREE, synchronize all queues that could
reference the buffer (src/pool.jl, synchronize_all_queues).
- Synchronize outside the registry lock with finalizers disabled, skip
already-finalized queues, and retire queues of dead tasks.
- In the queue finalizer, drain (unchecked, since a banned context
returns an error) then destroy, and null the handle so a concurrent
synchronize_all_queues skips it (lib/level-zero/cmdqueue.jl).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
The DFT wrappers passed pointer(lengths) / pointer(strides) to the descriptor create/set calls. A raw Ptr does not keep the backing vector alive, so the GC could collect it mid-call and oneMKL would read garbage dimensions/strides, failing commit with FFT_INVALID_DESCRIPTOR or a SIGFPE depending on heap reuse. Pass the arrays themselves so ccall roots them for the duration of the call (lib/mkl/fft.jl). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
The Aurora LTS NEO stack intermittently drops the tail of a command list, silently corrupting results. Add an off-by-default workaround (ONEAPI_SYNC_EACH_SUBMISSION=1) that synchronizes the queue after every command-list submission (lib/level-zero/cmdlist.jl, parsed in lib/level-zero/oneL0.jl __init__). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Set up the GitHub Actions job for the Aurora LTS self-hosted runner
(.github/workflows/ci.yml):
- Enable per-worker GPU spreading (ONEAPI_TEST_SPREAD_GPUS=1) and the
per-submission synchronize workaround (ONEAPI_SYNC_EACH_SUBMISSION=1).
- Run the test step through `julia -C native,-avx512fp16` to avoid
Sapphire Rapids AVX512-FP16 host miscompilation of Float16 work under
concurrent oneMKL load (the GPU results are correct; the corruption
is on the host CPU).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Make the Intel LTS-stack support opt-in so this branch can eventually merge
into main without affecting the rolling stack: with the flag off, every LTS
code path is bypassed and behavior matches upstream.
Introduce oneL0.LTS[], resolved at the top of oneL0.__init__ from ONEAPI_LTS
(default on for this branch; set ONEAPI_LTS=0 for the rolling-stack paths).
Resolved before the driver-availability early returns so it is set even on a
host without a functional GPU. Gate on it:
- Layer 1 (behavior): the strided-reduction materialization and the
coalesced-kernel dispatch in mapreducedim!; the command-queue registry
(global_queue / synchronize_all_queues), the pre-free synchronize in
release, and the queue-finalizer drain + handle-null in cmdqueue.jl.
- Layer 2 (codegen): _compiler_config now selects the SPIRVCompilerTarget
backend (:khronos translator vs :llvm back-end) and the SPIR-V extension
list from the flag. GPUCompiler loads the tool lazily, so both SPIR-V JLLs
can coexist as deps and the choice is made at compile time.
Deferred to layer 3: driver-JLL selection (NEO/loader/headers/igc) via
Preferences and the oneMKL support-library ABI. The oneMKL FFT GC-rooting is
left as-is (already fixed upstream via GC.@preserve).
Verified: precompiles and loads; ONEAPI_LTS unset/1 -> on, ONEAPI_LTS=0 -> off.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
32d11d7 to
b13cc3e
Compare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
No description provided.