Skip to content

Aurora LTS 2523.40 #574

Open
michel2323 wants to merge 7 commits into
main from
aurora-lts-2523.40
Open

Aurora LTS 2523.40 #574
michel2323 wants to merge 7 commits into
main from
aurora-lts-2523.40

Conversation

@michel2323

Copy link
Copy Markdown
Member

No description provided.

@github-actions

github-actions Bot commented May 11, 2026

Copy link
Copy Markdown
Contributor

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (`git runic main`) to apply these changes.

Click here to view the suggested changes.
diff --git a/deps/generate_interfaces.jl b/deps/generate_interfaces.jl
index 8c62b75..108a001 100644
--- a/deps/generate_interfaces.jl
+++ b/deps/generate_interfaces.jl
@@ -337,34 +337,34 @@ function generate_headers(library::String, filename::Vector{String}, output::Str
     end
   end
 
-  # Dedup: when two signatures map to the same C function name (because MKL
-  # added an overload), keep the one with more parameters — typically the
-  # newer signature (e.g. set_csr_data gained an `nnz` arg in MKL 2025.3.1).
-  # Without this the generated onemkl.cpp has duplicate function definitions
-  # and won't compile.
-  _fn_name(h) = (pos = findfirst('(', h); strip(split(strip(h[1:pos-1]))[end]))
-  _param_cnt(h) = (pos = findfirst('(', h); ep = findnext(')', h, pos); count(==(','), h[pos+1:ep-1]) + 1)
-  keep_idx = Dict{String,Int}()
-  keep_pc  = Dict{String,Int}()
-  for (i, sig) in enumerate(signatures)
-    (sig[2] in blacklist) && continue
-    fn = _fn_name(sig[1])
-    pc = _param_cnt(sig[1])
-    if !haskey(keep_idx, fn) || pc > keep_pc[fn]
-      keep_idx[fn] = i
-      keep_pc[fn]  = pc
+    # Dedup: when two signatures map to the same C function name (because MKL
+    # added an overload), keep the one with more parameters — typically the
+    # newer signature (e.g. set_csr_data gained an `nnz` arg in MKL 2025.3.1).
+    # Without this the generated onemkl.cpp has duplicate function definitions
+    # and won't compile.
+    _fn_name(h) = (pos = findfirst('(', h); strip(split(strip(h[1:(pos - 1)]))[end]))
+    _param_cnt(h) = (pos = findfirst('(', h); ep = findnext(')', h, pos); count(==(','), h[(pos + 1):(ep - 1)]) + 1)
+    keep_idx = Dict{String, Int}()
+    keep_pc = Dict{String, Int}()
+    for (i, sig) in enumerate(signatures)
+        (sig[2] in blacklist) && continue
+        fn = _fn_name(sig[1])
+        pc = _param_cnt(sig[1])
+        if !haskey(keep_idx, fn) || pc > keep_pc[fn]
+            keep_idx[fn] = i
+            keep_pc[fn] = pc
+        end
     end
-  end
-  keep_set = Set(values(keep_idx))
+    keep_set = Set(values(keep_idx))
 
   path_oneapi_headers = joinpath(@__DIR__, output)
   oneapi_headers = open(path_oneapi_headers, "w")
 
-  for (i, (header, name_routine, version, type_routine, template)) in enumerate(signatures)
+    for (i, (header, name_routine, version, type_routine, template)) in enumerate(signatures)
     # Blacklist
     (name_routine in blacklist) && continue
-    # Dedup
-    (i in keep_set) || continue
+        # Dedup
+        (i in keep_set) || continue
 
     # Pass scalars (e.g. alpha/beta inputs) as references instead of values
     for type in ("short", "float", "double", "float _Complex", "double _Complex")
diff --git a/lib/level-zero/cmdlist.jl b/lib/level-zero/cmdlist.jl
index 24d1d3a..19c9aa3 100644
--- a/lib/level-zero/cmdlist.jl
+++ b/lib/level-zero/cmdlist.jl
@@ -57,7 +57,7 @@ end
 # oversubscription matters more than speed.
 const sync_each_submission = Ref{Bool}(false)
 
-function execute!(queue::ZeCommandQueue, lists::Vector{ZeCommandList}, fence=nothing)
+function execute!(queue::ZeCommandQueue, lists::Vector{ZeCommandList}, fence = nothing)
     r = zeCommandQueueExecuteCommandLists(queue, length(lists), lists, something(fence, C_NULL))
     sync_each_submission[] && synchronize(queue)
     return r
diff --git a/lib/level-zero/oneL0.jl b/lib/level-zero/oneL0.jl
index 87a5e83..d79c6d0 100644
--- a/lib/level-zero/oneL0.jl
+++ b/lib/level-zero/oneL0.jl
@@ -217,7 +217,7 @@ function __init__()
 
     validation_layer[] = parse(Bool, get(ENV, "ZE_ENABLE_VALIDATION_LAYER", "false"))
     parameter_validation[] = parse(Bool, get(ENV, "ZE_ENABLE_PARAMETER_VALIDATION", "false"))
-    sync_each_submission[] = lowercase(get(ENV, "ONEAPI_SYNC_EACH_SUBMISSION", "")) in ("1", "true", "yes")
+    return sync_each_submission[] = lowercase(get(ENV, "ONEAPI_SYNC_EACH_SUBMISSION", "")) in ("1", "true", "yes")
 end
 
 end
diff --git a/lib/support/liboneapi_support.jl b/lib/support/liboneapi_support.jl
index e1b7327..c3d9958 100644
--- a/lib/support/liboneapi_support.jl
+++ b/lib/support/liboneapi_support.jl
@@ -6429,194 +6429,240 @@ function onemklZunmqr_batch_scratchpad_size(device_queue, side, trans, m, n, k,
 end
 
 function onemklXsparse_init_matrix_handle(p_spmat)
-    @ccall liboneapi_support.onemklXsparse_init_matrix_handle(p_spmat::Ptr{matrix_handle_t})::Cint
+    return @ccall liboneapi_support.onemklXsparse_init_matrix_handle(p_spmat::Ptr{matrix_handle_t})::Cint
 end
 
 function onemklXsparse_release_matrix_handle(device_queue, p_spmat)
     @ccall liboneapi_support.onemklXsparse_release_matrix_handle(device_queue::syclQueue_t,
-                                                                 p_spmat::Ptr{matrix_handle_t})::Cint
+        p_spmat::Ptr{matrix_handle_t}
+    )::Cint
 end
 
-function onemklSsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklSsparse_set_csr_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
                                     col_ind, values)
     @ccall liboneapi_support.onemklSsparse_set_csr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
+        spmat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
                                                         row_ptr::ZePtr{Int32},
                                                         col_ind::ZePtr{Int32},
                                                         values::ZePtr{Cfloat})::Cint
 end
 
-function onemklSsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
-                                       row_ptr, col_ind, values)
+function onemklSsparse_set_csr_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
+        row_ptr, col_ind, values
+    )
     @ccall liboneapi_support.onemklSsparse_set_csr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
+        nnz::Int64, index::onemklIndex,
                                                            row_ptr::ZePtr{Int64},
                                                            col_ind::ZePtr{Int64},
                                                            values::ZePtr{Cfloat})::Cint
 end
 
-function onemklDsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklDsparse_set_csr_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
                                     col_ind, values)
     @ccall liboneapi_support.onemklDsparse_set_csr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
+        spmat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
                                                         row_ptr::ZePtr{Int32},
                                                         col_ind::ZePtr{Int32},
                                                         values::ZePtr{Cdouble})::Cint
 end
 
-function onemklDsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
-                                       row_ptr, col_ind, values)
+function onemklDsparse_set_csr_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
+        row_ptr, col_ind, values
+    )
     @ccall liboneapi_support.onemklDsparse_set_csr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
+        nnz::Int64, index::onemklIndex,
                                                            row_ptr::ZePtr{Int64},
                                                            col_ind::ZePtr{Int64},
                                                            values::ZePtr{Cdouble})::Cint
 end
 
-function onemklCsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklCsparse_set_csr_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
                                     col_ind, values)
     @ccall liboneapi_support.onemklCsparse_set_csr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
+        spmat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
                                                         row_ptr::ZePtr{Int32},
                                                         col_ind::ZePtr{Int32},
                                                         values::ZePtr{ComplexF32})::Cint
 end
 
-function onemklCsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
-                                       row_ptr, col_ind, values)
+function onemklCsparse_set_csr_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
+        row_ptr, col_ind, values
+    )
     @ccall liboneapi_support.onemklCsparse_set_csr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
+        nnz::Int64, index::onemklIndex,
                                                            row_ptr::ZePtr{Int64},
                                                            col_ind::ZePtr{Int64},
                                                            values::ZePtr{ComplexF32})::Cint
 end
 
-function onemklZsparse_set_csr_data(device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
+function onemklZsparse_set_csr_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ptr,
                                     col_ind, values)
     @ccall liboneapi_support.onemklZsparse_set_csr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
+        spmat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
                                                         row_ptr::ZePtr{Int32},
                                                         col_ind::ZePtr{Int32},
                                                         values::ZePtr{ComplexF64})::Cint
 end
 
-function onemklZsparse_set_csr_data_64(device_queue, spmat, nrows, ncols, nnz, index,
-                                       row_ptr, col_ind, values)
+function onemklZsparse_set_csr_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
+        row_ptr, col_ind, values
+    )
     @ccall liboneapi_support.onemklZsparse_set_csr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
+        nnz::Int64, index::onemklIndex,
                                                            row_ptr::ZePtr{Int64},
                                                            col_ind::ZePtr{Int64},
                                                            values::ZePtr{ComplexF64})::Cint
 end
 
-function onemklSsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
-                                    row_ind, values)
-    @ccall liboneapi_support.onemklSsparse_set_csc_data(device_queue::syclQueue_t,
-                                                        spMat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
-                                                        col_ptr::Ptr{Int32},
-                                                        row_ind::Ptr{Int32},
-                                                        values::Ptr{Cfloat})::Cint
-end
-
-function onemklSsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
-                                       col_ptr, row_ind, values)
-    @ccall liboneapi_support.onemklSsparse_set_csc_data_64(device_queue::syclQueue_t,
-                                                           spMat::matrix_handle_t,
-                                                           nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
-                                                           col_ptr::Ptr{Int64},
-                                                           row_ind::Ptr{Int64},
-                                                           values::Ptr{Cfloat})::Cint
-end
-
-function onemklDsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
-                                    row_ind, values)
-    @ccall liboneapi_support.onemklDsparse_set_csc_data(device_queue::syclQueue_t,
-                                                        spMat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
-                                                        col_ptr::Ptr{Int32},
-                                                        row_ind::Ptr{Int32},
-                                                        values::Ptr{Cdouble})::Cint
-end
-
-function onemklDsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
-                                       col_ptr, row_ind, values)
-    @ccall liboneapi_support.onemklDsparse_set_csc_data_64(device_queue::syclQueue_t,
-                                                           spMat::matrix_handle_t,
-                                                           nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
-                                                           col_ptr::Ptr{Int64},
-                                                           row_ind::Ptr{Int64},
-                                                           values::Ptr{Cdouble})::Cint
-end
-
-function onemklCsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
-                                    row_ind, values)
-    @ccall liboneapi_support.onemklCsparse_set_csc_data(device_queue::syclQueue_t,
-                                                        spMat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
-                                                        col_ptr::Ptr{Int32},
-                                                        row_ind::Ptr{Int32},
-                                                        values::Ptr{ComplexF32})::Cint
-end
-
-function onemklCsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
-                                       col_ptr, row_ind, values)
-    @ccall liboneapi_support.onemklCsparse_set_csc_data_64(device_queue::syclQueue_t,
-                                                           spMat::matrix_handle_t,
-                                                           nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
-                                                           col_ptr::Ptr{Int64},
-                                                           row_ind::Ptr{Int64},
-                                                           values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_csc_data(device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
-                                    row_ind, values)
-    @ccall liboneapi_support.onemklZsparse_set_csc_data(device_queue::syclQueue_t,
-                                                        spMat::matrix_handle_t,
-                                                        nrows::Int64, ncols::Int64,
-                                                        nnz::Int64, index::onemklIndex,
-                                                        col_ptr::Ptr{Int32},
-                                                        row_ind::Ptr{Int32},
-                                                        values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_csc_data_64(device_queue, spMat, nrows, ncols, nnz, index,
-                                       col_ptr, row_ind, values)
-    @ccall liboneapi_support.onemklZsparse_set_csc_data_64(device_queue::syclQueue_t,
-                                                           spMat::matrix_handle_t,
-                                                           nrows::Int64, ncols::Int64,
-                                                           nnz::Int64, index::onemklIndex,
-                                                           col_ptr::Ptr{Int64},
-                                                           row_ind::Ptr{Int64},
-                                                           values::Ptr{ComplexF32})::Cint
-end
-
-function onemklSsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklSsparse_set_csc_data(
+        device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+        row_ind, values
+    )
+    return @ccall liboneapi_support.onemklSsparse_set_csc_data(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int32},
+        row_ind::Ptr{Int32},
+        values::Ptr{Cfloat}
+    )::Cint
+end
+
+function onemklSsparse_set_csc_data_64(
+        device_queue, spMat, nrows, ncols, nnz, index,
+        col_ptr, row_ind, values
+    )
+    return @ccall liboneapi_support.onemklSsparse_set_csc_data_64(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int64},
+        row_ind::Ptr{Int64},
+        values::Ptr{Cfloat}
+    )::Cint
+end
+
+function onemklDsparse_set_csc_data(
+        device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+        row_ind, values
+    )
+    return @ccall liboneapi_support.onemklDsparse_set_csc_data(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int32},
+        row_ind::Ptr{Int32},
+        values::Ptr{Cdouble}
+    )::Cint
+end
+
+function onemklDsparse_set_csc_data_64(
+        device_queue, spMat, nrows, ncols, nnz, index,
+        col_ptr, row_ind, values
+    )
+    return @ccall liboneapi_support.onemklDsparse_set_csc_data_64(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int64},
+        row_ind::Ptr{Int64},
+        values::Ptr{Cdouble}
+    )::Cint
+end
+
+function onemklCsparse_set_csc_data(
+        device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+        row_ind, values
+    )
+    return @ccall liboneapi_support.onemklCsparse_set_csc_data(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int32},
+        row_ind::Ptr{Int32},
+        values::Ptr{ComplexF32}
+    )::Cint
+end
+
+function onemklCsparse_set_csc_data_64(
+        device_queue, spMat, nrows, ncols, nnz, index,
+        col_ptr, row_ind, values
+    )
+    return @ccall liboneapi_support.onemklCsparse_set_csc_data_64(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int64},
+        row_ind::Ptr{Int64},
+        values::Ptr{ComplexF32}
+    )::Cint
+end
+
+function onemklZsparse_set_csc_data(
+        device_queue, spMat, nrows, ncols, nnz, index, col_ptr,
+        row_ind, values
+    )
+    return @ccall liboneapi_support.onemklZsparse_set_csc_data(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int32},
+        row_ind::Ptr{Int32},
+        values::Ptr{ComplexF32}
+    )::Cint
+end
+
+function onemklZsparse_set_csc_data_64(
+        device_queue, spMat, nrows, ncols, nnz, index,
+        col_ptr, row_ind, values
+    )
+    return @ccall liboneapi_support.onemklZsparse_set_csc_data_64(
+        device_queue::syclQueue_t,
+        spMat::matrix_handle_t,
+        nrows::Int64, ncols::Int64,
+        nnz::Int64, index::onemklIndex,
+        col_ptr::Ptr{Int64},
+        row_ind::Ptr{Int64},
+        values::Ptr{ComplexF32}
+    )::Cint
+end
+
+function onemklSsparse_set_coo_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ind,
                                     col_ind, values)
     @ccall liboneapi_support.onemklSsparse_set_coo_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                         nrows::Int32, ncols::Int32,
                                                         nnz::Int32, index::onemklIndex,
                                                         row_ind::ZePtr{Int32},
@@ -6624,10 +6670,11 @@ function onemklSsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
                                                         values::ZePtr{Cfloat})::Cint
 end
 
-function onemklSsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklSsparse_set_coo_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
                                        row_ind, col_ind, values)
     @ccall liboneapi_support.onemklSsparse_set_coo_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
                                                            nnz::Int64, index::onemklIndex,
                                                            row_ind::ZePtr{Int64},
@@ -6635,10 +6682,11 @@ function onemklSsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
                                                            values::ZePtr{Cfloat})::Cint
 end
 
-function onemklDsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklDsparse_set_coo_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ind,
                                     col_ind, values)
     @ccall liboneapi_support.onemklDsparse_set_coo_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                         nrows::Int32, ncols::Int32,
                                                         nnz::Int32, index::onemklIndex,
                                                         row_ind::ZePtr{Int32},
@@ -6646,10 +6694,11 @@ function onemklDsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
                                                         values::ZePtr{Cdouble})::Cint
 end
 
-function onemklDsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklDsparse_set_coo_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
                                        row_ind, col_ind, values)
     @ccall liboneapi_support.onemklDsparse_set_coo_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
                                                            nnz::Int64, index::onemklIndex,
                                                            row_ind::ZePtr{Int64},
@@ -6657,10 +6706,11 @@ function onemklDsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
                                                            values::ZePtr{Cdouble})::Cint
 end
 
-function onemklCsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklCsparse_set_coo_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ind,
                                     col_ind, values)
     @ccall liboneapi_support.onemklCsparse_set_coo_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                         nrows::Int32, ncols::Int32,
                                                         nnz::Int32, index::onemklIndex,
                                                         row_ind::ZePtr{Int32},
@@ -6668,10 +6718,11 @@ function onemklCsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
                                                         values::ZePtr{ComplexF32})::Cint
 end
 
-function onemklCsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklCsparse_set_coo_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
                                        row_ind, col_ind, values)
     @ccall liboneapi_support.onemklCsparse_set_coo_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
                                                            nnz::Int64, index::onemklIndex,
                                                            row_ind::ZePtr{Int64},
@@ -6679,10 +6730,11 @@ function onemklCsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
                                                            values::ZePtr{ComplexF32})::Cint
 end
 
-function onemklZsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, index, row_ind,
+function onemklZsparse_set_coo_data(
+        device_queue, spmat, nrows, ncols, nnz, index, row_ind,
                                     col_ind, values)
     @ccall liboneapi_support.onemklZsparse_set_coo_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                         nrows::Int32, ncols::Int32,
                                                         nnz::Int32, index::onemklIndex,
                                                         row_ind::ZePtr{Int32},
@@ -6690,10 +6742,11 @@ function onemklZsparse_set_coo_data(device_queue, spmat, nrows, ncols, nnz, inde
                                                         values::ZePtr{ComplexF64})::Cint
 end
 
-function onemklZsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, index,
+function onemklZsparse_set_coo_data_64(
+        device_queue, spmat, nrows, ncols, nnz, index,
                                        row_ind, col_ind, values)
     @ccall liboneapi_support.onemklZsparse_set_coo_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
+        spmat::matrix_handle_t,
                                                            nrows::Int64, ncols::Int64,
                                                            nnz::Int64, index::onemklIndex,
                                                            row_ind::ZePtr{Int64},
@@ -6701,128 +6754,160 @@ function onemklZsparse_set_coo_data_64(device_queue, spmat, nrows, ncols, nnz, i
                                                            values::ZePtr{ComplexF64})::Cint
 end
 
-function onemklSsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                    row_blk_size, col_blk_size, blk_layout, index,
-                                    bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklSsparse_set_bsr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        blk_nrows::Int64, blk_ncols::Int64,
-                                                        blk_nnz::Int64, row_blk_size::Int64,
-                                                        col_blk_size::Int64,
-                                                        blk_layout::onemklLayout,
-                                                        index::onemklIndex,
-                                                        bsr_row_ptr::Ptr{Int32},
-                                                        bsr_col_ind::Ptr{Int32},
-                                                        bsr_values::Ptr{Cfloat})::Cint
-end
-
-function onemklSsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                       row_blk_size, col_blk_size, blk_layout, index,
-                                       bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklSsparse_set_bsr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
-                                                           blk_nrows::Int64,
-                                                           blk_ncols::Int64, blk_nnz::Int64,
-                                                           row_blk_size::Int64,
-                                                           col_blk_size::Int64,
-                                                           blk_layout::onemklLayout,
-                                                           index::onemklIndex,
-                                                           bsr_row_ptr::Ptr{Int64},
-                                                           bsr_col_ind::Ptr{Int64},
-                                                           bsr_values::Ptr{Cfloat})::Cint
-end
-
-function onemklDsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                    row_blk_size, col_blk_size, blk_layout, index,
-                                    bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklDsparse_set_bsr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        blk_nrows::Int64, blk_ncols::Int64,
-                                                        blk_nnz::Int64, row_blk_size::Int64,
-                                                        col_blk_size::Int64,
-                                                        blk_layout::onemklLayout,
-                                                        index::onemklIndex,
-                                                        bsr_row_ptr::Ptr{Int32},
-                                                        bsr_col_ind::Ptr{Int32},
-                                                        bsr_values::Ptr{Cdouble})::Cint
-end
-
-function onemklDsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                       row_blk_size, col_blk_size, blk_layout, index,
-                                       bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklDsparse_set_bsr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
-                                                           blk_nrows::Int64,
-                                                           blk_ncols::Int64, blk_nnz::Int64,
-                                                           row_blk_size::Int64,
-                                                           col_blk_size::Int64,
-                                                           blk_layout::onemklLayout,
-                                                           index::onemklIndex,
-                                                           bsr_row_ptr::Ptr{Int64},
-                                                           bsr_col_ind::Ptr{Int64},
-                                                           bsr_values::Ptr{Cdouble})::Cint
-end
-
-function onemklCsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                    row_blk_size, col_blk_size, blk_layout, index,
-                                    bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklCsparse_set_bsr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        blk_nrows::Int64, blk_ncols::Int64,
-                                                        blk_nnz::Int64, row_blk_size::Int64,
-                                                        col_blk_size::Int64,
-                                                        blk_layout::onemklLayout,
-                                                        index::onemklIndex,
-                                                        bsr_row_ptr::Ptr{Int32},
-                                                        bsr_col_ind::Ptr{Int32},
-                                                        bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklCsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                       row_blk_size, col_blk_size, blk_layout, index,
-                                       bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklCsparse_set_bsr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
-                                                           blk_nrows::Int64,
-                                                           blk_ncols::Int64, blk_nnz::Int64,
-                                                           row_blk_size::Int64,
-                                                           col_blk_size::Int64,
-                                                           blk_layout::onemklLayout,
-                                                           index::onemklIndex,
-                                                           bsr_row_ptr::Ptr{Int64},
-                                                           bsr_col_ind::Ptr{Int64},
-                                                           bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_bsr_data(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                    row_blk_size, col_blk_size, blk_layout, index,
-                                    bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklZsparse_set_bsr_data(device_queue::syclQueue_t,
-                                                        spmat::matrix_handle_t,
-                                                        blk_nrows::Int64, blk_ncols::Int64,
-                                                        blk_nnz::Int64, row_blk_size::Int64,
-                                                        col_blk_size::Int64,
-                                                        blk_layout::onemklLayout,
-                                                        index::onemklIndex,
-                                                        bsr_row_ptr::Ptr{Int32},
-                                                        bsr_col_ind::Ptr{Int32},
-                                                        bsr_values::Ptr{ComplexF32})::Cint
-end
-
-function onemklZsparse_set_bsr_data_64(device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
-                                       row_blk_size, col_blk_size, blk_layout, index,
-                                       bsr_row_ptr, bsr_col_ind, bsr_values)
-    @ccall liboneapi_support.onemklZsparse_set_bsr_data_64(device_queue::syclQueue_t,
-                                                           spmat::matrix_handle_t,
-                                                           blk_nrows::Int64,
-                                                           blk_ncols::Int64, blk_nnz::Int64,
-                                                           row_blk_size::Int64,
-                                                           col_blk_size::Int64,
-                                                           blk_layout::onemklLayout,
-                                                           index::onemklIndex,
-                                                           bsr_row_ptr::Ptr{Int64},
-                                                           bsr_col_ind::Ptr{Int64},
-                                                           bsr_values::Ptr{ComplexF32})::Cint
+function onemklSsparse_set_bsr_data(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklSsparse_set_bsr_data(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64, blk_ncols::Int64,
+        blk_nnz::Int64, row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int32},
+        bsr_col_ind::Ptr{Int32},
+        bsr_values::Ptr{Cfloat}
+    )::Cint
+end
+
+function onemklSsparse_set_bsr_data_64(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklSsparse_set_bsr_data_64(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64,
+        blk_ncols::Int64, blk_nnz::Int64,
+        row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int64},
+        bsr_col_ind::Ptr{Int64},
+        bsr_values::Ptr{Cfloat}
+    )::Cint
+end
+
+function onemklDsparse_set_bsr_data(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklDsparse_set_bsr_data(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64, blk_ncols::Int64,
+        blk_nnz::Int64, row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int32},
+        bsr_col_ind::Ptr{Int32},
+        bsr_values::Ptr{Cdouble}
+    )::Cint
+end
+
+function onemklDsparse_set_bsr_data_64(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklDsparse_set_bsr_data_64(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64,
+        blk_ncols::Int64, blk_nnz::Int64,
+        row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int64},
+        bsr_col_ind::Ptr{Int64},
+        bsr_values::Ptr{Cdouble}
+    )::Cint
+end
+
+function onemklCsparse_set_bsr_data(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklCsparse_set_bsr_data(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64, blk_ncols::Int64,
+        blk_nnz::Int64, row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int32},
+        bsr_col_ind::Ptr{Int32},
+        bsr_values::Ptr{ComplexF32}
+    )::Cint
+end
+
+function onemklCsparse_set_bsr_data_64(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklCsparse_set_bsr_data_64(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64,
+        blk_ncols::Int64, blk_nnz::Int64,
+        row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int64},
+        bsr_col_ind::Ptr{Int64},
+        bsr_values::Ptr{ComplexF32}
+    )::Cint
+end
+
+function onemklZsparse_set_bsr_data(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklZsparse_set_bsr_data(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64, blk_ncols::Int64,
+        blk_nnz::Int64, row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int32},
+        bsr_col_ind::Ptr{Int32},
+        bsr_values::Ptr{ComplexF32}
+    )::Cint
+end
+
+function onemklZsparse_set_bsr_data_64(
+        device_queue, spmat, blk_nrows, blk_ncols, blk_nnz,
+        row_blk_size, col_blk_size, blk_layout, index,
+        bsr_row_ptr, bsr_col_ind, bsr_values
+    )
+    return @ccall liboneapi_support.onemklZsparse_set_bsr_data_64(
+        device_queue::syclQueue_t,
+        spmat::matrix_handle_t,
+        blk_nrows::Int64,
+        blk_ncols::Int64, blk_nnz::Int64,
+        row_blk_size::Int64,
+        col_blk_size::Int64,
+        blk_layout::onemklLayout,
+        index::onemklIndex,
+        bsr_row_ptr::Ptr{Int64},
+        bsr_col_ind::Ptr{Int64},
+        bsr_values::Ptr{ComplexF32}
+    )::Cint
 end
 
 function onemklXsparse_init_matmat_descr(p_desc)
diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl
index adc17d6..e7088ab 100644
--- a/src/compiler/compilation.jl
+++ b/src/compiler/compilation.jl
@@ -197,7 +197,7 @@ end
         # TODO: emit printf format strings in constant memory
         extensions = String[
             "SPV_EXT_relaxed_printf_string_address_space",
-            "SPV_EXT_shader_atomic_float_add"
+            "SPV_EXT_shader_atomic_float_add",
         ]
     else
         backend = :llvm
diff --git a/src/context.jl b/src/context.jl
index bc5ac70..a84e8c4 100644
--- a/src/context.jl
+++ b/src/context.jl
@@ -232,8 +232,10 @@ function global_queue(ctx::ZeContext, dev::ZeDevice)
             GC.enable_finalizers(false)
             try
                 @lock queue_registry_lock begin
-                    push!(get!(Vector{Tuple{WeakRef,ZeCommandQueue}}, queue_registry, (ctx, dev)),
-                          (WeakRef(current_task()), queue))
+                    push!(
+                        get!(Vector{Tuple{WeakRef, ZeCommandQueue}}, queue_registry, (ctx, dev)),
+                        (WeakRef(current_task()), queue)
+                    )
                 end
             finally
                 GC.enable_finalizers(true)
@@ -255,17 +257,17 @@ end
 # task-local, so once their task is dead no new work can reach them, and the entry can
 # be dropped (allowing the queue to be finalized) after a final synchronize.
 const queue_registry_lock = ReentrantLock()
-const queue_registry = Dict{Tuple{ZeContext,ZeDevice},Vector{Tuple{WeakRef,ZeCommandQueue}}}()
+const queue_registry = Dict{Tuple{ZeContext, ZeDevice}, Vector{Tuple{WeakRef, ZeCommandQueue}}}()
 
 # synchronize all known queues that target the given context (and device, if specified),
 # i.e., all queues whose in-flight work could possibly reference an allocation that is
 # about to be freed.
-function synchronize_all_queues(ctx::ZeContext, dev::Union{ZeDevice,Nothing})
+function synchronize_all_queues(ctx::ZeContext, dev::Union{ZeDevice, Nothing})
     # only the LTS stack populates the queue registry (see `global_queue`); on the
     # rolling stack this is a no-op and `release` frees directly.
     oneL0.LTS[] || return
     queues = ZeCommandQueue[]
-    stale = Tuple{WeakRef,ZeCommandQueue}[]
+    stale = Tuple{WeakRef, ZeCommandQueue}[]
     GC.enable_finalizers(false)
     try
         @lock queue_registry_lock begin
diff --git a/src/mapreduce.jl b/src/mapreduce.jl
index d03d72f..fca02a7 100644
--- a/src/mapreduce.jl
+++ b/src/mapreduce.jl
@@ -160,7 +160,7 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::oneWrappedArray{T},
     # dense array first so every global read in the reduction kernel is coalesced.
     if oneL0.LTS[] && !_dense_reduce_input(A)
         Acontig = Broadcast.materialize(Broadcast.broadcasted(f, A))
-        return GPUArrays.mapreducedim!(identity, op, R, Acontig; init=init)
+        return GPUArrays.mapreducedim!(identity, op, R, Acontig; init = init)
     end
 
     # add singleton dimensions to the output container, if needed
@@ -192,8 +192,9 @@ function GPUArrays.mapreducedim!(f::F, op::OP, R::oneWrappedArray{T},
     if oneL0.LTS[] && size(Rreduce, 1) == 1
         items = clamp(length(Rother), 1, 256)
         groups = min(cld(length(Rother), items), 1024)
-        @oneapi items=items groups=groups coalesced_mapreduce_device(
-            f, op, init, Rreduce, Rother, R′, A)
+        @oneapi items = items groups = groups coalesced_mapreduce_device(
+            f, op, init, Rreduce, Rother, R′, A
+        )
         return R
     end
 
diff --git a/src/utils.jl b/src/utils.jl
index e7d232e..551ff49 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -2,8 +2,10 @@
 function versioninfo(io::IO=stdout)
     if Sys.islinux()
         println(io, "Binary dependencies:")
-        for jll in [oneL0.NEO_jll, oneL0.NEO_jll.libigc_LTS_jll, oneL0.NEO_jll.gmmlib_jll,
-                    SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll, oneAPI_Support_jll]
+        for jll in [
+                oneL0.NEO_jll, oneL0.NEO_jll.libigc_LTS_jll, oneL0.NEO_jll.gmmlib_jll,
+                SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll, oneAPI_Support_jll,
+            ]
             name = string(jll)
             print(io, "- $(name[1:end-4]): $(Base.pkgversion(jll))")
             if jll.host_platform !== nothing
diff --git a/test/onemkl.jl b/test/onemkl.jl
index 4a4c7fc..518c179 100644
--- a/test/onemkl.jl
+++ b/test/onemkl.jl
@@ -14,7 +14,7 @@ k = 13
 
 @testset "Version" begin
     version_onemkl = oneMKL.version()
-    @test version_onemkl ≥ v"2025.2.0"
+        @test version_onemkl ≥ v"2025.2.0"
 end
 
 ############################################################################################

@michel2323 michel2323 force-pushed the aurora-lts-2523.40 branch from eec2785 to 991d29e Compare June 12, 2026 14:28
@codecov

codecov Bot commented Jun 13, 2026

Copy link
Copy Markdown

Codecov Report

❌ Patch coverage is 94.18605% with 5 lines in your changes missing coverage. Please review.
✅ Project coverage is 80.96%. Comparing base (f0bc493) to head (b13cc3e).

Files with missing lines Patch % Lines
src/compiler/compilation.jl 66.66% 2 Missing ⚠️
src/mapreduce.jl 84.61% 2 Missing ⚠️
src/oneAPI.jl 80.00% 1 Missing ⚠️
Additional details and impacted files
@@            Coverage Diff             @@
##             main     #574      +/-   ##
==========================================
+ Coverage   79.02%   80.96%   +1.94%     
==========================================
  Files          49       49              
  Lines        3333     3321      -12     
==========================================
+ Hits         2634     2689      +55     
+ Misses        699      632      -67     

☔ View full report in Codecov by Harness.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

michel2323 and others added 7 commits June 23, 2026 15:45
Aurora ships the Intel "LTS" GPU software stack rather than the rolling
release that upstream oneAPI.jl targets. Pin the whole toolchain to it:

  - JLLs: NEO_LTS_jll =25.18.33578, oneAPI_Level_Zero_Loader_LTS_jll
    =1.24, oneAPI_Level_Zero_Headers_LTS_jll, libigc_LTS_jll, and the
    oneMKL 2025.3.1 support library. lib/level-zero/oneL0.jl aliases the
    *_LTS_jll packages back to their canonical names so the rest of the
    code is unchanged.
  - Keep the SPIR-V Translator (SPIRV_LLVM_Translator_jll 21,
    SPIRVIntrinsics 0.5). The LTS NEO/IGC runtime does not accept the
    output of upstream's LLVM SPIR-V back-end (#491), so revert the
    back-end switch in src/compiler/compilation.jl, src/oneAPI.jl and
    src/utils.jl and restore the explicit SPIR-V extension list.
  - Regenerate the oneMKL C bindings against 2025.3.1 (deps/src/onemkl.*,
    lib/support/liboneapi_support.jl, deps/generate_interfaces.jl) and
    fix the sparse CSR argument types (ZePtr/onemklIndex) in the
    regenerated bindings and res/support.toml. Stay on oneMKL 2025.3.1,
    declining the upstream oneAPI 2026.0.0 migration (#582).
  - Prepend NEO's libze_intel_gpu directory to LD_LIBRARY_PATH in
    __init__ so libsycl's bundled ze_lib finds the driver by path when
    no system NEO is installed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
The LTS IGC silently miscompiles strided memory accesses inside the
reduction kernels, producing wrong results with no error. Add two
complementary guards in src/mapreduce.jl:

  - A coalesced reduction kernel for the case where the contiguous
    leading dimension is kept and only trailing dimensions are reduced
    (size(Rreduce, 1) == 1, e.g. sum(A; dims=2)), avoiding the strided
    per-thread loads.
  - Materialize strided / non-dense inputs (Transpose, Adjoint,
    PermutedDimsArray, SubArray, ...) to a dense array before reducing,
    via _dense_reduce_input; this fixes e.g. `a == transpose(b)`.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
On the LTS NEO stack (25.18) freeing a buffer does not drain queues that
still have work in flight referencing it: the in-flight kernel then
faults and the context is banned, surfacing later as a
ZE_RESULT_ERROR_UNKNOWN at an unrelated op. global_queue is task-local,
so a test file's task can also die with work still queued.

  - Register every global queue in a per-(context,device) registry that
    holds the queue strongly, keyed by a weak reference to the owning
    task (src/context.jl). A WeakRef to the queue would be cleared in the
    same GC cycle that schedules the queue's finalizer, hiding the queue
    from release exactly when its in-flight work still references the
    buffers being freed.
  - Before any BLOCKING_FREE, synchronize all queues that could
    reference the buffer (src/pool.jl, synchronize_all_queues).
  - Synchronize outside the registry lock with finalizers disabled, skip
    already-finalized queues, and retire queues of dead tasks.
  - In the queue finalizer, drain (unchecked, since a banned context
    returns an error) then destroy, and null the handle so a concurrent
    synchronize_all_queues skips it (lib/level-zero/cmdqueue.jl).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
The DFT wrappers passed pointer(lengths) / pointer(strides) to the
descriptor create/set calls. A raw Ptr does not keep the backing vector
alive, so the GC could collect it mid-call and oneMKL would read garbage
dimensions/strides, failing commit with FFT_INVALID_DESCRIPTOR or a
SIGFPE depending on heap reuse. Pass the arrays themselves so ccall
roots them for the duration of the call (lib/mkl/fft.jl).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
The Aurora LTS NEO stack intermittently drops the tail of a command
list, silently corrupting results. Add an off-by-default workaround
(ONEAPI_SYNC_EACH_SUBMISSION=1) that synchronizes the queue after every
command-list submission (lib/level-zero/cmdlist.jl, parsed in
lib/level-zero/oneL0.jl __init__).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Set up the GitHub Actions job for the Aurora LTS self-hosted runner
(.github/workflows/ci.yml):

  - Enable per-worker GPU spreading (ONEAPI_TEST_SPREAD_GPUS=1) and the
    per-submission synchronize workaround (ONEAPI_SYNC_EACH_SUBMISSION=1).
  - Run the test step through `julia -C native,-avx512fp16` to avoid
    Sapphire Rapids AVX512-FP16 host miscompilation of Float16 work under
    concurrent oneMKL load (the GPU results are correct; the corruption
    is on the host CPU).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Make the Intel LTS-stack support opt-in so this branch can eventually merge
into main without affecting the rolling stack: with the flag off, every LTS
code path is bypassed and behavior matches upstream.

Introduce oneL0.LTS[], resolved at the top of oneL0.__init__ from ONEAPI_LTS
(default on for this branch; set ONEAPI_LTS=0 for the rolling-stack paths).
The flag is resolved before the driver-availability early returns so that it
is set even on a host without a functional GPU. The following are gated on it:

  - Layer 1 (behavior): the strided-reduction materialization and the
    coalesced-kernel dispatch in mapreducedim!; the command-queue registry
    (global_queue / synchronize_all_queues), the pre-free synchronize in
    release, and the queue-finalizer drain + handle-null in cmdqueue.jl.
  - Layer 2 (codegen): _compiler_config now selects the SPIRVCompilerTarget
    backend (:khronos translator vs :llvm back-end) and the SPIR-V extension
    list from the flag. GPUCompiler loads the tool lazily, so both SPIR-V JLLs
    can coexist as deps and the choice is made at compile time.

Deferred to layer 3: driver-JLL selection (NEO/loader/headers/igc) via
Preferences and the oneMKL support-library ABI. The oneMKL FFT GC-rooting is
left as-is (already fixed upstream via GC.@preserve).

Verified: precompiles and loads; ONEAPI_LTS unset/1 -> on, ONEAPI_LTS=0 -> off.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
@michel2323 michel2323 force-pushed the aurora-lts-2523.40 branch from 32d11d7 to b13cc3e Compare June 24, 2026 13:26
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant