ci: improvements from #15982

Author: Daniel Hiltgen
Date: 2026-05-08 14:05:11 -07:00
commit 4d142c0724 (parent 88935c21b5)
10 changed files with 250 additions and 38 deletions

@@ -16,7 +16,7 @@ jobs:
outputs:
GOFLAGS: ${{ steps.goflags.outputs.GOFLAGS }}
VERSION: ${{ steps.goflags.outputs.VERSION }}
vendorsha: ${{ steps.changes.outputs.vendorsha }}
vendorsha: ${{ steps.goflags.outputs.vendorsha }}
steps:
- uses: actions/checkout@v4
- name: Set environment
@@ -142,6 +142,13 @@ jobs:
env:
GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
steps:
- if: startsWith(matrix.preset, 'MLX ')
name: Increase pagefile to 200 GB
uses: al-cheb/configure-pagefile-action@v1.5
with:
minimum-size: 16GB
maximum-size: 200GB
disk-root: "D:"
- name: Install system dependencies
run: |
choco install -y --no-progress ccache ninja
@@ -381,17 +388,33 @@ jobs:
dist/*.ps1
dist/OllamaSetup.exe
linux-build:
linux-depends:
strategy:
matrix:
include:
- os: linux
arch: amd64
target: archive
- os: linux
arch: arm64
target: archive
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
- arch: amd64
target: llama-server-cpu
- arch: amd64
target: llama-server-cuda-v12
- arch: amd64
target: llama-server-cuda-v13
- arch: amd64
target: mlx
- arch: amd64
target: llama-server-rocm
- arch: amd64
target: llama-server-vulkan
- arch: arm64
target: llama-server-cpu
- arch: arm64
target: llama-server-cuda-v12
- arch: arm64
target: llama-server-cuda-v13
- arch: arm64
target: jetpack-5
- arch: arm64
target: jetpack-6
runs-on: ${{ matrix.arch == 'arm64' && 'linux-arm64' || 'linux' }}
environment: release
needs: setup-environment
env:
@@ -399,17 +422,101 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
with:
username: ${{ vars.DOCKER_USER }}
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
- if: matrix.target == 'mlx'
name: Increase Linux swap to 200 GB
shell: bash
run: |
set -e
SWAP_PATH=/swapfile-mlx
SWAP_SIZE_GB=200
if [ -f "$SWAP_PATH" ]; then
sudo swapoff "$SWAP_PATH" 2>/dev/null || true
sudo rm -f "$SWAP_PATH"
fi
if ! sudo fallocate -l ${SWAP_SIZE_GB}G "$SWAP_PATH" 2>/dev/null; then
echo "fallocate unsupported, falling back to dd"
sudo dd if=/dev/zero of="$SWAP_PATH" bs=1M count=$((SWAP_SIZE_GB * 1024))
fi
sudo chmod 600 "$SWAP_PATH"
sudo mkswap "$SWAP_PATH"
sudo swapon "$SWAP_PATH"
swapon --show
free -h
- uses: docker/build-push-action@v6
with:
context: .
platforms: linux/${{ matrix.arch }}
target: ${{ matrix.target }}
provenance: false
sbom: false
build-args: |
GOFLAGS=${{ env.GOFLAGS }}
CGO_CFLAGS=${{ env.CGO_CFLAGS }}
CGO_CXXFLAGS=${{ env.CGO_CXXFLAGS }}
OLLAMA_MLX_BUILD_JOBS=16
OLLAMA_MLX_NVCC_THREADS=6
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
cache-from: |
type=registry,ref=ollama/release:cache-${{ matrix.arch }}-${{ matrix.target }}
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
cache-to: type=registry,ref=ollama/release:cache-${{ matrix.arch }}-${{ matrix.target }},mode=max
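
The new linux-depends job exists only to warm a per-target registry cache: each matrix entry builds one Dockerfile target and exports all of its intermediate layers to ollama/release:cache-<arch>-<target> (mode=max), so the archive and image jobs below can pull those layers instead of recompiling every backend. A rough local equivalent for a single target, sketched as a plain docker buildx call with a target and cache ref taken from the matrix above (the workflow does the same thing through docker/build-push-action):

    # Warm the shared registry cache for one target (illustrative).
    docker buildx build . \
      --platform linux/amd64 \
      --target llama-server-cuda-v13 \
      --cache-from type=registry,ref=ollama/release:cache-amd64-llama-server-cuda-v13 \
      --cache-to type=registry,ref=ollama/release:cache-amd64-llama-server-cuda-v13,mode=max
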
linux-build:
strategy:
matrix:
include:
- os: linux
arch: amd64
target: archive
cache-from: |
type=registry,ref=ollama/release:cache-amd64-llama-server-cpu
type=registry,ref=ollama/release:cache-amd64-llama-server-cuda-v12
type=registry,ref=ollama/release:cache-amd64-llama-server-cuda-v13
type=registry,ref=ollama/release:cache-amd64-mlx
type=registry,ref=ollama/release:cache-amd64-llama-server-rocm
type=registry,ref=ollama/release:cache-amd64-llama-server-vulkan
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
- os: linux
arch: arm64
target: archive
cache-from: |
type=registry,ref=ollama/release:cache-arm64-llama-server-cpu
type=registry,ref=ollama/release:cache-arm64-llama-server-cuda-v12
type=registry,ref=ollama/release:cache-arm64-llama-server-cuda-v13
type=registry,ref=ollama/release:cache-arm64-jetpack-5
type=registry,ref=ollama/release:cache-arm64-jetpack-6
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
environment: release
needs: [setup-environment, linux-depends]
env:
GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
steps:
- uses: actions/checkout@v4
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
with:
username: ${{ vars.DOCKER_USER }}
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
- uses: docker/build-push-action@v6
with:
context: .
platforms: ${{ matrix.os }}/${{ matrix.arch }}
target: ${{ matrix.target }}
provenance: false
sbom: false
build-args: |
GOFLAGS=${{ env.GOFLAGS }}
CGO_CFLAGS=${{ env.CGO_CFLAGS }}
CGO_CXXFLAGS=${{ env.CGO_CXXFLAGS }}
OLLAMA_MLX_BUILD_JOBS=16
OLLAMA_MLX_NVCC_THREADS=6
outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
cache-from: ${{ matrix.cache-from }}
cache-to: type=inline
- name: Deduplicate CUDA libraries
run: |
@@ -458,6 +565,15 @@ jobs:
CGO_CXXFLAGS
GOFLAGS
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
OLLAMA_MLX_BUILD_JOBS=16
OLLAMA_MLX_NVCC_THREADS=6
cache-from: |
type=registry,ref=ollama/release:cache-arm64-llama-server-cpu
type=registry,ref=ollama/release:cache-arm64-llama-server-cuda-v12
type=registry,ref=ollama/release:cache-arm64-llama-server-cuda-v13
type=registry,ref=ollama/release:cache-arm64-jetpack-5
type=registry,ref=ollama/release:cache-arm64-jetpack-6
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
- os: linux
arch: amd64
build-args: |
@@ -465,6 +581,15 @@ jobs:
CGO_CXXFLAGS
GOFLAGS
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
OLLAMA_MLX_BUILD_JOBS=16
OLLAMA_MLX_NVCC_THREADS=6
cache-from: |
type=registry,ref=ollama/release:cache-amd64-llama-server-cpu
type=registry,ref=ollama/release:cache-amd64-llama-server-cuda-v12
type=registry,ref=ollama/release:cache-amd64-llama-server-cuda-v13
type=registry,ref=ollama/release:cache-amd64-mlx
type=registry,ref=ollama/release:cache-amd64-llama-server-vulkan
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
- os: linux
arch: amd64
suffix: '-rocm'
@@ -474,9 +599,15 @@ jobs:
GOFLAGS
FLAVOR=rocm
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
OLLAMA_MLX_BUILD_JOBS=16
OLLAMA_MLX_NVCC_THREADS=6
cache-from: |
type=registry,ref=ollama/release:cache-amd64-llama-server-cpu
type=registry,ref=ollama/release:cache-amd64-llama-server-rocm
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
environment: release
needs: setup-environment
needs: [setup-environment, linux-depends]
env:
GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
steps:
@@ -491,9 +622,11 @@ jobs:
with:
context: .
platforms: ${{ matrix.os }}/${{ matrix.arch }}
provenance: false
sbom: false
build-args: ${{ matrix.build-args }}
outputs: type=image,name=${{ vars.DOCKER_REPO }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
cache-from: ${{ matrix.cache-from }}
cache-to: type=inline
- run: |
mkdir -p ${{ matrix.os }}-${{ matrix.arch }}
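
Two cache export types are in play here. The warm-up job writes dedicated registry cache refs with mode=max (every intermediate layer), while the image build above uses cache-to: type=inline, which embeds min-mode cache metadata in the pushed image itself; that is what makes the published repository tag usable as an additional cache-from source. A hedged sketch of a later build reusing both (the ollama/ollama:latest tag is illustrative, since DOCKER_REPO is a repository variable whose value is not shown here):

    # Reuse layers from the published image (inline cache) and from the
    # dedicated mode=max cache ref written by linux-depends.
    docker buildx build . \
      --cache-from type=registry,ref=ollama/ollama:latest \
      --cache-from type=registry,ref=ollama/release:cache-amd64-llama-server-cpu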

@@ -71,7 +71,7 @@ jobs:
- preset: 'MLX CUDA 13'
container: nvidia/cuda:13.0.0-devel-ubuntu22.04
extra-packages: libcudnn9-dev-cuda-13 libopenblas-dev liblapack-dev liblapacke-dev git curl
flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DMLX_CUDA_ARCHITECTURES=80-virtual -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
install-go: true
mlx_preset: 'MLX CUDA 13'
runs-on: linux
@@ -155,7 +155,7 @@ jobs:
- preset: 'MLX CUDA 13'
install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
cudnn-install: https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.18.1.3_cuda13-archive.zip
flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
flags: '-DCMAKE_CUDA_ARCHITECTURES=80 -DMLX_CUDA_ARCHITECTURES=80-virtual'
mlx_preset: 'MLX CUDA 13'
cuda-components:
- '"cudart"'

@@ -92,15 +92,21 @@ if(MLX_ENGINE)
list(APPEND MLX_INCLUDE_REGEXES "^dl\\.dll$")
endif()
# Keep mlx/mlxc targets separate from runtime dependencies so
# --strip only applies to the binaries we build, not vendor DLLs/libs.
install(TARGETS mlx mlxc
RUNTIME_DEPENDENCIES
DIRECTORIES ${MLX_RUNTIME_DIRS}
PRE_INCLUDE_REGEXES ${MLX_INCLUDE_REGEXES}
PRE_EXCLUDE_REGEXES ".*"
RUNTIME_DEPENDENCY_SET mlx_runtime_deps
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
)
install(RUNTIME_DEPENDENCY_SET mlx_runtime_deps
DIRECTORIES ${MLX_RUNTIME_DIRS}
PRE_INCLUDE_REGEXES ${MLX_INCLUDE_REGEXES}
PRE_EXCLUDE_REGEXES ".*"
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX_VENDOR
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX_VENDOR
)
if(TARGET jaccl)
install(TARGETS jaccl
@@ -230,8 +236,7 @@ if(MLX_ENGINE)
if(MLX_CUDA_LIBS)
install(FILES ${MLX_CUDA_LIBS}
DESTINATION ${OLLAMA_INSTALL_DIR}
COMPONENT MLX)
COMPONENT MLX_VENDOR)
endif()
endif()
endif()
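
The practical effect of splitting the install into MLX and MLX_VENDOR components is that installation becomes two steps, with --strip applied only to the first; the Dockerfile and the darwin/windows build scripts later in this commit follow exactly this pattern. A minimal sketch, assuming a configured build directory named build:

    # Strip only the binaries we build; copy vendored runtime libraries as-is.
    cmake --install build --component MLX --strip
    cmake --install build --component MLX_VENDOR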

@@ -29,7 +29,8 @@
"name": "MLX CUDA 13",
"inherits": [ "MLX" ],
"cacheVariables": {
"MLX_CUDA_ARCHITECTURES": "86;89;90;90a;100;103;75-virtual;80-virtual;110-virtual;120-virtual;121-virtual",
"CMAKE_CUDA_FLAGS": "-t 2",
"MLX_CUDA_ARCHITECTURES": "75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;103-virtual;110-virtual;120-virtual;121-virtual",
"OLLAMA_RUNNER_DIR": "mlx_cuda_v13"
}
}
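
Two knobs change in this preset: every entry in MLX_CUDA_ARCHITECTURES now carries the -virtual suffix, which (assuming MLX forwards it to CMake's CUDA architecture handling) emits PTX only instead of per-architecture SASS, shrinking compile time and binary size at the cost of a one-time JIT compile on first load; and CMAKE_CUDA_FLAGS adds -t 2 so nvcc compiles its architecture variants on two threads. Roughly what that looks like on the nvcc command line for one source file (illustrative only; the real flags are generated by CMake, and kernel.cu is a stand-in name):

    # PTX-only output for two of the listed architectures, two nvcc threads.
    nvcc -t 2 \
         -gencode arch=compute_80,code=compute_80 \
         -gencode arch=compute_90,code=compute_90 \
         -c kernel.cu -o kernel.o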

@@ -177,6 +177,8 @@ RUN --mount=type=cache,target=/root/.ccache \
FROM base AS mlx
ARG CUDA13VERSION=13.0
ARG OLLAMA_MLX_BUILD_JOBS=
ARG OLLAMA_MLX_NVCC_THREADS=2
RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-} \
&& dnf install -y openblas-devel lapack-devel \
&& dnf install -y libcudnn9-cuda-13 libcudnn9-devel-cuda-13 \
@@ -202,9 +204,10 @@ RUN --mount=type=cache,target=/root/.ccache \
&& if [ -f /tmp/local-mlx-c/CMakeLists.txt ]; then \
export OLLAMA_MLX_C_SOURCE=/tmp/local-mlx-c; \
fi \
&& cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
&& cmake --build --preset 'MLX CUDA 13' -- -l $(nproc) \
&& cmake --install build --component MLX --strip
&& cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas -DCMAKE_CUDA_FLAGS="-t ${OLLAMA_MLX_NVCC_THREADS}" \
&& cmake --build --preset 'MLX CUDA 13' -- -l $(nproc) ${OLLAMA_MLX_BUILD_JOBS:+-j ${OLLAMA_MLX_BUILD_JOBS}} \
&& cmake --install build --component MLX --strip \
&& cmake --install build --component MLX_VENDOR
#
# Go build
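
The two new build args map directly onto the parallelism controls in that RUN step: OLLAMA_MLX_NVCC_THREADS (default 2) is passed to nvcc as -t via CMAKE_CUDA_FLAGS, and OLLAMA_MLX_BUILD_JOBS, only when set, caps the ninja job count with -j. A hedged sketch of building just this stage locally with the values the release workflow passes:

    # Build only the mlx stage with the workflow's parallelism settings
    # (the values are what the workflow uses, not requirements).
    docker buildx build . \
      --target mlx \
      --build-arg OLLAMA_MLX_BUILD_JOBS=16 \
      --build-arg OLLAMA_MLX_NVCC_THREADS=6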

@@ -71,6 +71,7 @@ _build_darwin() {
-DOLLAMA_RUNNER_DIR=./
cmake --build $BUILD_DIR --target mlx mlxc -j
cmake --install $BUILD_DIR --component MLX
cmake --install $BUILD_DIR --component MLX_VENDOR
# Build llama-server statically (no Metal on x86, CPU-only)
status "Building darwin $ARCH llama-server (static)"
cmake -S llama/server --preset cpu \
@@ -97,6 +98,7 @@ _build_darwin() {
-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX
cmake --build $BUILD_DIR --target mlx mlxc --parallel
cmake --install $BUILD_DIR --component MLX
cmake --install $BUILD_DIR --component MLX_VENDOR
# Metal 4.x build (NAX-enabled, macOS 26+)
# Only possible with Xcode 26+ SDK; skip on older toolchains.
@@ -118,6 +120,7 @@ _build_darwin() {
-DFETCHCONTENT_SOURCE_DIR_METAL_CPP=$V3_DEPS/metal_cpp-src
cmake --build $BUILD_DIR_V4 --target mlx mlxc --parallel
cmake --install $BUILD_DIR_V4 --component MLX
cmake --install $BUILD_DIR_V4 --component MLX_VENDOR
else
status "Skipping MLX Metal v4 (SDK $SDK_MAJOR < 26, need Xcode 26+)"
fi

@@ -1,9 +1,14 @@
#!/bin/sh
#
# Mac ARM users, rosetta can be flaky, so to use a remote x86 builder
# Mac ARM users: Rosetta can be flaky, so consider a remote x86 builder.
# Use the docker-container driver with the bundled BuildKit GC config
# for improved cache behavior.
#
# docker context create amd64 --docker host=ssh://mybuildhost
# docker buildx create --name mybuilder amd64 --platform linux/amd64
# docker buildx create --name mybuilder \
# --driver docker-container \
# --config ./scripts/buildkitd.toml.example \
# --bootstrap amd64 --platform linux/amd64
# docker buildx create --name mybuilder --append desktop-linux --platform linux/arm64
# docker buildx use mybuilder
@@ -44,18 +49,27 @@ fi
# buildx behavior changes for single vs. multiplatform
echo "Compressing linux tar bundles..."
if echo $PLATFORM | grep "," > /dev/null ; then
tar c -C ./dist/linux_arm64 --exclude cuda_jetpack5 --exclude cuda_jetpack6 . | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64.tar.zst
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack5 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack6 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
tar c -C ./dist/linux_amd64 --exclude rocm --exclude 'mlx*' --exclude include . | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64.tar.zst
tar c -C ./dist/linux_amd64 ./lib/ollama/rocm | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
tar c -C ./dist/linux_amd64 ./lib/ollama/mlx_cuda_v13 ./lib/ollama/include | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
tar c -C ./dist/linux_arm64 --exclude cuda_jetpack5 --exclude cuda_jetpack6 . | zstd -9 -T0 >./dist/ollama-linux-arm64.tar.zst
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack5 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack6 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
tar c -C ./dist/linux_amd64 --exclude rocm --exclude 'mlx*' . | zstd -9 -T0 >./dist/ollama-linux-amd64.tar.zst
tar c -C ./dist/linux_amd64 ./lib/ollama/rocm | zstd -9 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
( cd ./dist/linux_amd64 && tar c lib/ollama/mlx* ) | zstd -9 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
elif echo $PLATFORM | grep "arm64" > /dev/null ; then
tar c -C ./dist/ --exclude cuda_jetpack5 --exclude cuda_jetpack6 bin lib | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64.tar.zst
tar c -C ./dist/ ./lib/ollama/cuda_jetpack5 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
tar c -C ./dist/ ./lib/ollama/cuda_jetpack6 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
tar c -C ./dist/ --exclude cuda_jetpack5 --exclude cuda_jetpack6 bin lib | zstd -9 -T0 >./dist/ollama-linux-arm64.tar.zst
tar c -C ./dist/ ./lib/ollama/cuda_jetpack5 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
tar c -C ./dist/ ./lib/ollama/cuda_jetpack6 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
elif echo $PLATFORM | grep "amd64" > /dev/null ; then
tar c -C ./dist/ --exclude rocm --exclude 'mlx*' --exclude include bin lib | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64.tar.zst
tar c -C ./dist/ ./lib/ollama/rocm | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
tar c -C ./dist/ ./lib/ollama/mlx_cuda_v13 ./lib/ollama/include | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
tar c -C ./dist/ --exclude rocm --exclude 'mlx*' bin lib | zstd -9 -T0 >./dist/ollama-linux-amd64.tar.zst
tar c -C ./dist/ ./lib/ollama/rocm | zstd -9 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
( cd ./dist/ && tar c lib/ollama/mlx* ) | zstd -9 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
fi
LIMIT=2147483648
for f in ./dist/ollama-linux-*.tar.zst; do
[ -f "$f" ] || continue
size=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f")
if [ "$size" -gt "$LIMIT" ]; then
echo "WARNING: $f is $size bytes ($((size - LIMIT)) over the 2 GiB GitHub release-asset limit)" >&2
fi
done
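
The bundles drop from zstd --ultra -22 to -9, trading some archive size for much faster and far less memory-hungry compression, and the new guard warns when a bundle crosses LIMIT=2147483648 bytes, i.e. 2 * 1024^3 = 2 GiB, GitHub's release-asset ceiling; the stat call tries the BSD/macOS form first and falls back to GNU coreutils. A one-off check for a single bundle following the same pattern (the path is illustrative):

    f=./dist/ollama-linux-amd64.tar.zst
    # BSD/macOS stat first, GNU stat as the fallback, matching the loop above.
    size=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f")
    [ "$size" -le 2147483648 ] && echo "ok: $size bytes" || echo "over 2 GiB: $size bytes"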

@@ -368,6 +368,8 @@ function mlxCuda13 {
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build\mlx_cuda_v$cudaMajorVer --component "MLX" --strip
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build\mlx_cuda_v$cudaMajorVer --component "MLX_VENDOR"
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
} else {
Write-Output "CUDA v$cudaMajorVer not detected, skipping MLX build"
}

@@ -0,0 +1,21 @@
# Suggested BuildKit GC config for Ollama local development.
[worker.oci]
gc = true
gckeepstorage = "150GB"
[[worker.oci.gcpolicy]]
filters = ["type==source.local", "type==source.git.checkout"]
keepDuration = "48h"
maxUsedSpace = "5GB"
[[worker.oci.gcpolicy]]
filters = ["type==exec.cachemount"]
keepDuration = "168h"
maxUsedSpace = "20GB"
[[worker.oci.gcpolicy]]
keepDuration = "720h"
reservedSpace = "20GB"
maxUsedSpace = "150GB"
minFreeSpace = "50GB"
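
The config layers three GC policies: local and git checkout sources are pruned after 48h or 5GB, cache mounts after 168h or 20GB, and everything else is kept up to 720h under a 150GB ceiling while reserving 20GB and keeping 50GB of disk free. Once a docker-container builder has been created with --config (see the build_linux.sh header above), cache usage against those budgets can be checked with standard buildx commands; mybuilder is the name used in that header and is otherwise arbitrary:

    # Show what the builder's BuildKit cache currently holds, and prune it
    # manually if the GC budgets are not enough.
    docker buildx du --builder mybuilder
    docker buildx prune --builder mybuilder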

@@ -94,6 +94,36 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(mlx-c)
# MLX CUDA builds have a long tail across many architecture variants. Using
# higher NVCC thread counts helps, but every .cu can then spawn multiple
# memory-heavy compiler processes. Keep those compiles in a Ninja pool sized
# from host RAM so CI can use more threads without falling over.
if(CMAKE_GENERATOR STREQUAL "Ninja")
file(GLOB_RECURSE _mlx_cu
"${mlx_SOURCE_DIR}/mlx/backend/cuda/*.cu"
"${mlx_BINARY_DIR}/mlx/backend/cuda/*.cu"
)
if(_mlx_cu)
set(MLX_CUDA_RAM_MB 22000 CACHE STRING
"Per-file memory budget in MB for the MLX CUDA compile job pool.")
cmake_host_system_information(RESULT _ram_mb QUERY TOTAL_PHYSICAL_MEMORY)
math(EXPR _cuda_pool "${_ram_mb} / ${MLX_CUDA_RAM_MB}")
if(_cuda_pool LESS 2)
set(_cuda_pool 2)
endif()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS cuda_compile=${_cuda_pool})
list(LENGTH _mlx_cu _cu_count)
foreach(f ${_mlx_cu})
set_property(SOURCE "${f}"
TARGET_DIRECTORY mlx
PROPERTY JOB_POOL_COMPILE cuda_compile)
endforeach()
message(STATUS "MLX cuda_compile JOB_POOL: ${_cu_count} files, pool size ${_cuda_pool} (host RAM ${_ram_mb} MB / ${MLX_CUDA_RAM_MB} MB per file)")
else()
message(WARNING "MLX cuda_compile JOB_POOL: no CUDA sources found under mlx/backend/cuda")
endif()
endif()
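
The pool size works out to floor(host RAM in MiB / MLX_CUDA_RAM_MB) with a floor of 2: a 64 GB runner (65536 MiB / 22000) gets the minimum of 2 concurrent CUDA compiles, while a 256 GB runner gets 11, and that cap holds regardless of the overall ninja -j/-l settings. A rough shell equivalent of the sizing, assuming Linux and the default 22000 MB budget:

    # Mirror of the CMake pool sizing; /proc/meminfo reports kB, so convert
    # to MiB first (cmake_host_system_information already returns MiB).
    ram_mb=$(awk '/MemTotal/ {print int($2/1024)}' /proc/meminfo)
    pool=$(( ram_mb / 22000 ))
    [ "$pool" -lt 2 ] && pool=2
    echo "cuda_compile job pool: $pool (host RAM ${ram_mb} MiB)"
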
# Sync vendored headers with fetched version
file(GLOB _mlx_c_hdrs "${mlx-c_SOURCE_DIR}/mlx/c/*.h")
file(COPY ${_mlx_c_hdrs} DESTINATION "${CMAKE_SOURCE_DIR}/x/mlxrunner/mlx/include/mlx/c/")