mirror of
https://github.com/ollama/ollama.git
synced 2026-05-13 14:27:00 +00:00
Merge 0ba3649296 into 3af1a008e2
This commit is contained in:
commit
bf4d6c2af6
10 changed files with 269 additions and 81 deletions
203
.github/workflows/release.yaml
vendored
203
.github/workflows/release.yaml
vendored
|
|
@ -141,6 +141,14 @@ jobs:
|
|||
env:
|
||||
GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
|
||||
steps:
|
||||
# Increase pagefile to handle momentary spikes in RAM from NVCC compiles
|
||||
- if: startsWith(matrix.preset, 'MLX ')
|
||||
name: Increase pagefile to 200 GB
|
||||
uses: al-cheb/configure-pagefile-action@v1.5
|
||||
with:
|
||||
minimum-size: 16GB
|
||||
maximum-size: 200GB
|
||||
disk-root: "D:"
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
choco install -y --no-progress ccache ninja
|
||||
|
|
@ -237,7 +245,7 @@ jobs:
|
|||
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }} --install-prefix "$((pwd).Path)\dist\${{ matrix.os }}-${{ matrix.arch }}"
|
||||
cmake --build --parallel ([Environment]::ProcessorCount) --preset "${{ matrix.preset }}"
|
||||
cmake --build --preset "${{ matrix.preset }}" -- -l $([Environment]::ProcessorCount)
|
||||
cmake --install build --component "${{ startsWith(matrix.preset, 'MLX ') && 'MLX' || startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || startsWith(matrix.preset, 'Vulkan') && 'Vulkan' || 'CPU' }}" --strip
|
||||
Remove-Item -Path dist\lib\ollama\rocm\rocblas\library\*gfx906* -ErrorAction SilentlyContinue
|
||||
env:
|
||||
|
|
@ -380,20 +388,36 @@ jobs:
|
|||
dist/*.ps1
|
||||
dist/OllamaSetup.exe
|
||||
|
||||
linux-build:
|
||||
# Pre-build each Dockerfile stage on its own runner in parallel and push the
|
||||
# resulting layers to a per-stage registry cache. The downstream
|
||||
# docker-build-push job then assembles the final image from cache hits only.
|
||||
linux-depends:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- os: linux
|
||||
arch: amd64
|
||||
target: archive
|
||||
- os: linux
|
||||
arch: amd64
|
||||
target: rocm
|
||||
- os: linux
|
||||
arch: arm64
|
||||
target: archive
|
||||
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
|
||||
- arch: amd64
|
||||
target: cpu
|
||||
- arch: amd64
|
||||
target: cuda-12
|
||||
- arch: amd64
|
||||
target: cuda-13
|
||||
- arch: amd64
|
||||
target: mlx
|
||||
- arch: amd64
|
||||
target: rocm-7
|
||||
- arch: amd64
|
||||
target: vulkan
|
||||
- arch: arm64
|
||||
target: cpu
|
||||
- arch: arm64
|
||||
target: cuda-12
|
||||
- arch: arm64
|
||||
target: cuda-13
|
||||
- arch: arm64
|
||||
target: jetpack-5
|
||||
- arch: arm64
|
||||
target: jetpack-6
|
||||
runs-on: ${{ matrix.arch == 'arm64' && 'linux-arm64' || 'linux' }}
|
||||
environment: release
|
||||
needs: setup-environment
|
||||
env:
|
||||
|
|
@ -401,53 +425,53 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ vars.DOCKER_USER }}
|
||||
password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
|
||||
# Increase swap to handle momentary spikes in RAM from NVCC compiles
|
||||
- if: matrix.target == 'mlx'
|
||||
name: Increase Linux swap to 200 GB
|
||||
shell: bash
|
||||
run: |
|
||||
set -e
|
||||
SWAP_PATH=/swapfile-mlx
|
||||
SWAP_SIZE_GB=200
|
||||
if [ -f "$SWAP_PATH" ]; then
|
||||
sudo swapoff "$SWAP_PATH" 2>/dev/null || true
|
||||
sudo rm -f "$SWAP_PATH"
|
||||
fi
|
||||
if ! sudo fallocate -l ${SWAP_SIZE_GB}G "$SWAP_PATH" 2>/dev/null; then
|
||||
echo "fallocate unsupported, falling back to dd"
|
||||
sudo dd if=/dev/zero of="$SWAP_PATH" bs=1M count=$((SWAP_SIZE_GB * 1024))
|
||||
fi
|
||||
sudo chmod 600 "$SWAP_PATH"
|
||||
sudo mkswap "$SWAP_PATH"
|
||||
sudo swapon "$SWAP_PATH"
|
||||
swapon --show
|
||||
free -h
|
||||
- uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
platforms: ${{ matrix.os }}/${{ matrix.arch }}
|
||||
platforms: linux/${{ matrix.arch }}
|
||||
target: ${{ matrix.target }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
build-args: |
|
||||
GOFLAGS=${{ env.GOFLAGS }}
|
||||
CGO_CFLAGS=${{ env.CGO_CFLAGS }}
|
||||
CGO_CXXFLAGS=${{ env.CGO_CXXFLAGS }}
|
||||
outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
|
||||
cache-to: type=inline
|
||||
- name: Deduplicate CUDA libraries
|
||||
run: |
|
||||
./scripts/deduplicate_cuda_libs.sh dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
- run: |
|
||||
for COMPONENT in bin/* lib/ollama/*; do
|
||||
case "$COMPONENT" in
|
||||
bin/ollama*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/*.so*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/cuda_v*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/vulkan*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/mlx*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-mlx.tar.in ;;
|
||||
lib/ollama/include*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-mlx.tar.in ;;
|
||||
lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
|
||||
lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
|
||||
lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
|
||||
esac
|
||||
done
|
||||
working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
- run: |
|
||||
echo "Manifests"
|
||||
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in ; do
|
||||
echo $ARCHIVE
|
||||
cat $ARCHIVE
|
||||
done
|
||||
- run: |
|
||||
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
|
||||
tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | zstd --ultra -22 -T0 >$(basename ${ARCHIVE//.*/}.tar.zst);
|
||||
done
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: bundles-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}
|
||||
path: |
|
||||
*.tar.zst
|
||||
GOFLAGS=${{ env.GOFLAGS }}
|
||||
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
|
||||
OLLAMA_MLX_BUILD_JOBS=16
|
||||
OLLAMA_MLX_NVCC_THREADS=6
|
||||
cache-from: |
|
||||
type=registry,ref=ollama/release:cache-${{ matrix.arch }}-${{ matrix.target }}
|
||||
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
|
||||
cache-to: type=registry,ref=ollama/release:cache-${{ matrix.arch }}-${{ matrix.target }},mode=max
|
||||
|
||||
# Build each Docker variant (OS, arch, and flavor) separately. Using QEMU is unreliable and slower.
|
||||
# Heavy stages were pre-built by linux-depends; this job is cache-hit-only for those layers
|
||||
# and just assembles, runs the Go build, and pushes the final image.
|
||||
docker-build-push:
|
||||
strategy:
|
||||
matrix:
|
||||
|
|
@ -459,6 +483,15 @@ jobs:
|
|||
CGO_CXXFLAGS
|
||||
GOFLAGS
|
||||
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
|
||||
OLLAMA_MLX_BUILD_JOBS=16
|
||||
OLLAMA_MLX_NVCC_THREADS=6
|
||||
cache-from: |
|
||||
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
|
||||
type=registry,ref=ollama/release:cache-arm64-cpu
|
||||
type=registry,ref=ollama/release:cache-arm64-cuda-12
|
||||
type=registry,ref=ollama/release:cache-arm64-cuda-13
|
||||
type=registry,ref=ollama/release:cache-arm64-jetpack-5
|
||||
type=registry,ref=ollama/release:cache-arm64-jetpack-6
|
||||
- os: linux
|
||||
arch: amd64
|
||||
build-args: |
|
||||
|
|
@ -466,6 +499,15 @@ jobs:
|
|||
CGO_CXXFLAGS
|
||||
GOFLAGS
|
||||
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
|
||||
OLLAMA_MLX_BUILD_JOBS=16
|
||||
OLLAMA_MLX_NVCC_THREADS=6
|
||||
cache-from: |
|
||||
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
|
||||
type=registry,ref=ollama/release:cache-amd64-cpu
|
||||
type=registry,ref=ollama/release:cache-amd64-cuda-12
|
||||
type=registry,ref=ollama/release:cache-amd64-cuda-13
|
||||
type=registry,ref=ollama/release:cache-amd64-mlx
|
||||
type=registry,ref=ollama/release:cache-amd64-vulkan
|
||||
- os: linux
|
||||
arch: amd64
|
||||
suffix: '-rocm'
|
||||
|
|
@ -475,9 +517,15 @@ jobs:
|
|||
GOFLAGS
|
||||
FLAVOR=rocm
|
||||
APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
|
||||
OLLAMA_MLX_BUILD_JOBS=16
|
||||
OLLAMA_MLX_NVCC_THREADS=6
|
||||
cache-from: |
|
||||
type=registry,ref=${{ vars.DOCKER_REPO }}:latest
|
||||
type=registry,ref=ollama/release:cache-amd64-cpu
|
||||
type=registry,ref=ollama/release:cache-amd64-rocm-7
|
||||
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
|
||||
environment: release
|
||||
needs: setup-environment
|
||||
needs: [setup-environment, linux-depends]
|
||||
env:
|
||||
GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
|
||||
steps:
|
||||
|
|
@ -492,9 +540,11 @@ jobs:
|
|||
with:
|
||||
context: .
|
||||
platforms: ${{ matrix.os }}/${{ matrix.arch }}
|
||||
provenance: false
|
||||
sbom: false
|
||||
build-args: ${{ matrix.build-args }}
|
||||
outputs: type=image,name=${{ vars.DOCKER_REPO }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
|
||||
cache-from: ${{ matrix.cache-from }}
|
||||
cache-to: type=inline
|
||||
- run: |
|
||||
mkdir -p ${{ matrix.os }}-${{ matrix.arch }}
|
||||
|
|
@ -505,6 +555,53 @@ jobs:
|
|||
name: digest-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}
|
||||
path: |
|
||||
${{ runner.temp }}/${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}.txt
|
||||
# Re-run buildx with --target archive against buildkit's local cache to
|
||||
# extract the release directory layout. All upstream stages were just
|
||||
# built above, so this is a cache-hit-only pass that just writes files.
|
||||
- uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
platforms: ${{ matrix.os }}/${{ matrix.arch }}
|
||||
target: archive
|
||||
provenance: false
|
||||
sbom: false
|
||||
build-args: ${{ matrix.build-args }}
|
||||
outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
cache-from: ${{ matrix.cache-from }}
|
||||
- name: Deduplicate CUDA libraries
|
||||
run: |
|
||||
./scripts/deduplicate_cuda_libs.sh dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
- run: |
|
||||
for COMPONENT in bin/* lib/ollama/*; do
|
||||
case "$COMPONENT" in
|
||||
bin/ollama*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/*.so*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/cuda_v*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/vulkan*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/mlx*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-mlx.tar.in ;;
|
||||
lib/ollama/include*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
|
||||
lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
|
||||
lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
|
||||
lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
|
||||
esac
|
||||
done
|
||||
working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
|
||||
- run: |
|
||||
echo "Manifests"
|
||||
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in ; do
|
||||
echo $ARCHIVE
|
||||
cat $ARCHIVE
|
||||
done
|
||||
- run: |
|
||||
for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
|
||||
tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | zstd -19 -T0 >$(basename ${ARCHIVE//.*/}.tar.zst) &
|
||||
done
|
||||
wait
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: bundles-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.suffix }}
|
||||
path: |
|
||||
*.tar.zst
|
||||
|
||||
# Merge Docker images for the same flavor into a single multi-arch manifest
|
||||
docker-merge-push:
|
||||
|
|
@ -544,7 +641,7 @@ jobs:
|
|||
release:
|
||||
runs-on: ubuntu-latest
|
||||
environment: release
|
||||
needs: [darwin-build, windows-app, linux-build]
|
||||
needs: [darwin-build, windows-app, docker-build-push]
|
||||
permissions:
|
||||
contents: write
|
||||
env:
|
||||
|
|
|
|||
8
.github/workflows/test.yaml
vendored
8
.github/workflows/test.yaml
vendored
|
|
@ -65,7 +65,7 @@ jobs:
|
|||
- preset: 'MLX CUDA 13'
|
||||
container: nvidia/cuda:13.0.0-devel-ubuntu22.04
|
||||
extra-packages: libcudnn9-dev-cuda-13 libopenblas-dev liblapack-dev liblapacke-dev git curl
|
||||
flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
|
||||
flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DMLX_CUDA_ARCHITECTURES=80-virtual -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
|
||||
install-go: true
|
||||
runs-on: linux
|
||||
container: ${{ matrix.container }}
|
||||
|
|
@ -105,7 +105,7 @@ jobs:
|
|||
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}-${{ needs.changes.outputs.vendorsha }}
|
||||
- run: |
|
||||
cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
|
||||
cmake --build --preset "${{ matrix.preset }}" --parallel
|
||||
cmake --build --preset "${{ matrix.preset }}" -- -l $(nproc)
|
||||
|
||||
windows:
|
||||
needs: [changes]
|
||||
|
|
@ -134,7 +134,7 @@ jobs:
|
|||
- preset: 'MLX CUDA 13'
|
||||
install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
|
||||
cudnn-install: https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.18.1.3_cuda13-archive.zip
|
||||
flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
|
||||
flags: '-DCMAKE_CUDA_ARCHITECTURES=80 -DMLX_CUDA_ARCHITECTURES=80-virtual'
|
||||
cuda-components:
|
||||
- '"cudart"'
|
||||
- '"nvcc"'
|
||||
|
|
@ -240,7 +240,7 @@ jobs:
|
|||
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
|
||||
cmake --build --parallel --preset "${{ matrix.preset }}"
|
||||
cmake --build --preset "${{ matrix.preset }}" -- -l $([Environment]::ProcessorCount)
|
||||
env:
|
||||
CMAKE_GENERATOR: Ninja
|
||||
|
||||
|
|
|
|||
|
|
@ -228,15 +228,20 @@ if(MLX_ENGINE)
|
|||
list(APPEND MLX_INCLUDE_REGEXES "^dl\\.dll$")
|
||||
endif()
|
||||
|
||||
# Split mlx/mlxc libraries from runtime deps to avoid stripping deps
|
||||
install(TARGETS mlx mlxc
|
||||
RUNTIME_DEPENDENCIES
|
||||
DIRECTORIES ${MLX_RUNTIME_DIRS}
|
||||
PRE_INCLUDE_REGEXES ${MLX_INCLUDE_REGEXES}
|
||||
PRE_EXCLUDE_REGEXES ".*"
|
||||
RUNTIME_DEPENDENCY_SET mlx_runtime_deps
|
||||
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
||||
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
||||
FRAMEWORK DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX
|
||||
)
|
||||
install(RUNTIME_DEPENDENCY_SET mlx_runtime_deps
|
||||
DIRECTORIES ${MLX_RUNTIME_DIRS}
|
||||
PRE_INCLUDE_REGEXES ${MLX_INCLUDE_REGEXES}
|
||||
PRE_EXCLUDE_REGEXES ".*"
|
||||
RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX_VENDOR
|
||||
LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT MLX_VENDOR
|
||||
)
|
||||
|
||||
if(TARGET jaccl)
|
||||
install(TARGETS jaccl
|
||||
|
|
@ -366,7 +371,7 @@ if(MLX_ENGINE)
|
|||
if(MLX_CUDA_LIBS)
|
||||
install(FILES ${MLX_CUDA_LIBS}
|
||||
DESTINATION ${OLLAMA_INSTALL_DIR}
|
||||
COMPONENT MLX)
|
||||
COMPONENT MLX_VENDOR)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@
|
|||
"name": "MLX CUDA 13",
|
||||
"inherits": [ "MLX", "CUDA 13" ],
|
||||
"cacheVariables": {
|
||||
"MLX_CUDA_ARCHITECTURES": "86;89;90;90a;100;103;75-virtual;80-virtual;110-virtual;120-virtual;121-virtual",
|
||||
"MLX_CUDA_ARCHITECTURES": "75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;90a-virtual;100-virtual;103-virtual;110-virtual;120-virtual;121-virtual",
|
||||
"OLLAMA_RUNNER_DIR": "mlx_cuda_v13"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
10
Dockerfile
10
Dockerfile
|
|
@ -144,6 +144,9 @@ RUN --mount=type=cache,target=/root/.ccache \
|
|||
|
||||
FROM base AS mlx
|
||||
ARG CUDA13VERSION=13.0
|
||||
# OLLAMA_MLX_BUILD_JOBS empty -> ninja gates by load average (-l $(nproc))
|
||||
ARG OLLAMA_MLX_BUILD_JOBS=
|
||||
ARG OLLAMA_MLX_NVCC_THREADS=2
|
||||
RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-} \
|
||||
&& dnf install -y openblas-devel lapack-devel \
|
||||
&& dnf install -y libcudnn9-cuda-13 libcudnn9-devel-cuda-13 \
|
||||
|
|
@ -170,9 +173,10 @@ RUN --mount=type=cache,target=/root/.ccache \
|
|||
&& if [ -f /tmp/local-mlx-c/CMakeLists.txt ]; then \
|
||||
export OLLAMA_MLX_C_SOURCE=/tmp/local-mlx-c; \
|
||||
fi \
|
||||
&& cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
|
||||
&& cmake --build --preset 'MLX CUDA 13' -- -l $(nproc) \
|
||||
&& cmake --install build --component MLX --strip
|
||||
&& cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas -DCMAKE_CUDA_FLAGS="-t ${OLLAMA_MLX_NVCC_THREADS}" \
|
||||
&& cmake --build --preset 'MLX CUDA 13' -- -l $(nproc) ${OLLAMA_MLX_BUILD_JOBS:+-j ${OLLAMA_MLX_BUILD_JOBS}} \
|
||||
&& cmake --install build --component MLX --strip \
|
||||
&& cmake --install build --component MLX_VENDOR
|
||||
|
||||
FROM base AS build
|
||||
WORKDIR /go/src/github.com/ollama/ollama
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ _build_darwin() {
|
|||
cmake --build $BUILD_DIR --target mlx mlxc -j
|
||||
cmake --install $BUILD_DIR --component CPU
|
||||
cmake --install $BUILD_DIR --component MLX
|
||||
cmake --install $BUILD_DIR --component MLX_VENDOR
|
||||
# Override CGO flags to point to the amd64 build directory
|
||||
MLX_CGO_CFLAGS="-O3 -mmacosx-version-min=14.0"
|
||||
MLX_CGO_LDFLAGS="-ldl -lc++ -framework Accelerate -mmacosx-version-min=14.0"
|
||||
|
|
@ -103,6 +104,7 @@ _build_darwin() {
|
|||
cmake --build $BUILD_DIR --target mlx mlxc --parallel
|
||||
_relink_mlx_metallib $BUILD_DIR
|
||||
cmake --install $BUILD_DIR --component MLX
|
||||
cmake --install $BUILD_DIR --component MLX_VENDOR
|
||||
|
||||
# Metal 4.x build (NAX-enabled, macOS 26+)
|
||||
# Only possible with Xcode 26+ SDK; skip on older toolchains.
|
||||
|
|
@ -124,6 +126,7 @@ _build_darwin() {
|
|||
-DFETCHCONTENT_SOURCE_DIR_METAL_CPP=$V3_DEPS/metal_cpp-src
|
||||
cmake --build $BUILD_DIR_V4 --target mlx mlxc --parallel
|
||||
cmake --install $BUILD_DIR_V4 --component MLX
|
||||
cmake --install $BUILD_DIR_V4 --component MLX_VENDOR
|
||||
else
|
||||
status "Skipping MLX Metal v4 (SDK $SDK_MAJOR < 26, need Xcode 26+)"
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -1,9 +1,14 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Mac ARM users, rosetta can be flaky, so to use a remote x86 builder
|
||||
# Mac ARM users: Rosetta can be flaky, so use a remote x86 builder.
|
||||
# Use the docker-container driver with the bundled buildkit GC config
|
||||
# for improved cache behavior
|
||||
#
|
||||
# docker context create amd64 --docker host=ssh://mybuildhost
|
||||
# docker buildx create --name mybuilder amd64 --platform linux/amd64
|
||||
# docker buildx create --name mybuilder \
|
||||
# --driver docker-container \
|
||||
# --config ./buildkitd.toml.example \
|
||||
# --bootstrap amd64 --platform linux/amd64
|
||||
# docker buildx create --name mybuilder --append desktop-linux --platform linux/arm64
|
||||
# docker buildx use mybuilder
|
||||
|
||||
|
|
@ -59,18 +64,28 @@ fi
|
|||
# buildx behavior changes for single vs. multiplatform
|
||||
echo "Compressing linux tar bundles..."
|
||||
if echo $PLATFORM | grep "," > /dev/null ; then
|
||||
tar c -C ./dist/linux_arm64 --exclude cuda_jetpack5 --exclude cuda_jetpack6 . | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64.tar.zst
|
||||
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack5 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
|
||||
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack6 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
|
||||
tar c -C ./dist/linux_amd64 --exclude rocm --exclude 'mlx*' --exclude include . | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64.tar.zst
|
||||
tar c -C ./dist/linux_amd64 ./lib/ollama/rocm | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
|
||||
tar c -C ./dist/linux_amd64 ./lib/ollama/mlx_cuda_v13 ./lib/ollama/include | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
|
||||
tar c -C ./dist/linux_arm64 --exclude cuda_jetpack5 --exclude cuda_jetpack6 . | zstd -9 -T0 >./dist/ollama-linux-arm64.tar.zst
|
||||
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack5 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
|
||||
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack6 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
|
||||
tar c -C ./dist/linux_amd64 --exclude rocm --exclude 'mlx*' . | zstd -9 -T0 >./dist/ollama-linux-amd64.tar.zst
|
||||
tar c -C ./dist/linux_amd64 ./lib/ollama/rocm | zstd -9 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
|
||||
( cd ./dist/linux_amd64 && tar c lib/ollama/mlx* ) | zstd -9 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
|
||||
elif echo $PLATFORM | grep "arm64" > /dev/null ; then
|
||||
tar c -C ./dist/ --exclude cuda_jetpack5 --exclude cuda_jetpack6 bin lib | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64.tar.zst
|
||||
tar c -C ./dist/ ./lib/ollama/cuda_jetpack5 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
|
||||
tar c -C ./dist/ ./lib/ollama/cuda_jetpack6 | zstd --ultra -22 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
|
||||
tar c -C ./dist/ --exclude cuda_jetpack5 --exclude cuda_jetpack6 bin lib | zstd -9 -T0 >./dist/ollama-linux-arm64.tar.zst
|
||||
tar c -C ./dist/ ./lib/ollama/cuda_jetpack5 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack5.tar.zst
|
||||
tar c -C ./dist/ ./lib/ollama/cuda_jetpack6 | zstd -9 -T0 >./dist/ollama-linux-arm64-jetpack6.tar.zst
|
||||
elif echo $PLATFORM | grep "amd64" > /dev/null ; then
|
||||
tar c -C ./dist/ --exclude rocm --exclude 'mlx*' --exclude include bin lib | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64.tar.zst
|
||||
tar c -C ./dist/ ./lib/ollama/rocm | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
|
||||
tar c -C ./dist/ ./lib/ollama/mlx_cuda_v13 ./lib/ollama/include | zstd --ultra -22 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
|
||||
tar c -C ./dist/ --exclude rocm --exclude 'mlx*' bin lib | zstd -9 -T0 >./dist/ollama-linux-amd64.tar.zst
|
||||
tar c -C ./dist/ ./lib/ollama/rocm | zstd -9 -T0 >./dist/ollama-linux-amd64-rocm.tar.zst
|
||||
( cd ./dist/ && tar c lib/ollama/mlx* ) | zstd -9 -T0 >./dist/ollama-linux-amd64-mlx.tar.zst
|
||||
fi
|
||||
|
||||
# Warn if any compressed tarball exceeds GitHub's 2 GiB release-asset limit
|
||||
LIMIT=2147483648
|
||||
for f in ./dist/ollama-linux-*.tar.zst; do
|
||||
[ -f "$f" ] || continue
|
||||
size=$(stat -f%z "$f" 2>/dev/null || stat -c%s "$f")
|
||||
if [ "$size" -gt "$LIMIT" ]; then
|
||||
echo "WARNING: $f is $size bytes ($((size - LIMIT)) over the 2 GiB GitHub release-asset limit)" >&2
|
||||
fi
|
||||
done
|
||||
|
|
|
|||
|
|
@ -308,6 +308,8 @@ function mlxCuda13 {
|
|||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build\mlx_cuda_v$cudaMajorVer --component "MLX" --strip
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build\mlx_cuda_v$cudaMajorVer --component "MLX_VENDOR"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
} else {
|
||||
Write-Output "CUDA v$cudaMajorVer not detected, skipping MLX build"
|
||||
}
|
||||
|
|
@ -430,7 +432,7 @@ function newZipJob($sourceDir, $destZip) {
|
|||
Start-Job -ScriptBlock {
|
||||
param($src, $dst, $use7z)
|
||||
if ($use7z) {
|
||||
& 7z a -tzip -mx=9 -mmt=on $dst "${src}\*"
|
||||
& 7z a -tzip -mx=7 -mmt=on $dst "${src}\*"
|
||||
if ($LASTEXITCODE -ne 0) { throw "7z failed with exit code $LASTEXITCODE" }
|
||||
} else {
|
||||
Compress-Archive -CompressionLevel Optimal -Path "${src}\*" -DestinationPath $dst -Force
|
||||
|
|
|
|||
21
scripts/buildkitd.toml.example
Normal file
21
scripts/buildkitd.toml.example
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# Suggested BuildKit GC config for ollama local development.
|
||||
#
|
||||
[worker.oci]
|
||||
gc = true
|
||||
gckeepstorage = "150GB"
|
||||
|
||||
[[worker.oci.gcpolicy]]
|
||||
filters = ["type==source.local", "type==source.git.checkout"]
|
||||
keepDuration = "48h"
|
||||
maxUsedSpace = "5GB"
|
||||
|
||||
[[worker.oci.gcpolicy]]
|
||||
filters = ["type==exec.cachemount"]
|
||||
keepDuration = "168h" # 7 days
|
||||
maxUsedSpace = "20GB"
|
||||
|
||||
[[worker.oci.gcpolicy]]
|
||||
keepDuration = "720h" # 30 days
|
||||
reservedSpace = "20GB"
|
||||
maxUsedSpace = "150GB"
|
||||
minFreeSpace = "50GB"
|
||||
|
|
@ -94,6 +94,47 @@ FetchContent_Declare(
|
|||
)
|
||||
FetchContent_MakeAvailable(mlx-c)
|
||||
|
||||
# To avoid a "long tail" when building MLX with a large set of GPU
# architectures, utilize a higher --threads (-t) setting. At high -t
# every .cu spawns concurrent cicc instances; each cicc can consume several GB
# compiling MLX's CUTLASS-using kernels. This in turn can cause OOMs.
#
# We use a Ninja job pool to cover all MLX CUDA sources. Pool size is derived
# from total host RAM via a per-file memory budget (MLX_CUDA_RAM_MB), with a
# floor of 2 concurrent compiles.
#
# The default budget was calibrated with `-t 6`. Higher -t may require
# overriding MLX_CUDA_RAM_MB.
if(CMAKE_GENERATOR STREQUAL "Ninja")
  # Collect every CUDA source, both checked-in and generated into the build tree.
  file(GLOB_RECURSE _mlx_cu
    "${mlx_SOURCE_DIR}/mlx/backend/cuda/*.cu"
    "${mlx_BINARY_DIR}/mlx/backend/cuda/*.cu"
  )
  if(_mlx_cu)
    set(MLX_CUDA_RAM_MB 22000 CACHE STRING
      "Per-file memory budget (MB) for the cuda_compile JOB_POOL. Override for higher -t.")

    # MLX_CUDA_RAM_MB is user-overridable and is used as a divisor below.
    # Reject empty, non-numeric and zero values up front so the failure names
    # the offending variable instead of surfacing as a math() expression error.
    if(NOT MLX_CUDA_RAM_MB MATCHES "^[0-9]+$" OR MLX_CUDA_RAM_MB LESS 1)
      message(FATAL_ERROR
        "MLX_CUDA_RAM_MB must be a positive integer (MB per CUDA compile), got '${MLX_CUDA_RAM_MB}'")
    endif()

    # Pool size = host RAM / per-file budget (integer division), never below 2.
    cmake_host_system_information(RESULT _ram_mb QUERY TOTAL_PHYSICAL_MEMORY)
    math(EXPR _cuda_pool "${_ram_mb} / ${MLX_CUDA_RAM_MB}")
    if(_cuda_pool LESS 2)
      set(_cuda_pool 2)
    endif()
    set_property(GLOBAL APPEND PROPERTY JOB_POOLS cuda_compile=${_cuda_pool})

    list(LENGTH _mlx_cu _cu_count)
    # SOURCE properties default to directory-scoped, which means a plain
    # set_property(SOURCE ...) here would NOT affect the build rules
    # generated for the mlx target (defined in mlx_SOURCE_DIR after
    # FetchContent). TARGET_DIRECTORY mlx puts the property in the
    # directory where mlx was defined, so it actually applies.
    set_property(SOURCE ${_mlx_cu}
      TARGET_DIRECTORY mlx
      PROPERTY JOB_POOL_COMPILE cuda_compile)

    message(STATUS "MLX cuda_compile JOB_POOL: ${_cu_count} files, pool size ${_cuda_pool} (host RAM ${_ram_mb} MB / ${MLX_CUDA_RAM_MB} MB per file)")
  else()
    message(WARNING "MLX cuda_compile JOB_POOL: no .cu files found under mlx/backend/cuda/ - check MLX layout")
  endif()
endif()
|
||||
|
||||
# Sync vendored headers with fetched version
|
||||
file(GLOB _mlx_c_hdrs "${mlx-c_SOURCE_DIR}/mlx/c/*.h")
|
||||
file(COPY ${_mlx_c_hdrs} DESTINATION "${CMAKE_SOURCE_DIR}/x/mlxrunner/mlx/include/mlx/c/")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue