mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-06-20 03:55:44 +00:00
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run
GitNexus Index / index (push) Waiting to run
GitNexus Index / post-index (push) Blocked by required conditions
* ⏫ ci: Bump GitNexus to 1.6.7 to Fix Embeddings Index Timeout * ⏲️ ci: Raise GitNexus Index Timeout for 1.6.x Embedding Volume
600 lines
26 KiB
YAML
600 lines
26 KiB
YAML
# Deploys GitNexus indexes to a droplet via SSH + rsync.
|
|
#
|
|
# Architecture:
|
|
# GitHub Actions (deploy)
|
|
# 1. Resolves latest successful index runs for main, dev, and every
|
|
# open PR that already has an index artifact (contributor-gated
|
|
# upstream by the index workflow's author_association check)
|
|
# 2. Downloads each matching .gitnexus/ artifact
|
|
# 3. Rsyncs them into /opt/gitnexus/indexes/<name>/ on the droplet
|
|
# 4. Removes any stale folders on the droplet for PRs that closed
|
|
# (even though gitnexus-cleanup-pr.yml also handles that path,
|
|
# this is a safety net in case the close event was missed)
|
|
# 5. Pulls latest image, force-recreates gitnexus, reloads Caddy,
|
|
# and polls docker health until the container reports healthy
|
|
# The caddy container is untouched — no TLS churn.
|
|
#
|
|
# First-time droplet bootstrap (run once, manually):
|
|
# 1. Create 2GB+ Ubuntu 24.04 droplet, add SSH key
|
|
# 2. Point DNS A record for your subdomain at the droplet IP
|
|
# 3. SSH in and run:
|
|
# curl -fsSL https://get.docker.com | sh
|
|
# systemctl enable --now docker
|
|
# mkdir -p /opt/gitnexus/indexes
|
|
# useradd -m -s /bin/bash deploy
|
|
# usermod -aG docker deploy
|
|
# mkdir -p /home/deploy/.ssh
|
|
# # Add deploy pubkey to /home/deploy/.ssh/authorized_keys
|
|
# chown -R deploy:deploy /home/deploy/.ssh /opt/gitnexus
|
|
# chmod 700 /home/deploy/.ssh
|
|
# ufw allow 22,80,443/tcp
|
|
# ufw --force enable
|
|
# 4. Copy .do/gitnexus/docker-compose.yml and Caddyfile into /opt/gitnexus/
|
|
# 5. Create /opt/gitnexus/.env with: GITNEXUS_DOMAIN=... and API_TOKEN=...
|
|
# 6. cd /opt/gitnexus && docker compose up -d
|
|
#
|
|
# Then capture the droplet's SSH host key from your workstation and
|
|
# save it as the GITNEXUS_DO_KNOWN_HOST secret (below) so CI can pin it:
|
|
# ssh-keyscan -H gitnexus.yourdomain.com
|
|
#
|
|
# GHCR image: the workflow runs `docker login ghcr.io` on the droplet
|
|
# on every deploy using GITHUB_TOKEN, so the package can stay private.
|
|
# If you'd rather not have CI manage droplet auth, make the package
|
|
# public under repo Settings -> Packages.
|
|
#
|
|
# Required GitHub secrets:
|
|
# GITNEXUS_DO_HOST — droplet IP or hostname
|
|
# GITNEXUS_DO_USER — SSH user (e.g. "deploy")
|
|
# GITNEXUS_DO_SSH_KEY — private key matching the authorized pubkey
|
|
# GITNEXUS_DO_KNOWN_HOST — output of `ssh-keyscan -H <host>` pinning the
|
|
# droplet's host keys (prevents MITM/TOFU risk)
|
|
|
|
name: GitNexus Deploy
|
|
|
|
on:
|
|
workflow_run:
|
|
workflows: ['GitNexus Index']
|
|
types: [completed]
|
|
workflow_dispatch:
|
|
inputs:
|
|
pr_number:
|
|
description: 'Optional PR number to post completion comment on (set by bot-triggered dispatches from gitnexus-index.yml)'
|
|
type: string
|
|
default: ''
|
|
|
|
permissions:
|
|
actions: read
|
|
contents: read
|
|
pull-requests: write # post completion comments on served PR indexes
|
|
|
|
# Global serialization. Earlier versions used per-ref concurrency with
|
|
# cancel-in-progress so rapid pushes to the same ref coalesced but deploys
|
|
# targeting different refs ran in parallel. That had a data race: the
|
|
# prune-stale-indexes step computes its active_names up front, so if
|
|
# deploy A is rsyncing /opt/gitnexus/indexes/LibreChat-pr-12580 while
|
|
# deploy B (started slightly later with a different ref) prunes, B can
|
|
# rm -rf a folder A is still uploading into.
|
|
#
|
|
# All deploys now queue behind a single group. cancel-in-progress is
|
|
# false so a running rsync/docker-compose restart never gets killed
|
|
# mid-operation (which would leave the droplet in a partial state).
|
|
# The 20-minute per-job timeout bounds how long a running deploy can hold up the ones waiting behind it.
|
|
concurrency:
|
|
group: gitnexus-deploy
|
|
cancel-in-progress: false
|
|
|
|
env:
|
|
GITNEXUS_VERSION: '1.6.7'
|
|
IMAGE_NAME: ghcr.io/${{ github.repository_owner }}/librechat-gitnexus
|
|
|
|
jobs:
|
|
# Rebuilds the long-lived image only when Dockerfile/entrypoint/extensions
|
|
# change. Skipped on every other run, so index-only deploys are fast.
|
|
build-image:
|
|
if: |
|
|
github.event_name == 'workflow_dispatch' ||
|
|
github.event.workflow_run.conclusion == 'success'
|
|
runs-on: ubuntu-latest
|
|
timeout-minutes: 20
|
|
permissions:
|
|
contents: read
|
|
packages: write # push image to GHCR
|
|
outputs:
|
|
image_tag: ${{ steps.tag.outputs.value }}
|
|
steps:
|
|
- name: Checkout
|
|
uses: actions/checkout@v4
|
|
with:
|
|
fetch-depth: 2
|
|
|
|
- name: Detect image changes
|
|
id: changes
|
|
run: |
|
|
# Report whether any file that feeds the image build differs between the
# previous commit and HEAD, as `changed=true|false` in the step outputs.
#
# "changed" is the default answer: it is only downgraded to "false" when
# HEAD~1 exists AND the diff over the image inputs is clean. Any situation
# where the comparison cannot be made (first commit, shallow history,
# workflow_run where HEAD isn't the triggering commit) keeps the default,
# because a needless rebuild is cheaper than a missed one.
image_inputs=(
  .do/gitnexus/Dockerfile
  .do/gitnexus/entrypoint.sh
  .do/gitnexus/install-extensions.js
)

changed=true
if git rev-parse --verify HEAD~1 >/dev/null 2>&1; then
  if git diff --quiet HEAD~1 HEAD -- "${image_inputs[@]}"; then
    changed=false
  fi
fi

echo "changed=${changed}" >> "$GITHUB_OUTPUT"
|
|
|
|
- name: Compute image tag
|
|
id: tag
|
|
run: echo "value=v${{ env.GITNEXUS_VERSION }}" >> "$GITHUB_OUTPUT"
|
|
|
|
- name: Set up Docker Buildx
|
|
if: steps.changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'
|
|
uses: docker/setup-buildx-action@v3
|
|
|
|
- name: Log in to GHCR
|
|
if: steps.changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'
|
|
uses: docker/login-action@v3
|
|
with:
|
|
registry: ghcr.io
|
|
username: ${{ github.actor }}
|
|
password: ${{ secrets.GITHUB_TOKEN }}
|
|
|
|
- name: Build and push image
|
|
if: steps.changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'
|
|
uses: docker/build-push-action@v5
|
|
with:
|
|
context: .do/gitnexus
|
|
file: .do/gitnexus/Dockerfile
|
|
push: true
|
|
tags: |
|
|
${{ env.IMAGE_NAME }}:latest
|
|
${{ env.IMAGE_NAME }}:${{ steps.tag.outputs.value }}
|
|
build-args: |
|
|
GITNEXUS_VERSION=${{ env.GITNEXUS_VERSION }}
|
|
cache-from: type=gha
|
|
cache-to: type=gha,mode=max
|
|
|
|
deploy:
|
|
needs: build-image
|
|
runs-on: ubuntu-latest
|
|
timeout-minutes: 20
|
|
permissions:
|
|
actions: read
|
|
contents: read
|
|
pull-requests: write # post deploy-complete comments on served PR indexes
|
|
steps:
|
|
- name: Checkout deploy config
|
|
uses: actions/checkout@v4
|
|
with:
|
|
sparse-checkout: .do/gitnexus
|
|
fetch-depth: 1
|
|
|
|
# Resolve every index to serve. All resolutions go through
|
|
# listArtifactsForRepo keyed by the expected artifact name, so a
|
|
# run's branch or event type doesn't matter — we always pick the
|
|
# freshest artifact that actually exists.
|
|
#
|
|
# Why this matters: a /gitnexus index command dispatches
|
|
# gitnexus-index.yml with ref=main and an input pr_number, which
|
|
# produces a run whose head_branch is "main" but whose artifact
|
|
# is gitnexus-index-pr-<N>. listWorkflowRuns(branch='main') would
|
|
# happily return that run, and we'd then try to download a
|
|
# nonexistent gitnexus-index-main artifact from it. Querying by
|
|
# artifact name directly avoids the whole mess.
|
|
- name: Resolve indexes to serve
|
|
id: resolve
|
|
uses: actions/github-script@v7
|
|
with:
|
|
script: |
|
|
const serve = []; // [{ name, artifactName, runId }]
|
|
|
|
// Helper — pick the newest non-expired artifact matching a name.
|
|
const latestArtifact = async (artifactName) => {
|
|
const { data } = await github.rest.actions.listArtifactsForRepo({
|
|
owner: context.repo.owner,
|
|
repo: context.repo.repo,
|
|
name: artifactName,
|
|
per_page: 10,
|
|
});
|
|
return data.artifacts
|
|
.filter((a) => !a.expired)
|
|
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0];
|
|
};
|
|
|
|
// --- main and dev branches ---
|
|
for (const [branch, name] of [
|
|
['main', 'LibreChat'],
|
|
['dev', 'LibreChat-dev'],
|
|
]) {
|
|
const artifactName = `gitnexus-index-${branch}`;
|
|
const fresh = await latestArtifact(artifactName);
|
|
if (!fresh) {
|
|
core.warning(`No artifact found for ${branch} (expected ${artifactName})`);
|
|
continue;
|
|
}
|
|
serve.push({
|
|
name,
|
|
artifactName,
|
|
runId: fresh.workflow_run.id,
|
|
});
|
|
core.info(`${branch}: run ${fresh.workflow_run.id} -> ${name}`);
|
|
}
|
|
|
|
// --- open PRs with at least one successful index run ---
|
|
// github.paginate handles the 100-per-page ceiling automatically
|
|
// so the resolution works on repos with 200+ concurrent open PRs.
|
|
const openPrs = await github.paginate(github.rest.pulls.list, {
|
|
owner: context.repo.owner,
|
|
repo: context.repo.repo,
|
|
state: 'open',
|
|
per_page: 100,
|
|
});
|
|
core.info(`Found ${openPrs.length} open PRs`);
|
|
|
|
// Parallelize artifact lookups in fixed-size batches so the
|
|
// resolve step runs in seconds instead of minutes on big repos,
|
|
// without burning the GitHub API rate limit all at once.
|
|
const BATCH_SIZE = 10;
|
|
const prMatches = [];
|
|
for (let i = 0; i < openPrs.length; i += BATCH_SIZE) {
|
|
const batch = openPrs.slice(i, i + BATCH_SIZE);
|
|
const results = await Promise.all(
|
|
batch.map(async (pr) => {
|
|
const artifactName = `gitnexus-index-pr-${pr.number}`;
|
|
const fresh = await latestArtifact(artifactName);
|
|
return fresh ? { pr, artifactName, fresh } : null;
|
|
}),
|
|
);
|
|
for (const hit of results) {
|
|
if (hit) prMatches.push(hit);
|
|
}
|
|
}
|
|
|
|
// Cap to the N most recent PR indexes by artifact creation time.
|
|
// On a 10GB droplet each index is ~130MB; 3 PRs + main + dev ≈
|
|
// 650MB of index data, leaving headroom for the ~700MB Docker image
|
|
// and OS. Older PR indexes are evicted by the prune step.
|
|
const MAX_PR_INDEXES = 3;
|
|
prMatches.sort(
|
|
(a, b) => new Date(b.fresh.created_at) - new Date(a.fresh.created_at),
|
|
);
|
|
const keptPrs = prMatches.slice(0, MAX_PR_INDEXES);
|
|
const evictedPrs = prMatches.slice(MAX_PR_INDEXES);
|
|
|
|
for (const { pr, artifactName, fresh } of keptPrs) {
|
|
serve.push({
|
|
name: `LibreChat-pr-${pr.number}`,
|
|
artifactName,
|
|
runId: fresh.workflow_run.id,
|
|
});
|
|
core.info(`PR #${pr.number}: run ${fresh.workflow_run.id} -> LibreChat-pr-${pr.number}`);
|
|
}
|
|
if (evictedPrs.length) {
|
|
core.info(
|
|
`Evicted ${evictedPrs.length} older PR indexes (cap=${MAX_PR_INDEXES}): ` +
|
|
evictedPrs.map((e) => `#${e.pr.number}`).join(', '),
|
|
);
|
|
}
|
|
core.info(`Serving ${keptPrs.length} PR indexes out of ${prMatches.length} with artifacts (${openPrs.length} open PRs total)`);
|
|
|
|
if (!serve.length) {
|
|
core.setFailed('No indexes to serve');
|
|
return;
|
|
}
|
|
|
|
core.setOutput('matrix', JSON.stringify(serve));
|
|
core.setOutput('active_names', serve.map((s) => s.name).join(','));
|
|
|
|
- name: Download each index artifact
|
|
env:
|
|
MATRIX: ${{ steps.resolve.outputs.matrix }}
|
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
run: |
|
|
set -e
mkdir -p staging

# Fetch every artifact listed in $MATRIX into staging/<name>/.gitnexus.
#
# Failure policy differs by index:
#   - LibreChat / LibreChat-dev (main/dev): fatal. Losing one of these is a
#     real deploy failure, so the step aborts.
#   - anything else (PR indexes): soft. A PR artifact that vanished
#     mid-deploy must not abort the run and take main/dev down with it.
jq -c '.[]' <<<"$MATRIX" | while read -r item; do
  index_name=$(jq -r '.name' <<<"$item")
  artifact_name=$(jq -r '.artifactName' <<<"$item")
  run_id=$(jq -r '.runId' <<<"$item")
  dest="staging/${index_name}/.gitnexus"

  echo "Downloading $artifact_name from run $run_id -> $dest"
  mkdir -p "$dest"

  if gh run download "$run_id" \
    --repo "${{ github.repository }}" \
    --name "$artifact_name" \
    --dir "$dest"; then
    continue
  fi

  if [ "$index_name" = "LibreChat" ] || [ "$index_name" = "LibreChat-dev" ]; then
    echo "::error::Failed to download critical artifact $artifact_name"
    exit 1
  fi

  # Soft failure: only the staged copy is dropped. The index name is
  # still part of active_names, so pruning leaves the droplet's existing
  # copy alone and the old index keeps being served (stale beats empty).
  # The warning exists because nothing else in the log would reveal that
  # this index was not refreshed.
  echo "::warning::Failed to download PR artifact $artifact_name — skipping fresh sync; previous index (if any) will continue being served from the droplet"
  rm -rf "staging/${index_name}"
done

echo ""
echo "Staged for rsync:"
du -sh staging/*/.gitnexus/ 2>/dev/null || echo "(none)"
|
|
|
|
- name: Setup SSH
|
|
env:
|
|
SSH_KEY: ${{ secrets.GITNEXUS_DO_SSH_KEY }}
|
|
KNOWN_HOST: ${{ secrets.GITNEXUS_DO_KNOWN_HOST }}
|
|
run: |
|
|
set -e
ssh_dir="$HOME/.ssh"

# Store a secret value ($1) in a file ($2) readable only by its owner.
write_secret_file() {
  printf '%s\n' "$1" > "$2"
  chmod 600 "$2"
}

mkdir -p "$ssh_dir"
chmod 700 "$ssh_dir"
write_secret_file "$SSH_KEY" "$ssh_dir/deploy_key"

# The droplet's host key is pinned from a repository secret (captured with
# `ssh-keyscan -H <host>` at bootstrap) rather than scanned at deploy time,
# so the runner never trusts whatever key the network happens to present.
# An empty secret is a hard error: there is nothing to pin against.
if [ -n "$KNOWN_HOST" ]; then
  write_secret_file "$KNOWN_HOST" "$ssh_dir/known_hosts"
else
  echo "::error::GITNEXUS_DO_KNOWN_HOST secret is empty. Run ssh-keyscan -H <host> and paste the output as this secret."
  exit 1
fi
|
|
|
|
- name: Authenticate droplet with GHCR
|
|
# GHCR packages pushed by GITHUB_TOKEN start private. The droplet
|
|
# pulls the image on every deploy, so we re-authenticate it here
|
|
# using the same short-lived token. If the package is public, this
|
|
# step is redundant but harmless.
|
|
#
|
|
# The token MUST travel through SSH stdin (not as a command arg)
|
|
# so it's never visible in the droplet's process table via
|
|
# /proc/<pid>/cmdline. `printf '%s'` is preferred over `echo`
|
|
# so the exact byte sequence sent is explicit — docker login
|
|
# tolerates a trailing newline but `printf` makes the intent
|
|
# obvious and portable across shells.
|
|
env:
|
|
SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
|
|
SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
|
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
GH_ACTOR: ${{ github.actor }}
|
|
run: |
|
|
# Log the droplet's docker daemon in to GHCR. The token is written to the
# ssh session's stdin and consumed by --password-stdin on the far side, so
# it never appears in a command line on either host.
droplet="$SSH_USER@$SSH_HOST"
login_cmd="docker login ghcr.io -u '$GH_ACTOR' --password-stdin"
printf '%s' "$GH_TOKEN" | ssh -i ~/.ssh/deploy_key "$droplet" "$login_cmd"
|
|
|
|
- name: Upload config files
|
|
env:
|
|
SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
|
|
SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
|
|
run: |
|
|
# Copy the compose file and Caddyfile from the checkout into the droplet's
# /opt/gitnexus/ directory in a single rsync invocation.
config_files=(
  .do/gitnexus/docker-compose.yml
  .do/gitnexus/Caddyfile
)
destination="$SSH_USER@$SSH_HOST:/opt/gitnexus/"

rsync -az -e "ssh -i ~/.ssh/deploy_key" "${config_files[@]}" "$destination"
|
|
|
|
- name: Prune stale indexes then sync fresh ones
|
|
env:
|
|
SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
|
|
SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
|
|
ACTIVE_NAMES: ${{ steps.resolve.outputs.active_names }}
|
|
run: |
|
|
set -e

# ── Step 1: prune FIRST ────────────────────────────────
# Remove every folder under /opt/gitnexus/indexes whose name is not in
# $ACTIVE_NAMES (comma-separated). Doing this before the upload frees disk
# for the new data, which matters on a 10GB disk where each index is
# ~130MB. Leftover "<name>.new" / "<name>.old" folders from an interrupted
# earlier deploy are never in the active set, so they are swept here too.
echo "Pruning stale indexes (keeping: $ACTIVE_NAMES)"
ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
  ACTIVE_NAMES="$ACTIVE_NAMES" bash <<'REMOTE'
set -e
# No indexes directory yet means there is nothing to prune.
cd /opt/gitnexus/indexes || exit 0
shopt -s nullglob
IFS=',' read -ra ACTIVE <<< "$ACTIVE_NAMES"
for dir in */; do
  dir="${dir%/}"
  keep=false
  for a in "${ACTIVE[@]}"; do
    if [ "$dir" = "$a" ]; then keep=true; break; fi
  done
  if [ "$keep" = false ]; then
    echo "Removing stale index: $dir"
    rm -rf -- "$dir"
  fi
done
echo "Disk after prune:"
df -h / | tail -1
REMOTE

# ── Step 2: rsync-then-swap ─────────────────────────────
# Upload each index into "<name>.new", then swap it into place with
# renames: the live folder is moved aside to "<name>.old", "<name>.new"
# is renamed to "<name>", and only then is the old copy deleted.
#
# Compared with deleting the live folder first, this keeps the window in
# which "<name>" is absent down to the gap between two renames, and a
# failed rename leaves the previous data on disk (as "<name>.old")
# instead of destroying it.
#
# If rsync fails, the live index is untouched and the partial temp dir is
# removed. Old + new coexisting costs ~130MB of extra disk per index,
# which the prune step above has already made room for.
for dir in staging/*/; do
  [ -d "$dir" ] || continue
  name=$(basename "$dir")
  echo "Syncing $name (rsync-then-swap)"
  ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
    "mkdir -p /opt/gitnexus/indexes/${name}.new"
  if rsync -az -e "ssh -i ~/.ssh/deploy_key" \
    "$dir" \
    "$SSH_USER@$SSH_HOST:/opt/gitnexus/indexes/${name}.new/"; then
    # Swap via renames; "<name>" may not exist yet on a first deploy,
    # hence the existence guard before moving it aside.
    ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
      "cd /opt/gitnexus/indexes && rm -rf '${name}.old' && { [ ! -e '${name}' ] || mv '${name}' '${name}.old'; } && mv '${name}.new' '${name}' && rm -rf '${name}.old'"
    echo " $name swapped successfully"
  else
    # Clean up the partial temp dir
    ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
      "rm -rf /opt/gitnexus/indexes/${name}.new"
    # main/dev are critical — abort the deploy so the failure
    # is visible and the container isn't restarted with stale
    # or missing data. PR indexes are best-effort.
    case "$name" in
      LibreChat|LibreChat-dev)
        echo "::error::rsync failed for critical index $name — aborting deploy"
        exit 1
        ;;
      *)
        echo "::warning::rsync failed for PR index $name — keeping previous index"
        ;;
    esac
  fi
done
|
|
|
|
- name: Pull image, restart gitnexus, reload Caddy, wait for healthy
|
|
env:
|
|
SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
|
|
SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
|
|
run: |
|
|
ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" bash <<'REMOTE'
set -e
cd /opt/gitnexus

# Print the usage line for the root filesystem.
show_root_disk() {
  df -h / | tail -1
}

# ── Disk cleanup ──────────────────────────────────────
# Old image layers, dangling images and build cache pile up across
# deploys and eventually fill the disk. Everything not used by a running
# container is pruned BEFORE the pull so the new image has room to
# extract.
#
# --volumes is deliberately NOT passed: the caddy-data / caddy-config
# volumes hold TLS certificates and ACME state. If Caddy happened to be
# stopped at this point, --volumes would delete them, forcing Let's
# Encrypt re-issuance and risking rate-limit lockout
# (5 certs/domain/week).
echo "Disk before cleanup:"
show_root_disk
docker system prune -af 2>/dev/null || true
echo "Disk after cleanup:"
show_root_disk

# Refuse to continue when less than 2048MB remains even after pruning.
free_mb=$(df --output=avail -m / | tail -1 | tr -d ' ')
if [ "$free_mb" -lt 2048 ]; then
  echo "::error::Disk critically low (${free_mb}MB free). Aborting deploy."
  exit 1
fi

docker compose pull gitnexus
docker compose up -d --force-recreate gitnexus

# Caddy: start it when it is not running (first-time bootstrap);
# otherwise reload its config in place so a changed Caddyfile takes
# effect without dropping TLS certs or connections. A failed reload
# falls back to recreating the container.
if ! docker compose ps --status running caddy 2>/dev/null | grep -q caddy; then
  echo "Caddy not running — starting"
  docker compose up -d caddy
else
  echo "Reloading Caddy config"
  if ! docker compose exec -T caddy caddy reload --config /etc/caddy/Caddyfile; then
    echo "Caddy reload failed — forcing restart"
    docker compose up -d --force-recreate caddy
  fi
fi

# Poll the container's health status until it reports healthy or the
# attempts run out. Docker's own unhealthy detection takes up to 150s
# (start_period 60s + retries 3 * interval 30s), so the ceiling has to
# clear that to avoid false negatives while gitnexus warms up.
# 37 polls with a 5s sleep between them = 36 sleeps = 180s; the last
# poll fails immediately instead of sleeping once more.
MAX_POLLS=37
echo "Waiting for gitnexus to report healthy..."
for ((attempt = 1; attempt <= MAX_POLLS; attempt++)); do
  health=$(docker inspect --format='{{.State.Health.Status}}' gitnexus 2>/dev/null || echo unknown)
  echo "[$attempt/$MAX_POLLS] gitnexus health: $health"
  if [ "$health" = "healthy" ]; then
    echo "gitnexus is healthy"
    break
  fi
  if [ "$attempt" -eq "$MAX_POLLS" ]; then
    echo "ERROR: gitnexus failed to become healthy after 180s"
    docker compose ps
    docker compose logs --tail 80 gitnexus
    exit 1
  fi
  sleep 5
done

# Final state plus recent logs for the deploy record; log retrieval
# failures are ignored so they cannot fail a healthy deploy.
docker compose ps
echo "--- Caddy logs (last 20 lines) ---"
docker compose logs --tail 20 caddy || true
echo "--- GitNexus logs (last 30 lines) ---"
docker compose logs --tail 30 gitnexus || true
REMOTE
|
|
|
|
# When the deploy was triggered by a PR command path, post a
|
|
# terminal status comment on that one PR only. Two sub-cases:
|
|
#
|
|
# 1. workflow_run trigger: the PR's native auto-index run fired
|
|
# workflow_run, so github.event.workflow_run.id is the trigger.
|
|
# Find the matching PR via the matrix entry whose runId matches.
|
|
#
|
|
# 2. workflow_dispatch trigger with inputs.pr_number set: the
|
|
# index workflow's bot-fallback path dispatched us directly
|
|
# because workflow_run is suppressed for GITHUB_TOKEN triggers.
|
|
# Use inputs.pr_number as the comment target.
|
|
#
|
|
# Broadcast-commenting on every active PR would be noise — only the
|
|
# PR that asked for a fresh index gets a reply.
|
|
- name: Comment on PR — deploy complete
|
|
if: always()
|
|
uses: actions/github-script@v7
|
|
env:
|
|
MATRIX: ${{ steps.resolve.outputs.matrix }}
|
|
TRIGGER_RUN_ID: ${{ github.event.workflow_run.id }}
|
|
DISPATCH_PR_NUMBER: ${{ github.event.inputs.pr_number }}
|
|
DEPLOY_STATUS: ${{ job.status }}
|
|
with:
|
|
script: |
|
|
let prNum = null;
|
|
|
|
// Case 1: dispatched directly with pr_number (bot-fallback path)
|
|
if (process.env.DISPATCH_PR_NUMBER && process.env.DISPATCH_PR_NUMBER !== '') {
|
|
prNum = parseInt(process.env.DISPATCH_PR_NUMBER, 10);
|
|
}
|
|
// Case 2: workflow_run trigger from a PR index run
|
|
else if (context.eventName === 'workflow_run') {
|
|
const matrix = JSON.parse(process.env.MATRIX || '[]');
|
|
const triggerRunId = Number(process.env.TRIGGER_RUN_ID);
|
|
const match = matrix.find(
|
|
(m) => m.runId === triggerRunId && m.name.startsWith('LibreChat-pr-'),
|
|
);
|
|
if (match) {
|
|
prNum = parseInt(match.name.replace('LibreChat-pr-', ''), 10);
|
|
}
|
|
}
|
|
|
|
if (!prNum) {
|
|
core.info('No PR to comment on (trigger was not a PR-scoped index); skipping.');
|
|
return;
|
|
}
|
|
|
|
const deployUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
|
const ok = process.env.DEPLOY_STATUS === 'success';
|
|
const body = [
|
|
`### GitNexus: ${ok ? '🚀 deployed' : '❌ deploy failed'}`,
|
|
'',
|
|
ok
|
|
? `The \`LibreChat-pr-${prNum}\` index is now live on the MCP server.`
|
|
: `The deploy failed — the previous index (if any) continues to be served.`,
|
|
`[Deploy run](${deployUrl})`,
|
|
].join('\n');
|
|
await github.rest.issues.createComment({
|
|
owner: context.repo.owner,
|
|
repo: context.repo.repo,
|
|
issue_number: prNum,
|
|
body,
|
|
});
|
|
|
|
- name: Cleanup SSH key
|
|
if: always()
|
|
run: rm -f ~/.ssh/deploy_key
|