# Deploys GitNexus indexes to a droplet via SSH + rsync.
#
# Architecture:
#   GitHub Actions (deploy)
#     1. Resolves latest successful index runs for main, dev, and every
#        open PR that already has an index artifact (contributor-gated
#        upstream by the index workflow's author_association check)
#     2. Downloads each matching .gitnexus/ artifact
#     3. Rsyncs them into /opt/gitnexus/indexes/<name>/ on the droplet
#     4. Removes any stale folders on the droplet for PRs that closed
#        (even though gitnexus-cleanup-pr.yml also handles that path,
#        this is a safety net in case the close event was missed)
#     5. Pulls latest image, force-recreates gitnexus, reloads Caddy,
#        and polls docker health until the container reports healthy
#   The caddy container is untouched — no TLS churn.
#
# First-time droplet bootstrap (run once, manually):
#   1. Create 2GB+ Ubuntu 24.04 droplet, add SSH key
#   2. Point DNS A record for your subdomain at the droplet IP
#   3. SSH in and run:
#        curl -fsSL https://get.docker.com | sh
#        systemctl enable --now docker
#        mkdir -p /opt/gitnexus/indexes
#        useradd -m -s /bin/bash deploy
#        usermod -aG docker deploy
#        mkdir -p /home/deploy/.ssh
#        # Add deploy pubkey to /home/deploy/.ssh/authorized_keys
#        chown -R deploy:deploy /home/deploy/.ssh /opt/gitnexus
#        chmod 700 /home/deploy/.ssh
#        ufw allow 22,80,443/tcp
#        ufw --force enable
#   4. Copy .do/gitnexus/docker-compose.yml and Caddyfile into /opt/gitnexus/
#   5. Create /opt/gitnexus/.env with: GITNEXUS_DOMAIN=... and API_TOKEN=...
#   6. cd /opt/gitnexus && docker compose up -d
#
# Then capture the droplet's SSH host key from your workstation and
# save it as the GITNEXUS_DO_KNOWN_HOST secret (below) so CI can pin it:
#   ssh-keyscan -H gitnexus.yourdomain.com
#
# GHCR image: the workflow runs `docker login ghcr.io` on the droplet
# on every deploy using GITHUB_TOKEN, so the package can stay private.
# If you'd rather not have CI manage droplet auth, make the package
# public under repo Settings -> Packages.
#
# Required GitHub secrets:
#   GITNEXUS_DO_HOST       — droplet IP or hostname
#   GITNEXUS_DO_USER       — SSH user (e.g. "deploy")
#   GITNEXUS_DO_SSH_KEY    — private key matching the authorized pubkey
#   GITNEXUS_DO_KNOWN_HOST — output of `ssh-keyscan -H <host>`, pinning the
#                            droplet's host keys (prevents MITM/TOFU risk)

name: GitNexus Deploy

on:
  # Runs after every completed "GitNexus Index" workflow run.
  workflow_run:
    workflows:
      - GitNexus Index
    types:
      - completed
  # Manual / bot-driven entry point.
  workflow_dispatch:
    inputs:
      pr_number:
        description: 'Optional PR number to post completion comment on (set by bot-triggered dispatches from gitnexus-index.yml)'
        type: string
        default: ''

permissions:
  actions: read
  contents: read
  pull-requests: write # post completion comments on served PR indexes

# Global serialization.
#
# History: earlier versions keyed concurrency per ref with
# cancel-in-progress, so rapid pushes to one ref coalesced while deploys
# for different refs ran in parallel. That raced: the prune-stale-indexes
# step computes its active_names up front, so while deploy A is rsyncing
# /opt/gitnexus/indexes/LibreChat-pr-12580, a slightly later deploy B
# (different ref) could prune and rm -rf the folder A is still uploading
# into.
#
# Now every deploy queues behind one group. cancel-in-progress stays
# false so a running rsync/docker-compose restart is never killed
# mid-operation (which would leave the droplet in a partial state).
# The 20-minute job timeout bounds total queue depth.
concurrency:
  group: gitnexus-deploy
  cancel-in-progress: false

env:
  GITNEXUS_VERSION: '1.6.5'
  IMAGE_NAME: ghcr.io/${{ github.repository_owner }}/librechat-gitnexus

jobs:
  # Rebuilds the long-lived image only when Dockerfile/entrypoint/extensions
  # change. Skipped on every other run, so index-only deploys are fast.
# Jobs: `build-image` publishes the GitNexus image to GHCR; `deploy` ships indexes to the droplet.
  build-image:
    # Run on manual dispatch, or when the triggering index run succeeded.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      contents: read
      packages: write # push image to GHCR
    outputs:
      image_tag: ${{ steps.tag.outputs.value }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Depth 2 so HEAD~1 exists for the change-detection diff below.
          fetch-depth: 2

      - name: Detect image changes
        id: changes
        run: |
          # Default to rebuild when we can't cleanly diff (first commit,
          # workflow_run from a PR branch where HEAD isn't the trigger, etc).
          # Rebuild on miss > skip when we should have rebuilt.
          if git rev-parse --verify HEAD~1 >/dev/null 2>&1 && \
             git diff --quiet HEAD~1 HEAD -- .do/gitnexus/Dockerfile .do/gitnexus/entrypoint.sh .do/gitnexus/install-extensions.js; then
            echo "changed=false" >> "$GITHUB_OUTPUT"
          else
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Compute image tag
        id: tag
        run: echo "value=v${{ env.GITNEXUS_VERSION }}" >> "$GITHUB_OUTPUT"

      # The three build steps below share one condition: image inputs
      # changed in the latest commit, or the run was dispatched manually.
      - name: Set up Docker Buildx
        if: steps.changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'
        uses: docker/setup-buildx-action@v3

      - name: Log in to GHCR
        if: steps.changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push image
        if: steps.changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'
        uses: docker/build-push-action@v5
        with:
          context: .do/gitnexus
          file: .do/gitnexus/Dockerfile
          push: true
          tags: |
            ${{ env.IMAGE_NAME }}:latest
            ${{ env.IMAGE_NAME }}:${{ steps.tag.outputs.value }}
          build-args: |
            GITNEXUS_VERSION=${{ env.GITNEXUS_VERSION }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

  deploy:
    needs: build-image
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      actions: read
      contents: read
      pull-requests: write # post deploy-complete comments on served PR indexes
    steps:
      - name: Checkout deploy config
        uses: actions/checkout@v4
        with:
          sparse-checkout: .do/gitnexus
          fetch-depth: 1

      # Resolve every index to serve. All resolutions go through
      # listArtifactsForRepo keyed by the expected artifact name, so a
      # run's branch or event type doesn't matter — we always pick the
      # freshest artifact that actually exists.
      #
      # Why this matters: a /gitnexus index command dispatches
      # gitnexus-index.yml with ref=main and an input pr_number, which
      # produces a run whose head_branch is "main" but whose artifact
      # is gitnexus-index-pr-<N>. listWorkflowRuns(branch='main') would
      # happily return that run, and we'd then try to download a
      # nonexistent gitnexus-index-main artifact from it. Querying by
      # artifact name directly avoids the whole mess.
      #
      # Outputs:
      #   matrix       — JSON array of { name, artifactName, runId }
      #   active_names — comma-separated folder names to keep on the droplet
      - name: Resolve indexes to serve
        id: resolve
        uses: actions/github-script@v7
        with:
          script: |
            const serve = []; // [{ name, artifactName, runId }]

            // Helper — pick the newest non-expired artifact matching a name.
            // Artifacts whose workflow_run is null (the REST API allows it)
            // are skipped: without a run id there is nothing to pass to
            // `gh run download`, and dereferencing it would throw.
            // Returns undefined when no usable artifact exists.
            const latestArtifact = async (artifactName) => {
              const { data } = await github.rest.actions.listArtifactsForRepo({
                owner: context.repo.owner,
                repo: context.repo.repo,
                name: artifactName,
                per_page: 10,
              });
              return data.artifacts
                .filter((a) => !a.expired && a.workflow_run?.id != null)
                .sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0];
            };

            // --- main and dev branches ---
            for (const [branch, name] of [
              ['main', 'LibreChat'],
              ['dev', 'LibreChat-dev'],
            ]) {
              const artifactName = `gitnexus-index-${branch}`;
              const fresh = await latestArtifact(artifactName);
              if (!fresh) {
                core.warning(`No artifact found for ${branch} (expected ${artifactName})`);
                continue;
              }
              serve.push({
                name,
                artifactName,
                runId: fresh.workflow_run.id,
              });
              core.info(`${branch}: run ${fresh.workflow_run.id} -> ${name}`);
            }

            // --- open PRs with at least one successful index run ---
            // github.paginate handles the 100-per-page ceiling automatically
            // so the resolution works on repos with 200+ concurrent open PRs.
            const openPrs = await github.paginate(github.rest.pulls.list, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              per_page: 100,
            });
            core.info(`Found ${openPrs.length} open PRs`);

            // Parallelize artifact lookups in fixed-size batches so the
            // resolve step runs in seconds instead of minutes on big repos,
            // without burning the GitHub API rate limit all at once.
            const BATCH_SIZE = 10;
            const prMatches = [];
            for (let i = 0; i < openPrs.length; i += BATCH_SIZE) {
              const batch = openPrs.slice(i, i + BATCH_SIZE);
              const results = await Promise.all(
                batch.map(async (pr) => {
                  const artifactName = `gitnexus-index-pr-${pr.number}`;
                  const fresh = await latestArtifact(artifactName);
                  return fresh ? { pr, artifactName, fresh } : null;
                }),
              );
              for (const hit of results) {
                if (hit) prMatches.push(hit);
              }
            }

            // Cap to the N most recent PR indexes by artifact creation time.
            // On a 10GB droplet each index is ~130MB; 3 PRs + main + dev ≈
            // 650MB of index data, leaving headroom for the ~700MB Docker image
            // and OS. Older PR indexes are evicted by the prune step.
            const MAX_PR_INDEXES = 3;
            prMatches.sort(
              (a, b) => new Date(b.fresh.created_at) - new Date(a.fresh.created_at),
            );
            const keptPrs = prMatches.slice(0, MAX_PR_INDEXES);
            const evictedPrs = prMatches.slice(MAX_PR_INDEXES);

            for (const { pr, artifactName, fresh } of keptPrs) {
              serve.push({
                name: `LibreChat-pr-${pr.number}`,
                artifactName,
                runId: fresh.workflow_run.id,
              });
              core.info(`PR #${pr.number}: run ${fresh.workflow_run.id} -> LibreChat-pr-${pr.number}`);
            }

            if (evictedPrs.length) {
              core.info(
                `Evicted ${evictedPrs.length} older PR indexes (cap=${MAX_PR_INDEXES}): ` +
                  evictedPrs.map((e) => `#${e.pr.number}`).join(', '),
              );
            }

            core.info(`Serving ${keptPrs.length} PR indexes out of ${prMatches.length} with artifacts (${openPrs.length} open PRs total)`);

            if (!serve.length) {
              core.setFailed('No indexes to serve');
              return;
            }
            core.setOutput('matrix', JSON.stringify(serve));
            core.setOutput('active_names', serve.map((s) => s.name).join(','));

      - name: Download each index artifact
        env:
          MATRIX: ${{ steps.resolve.outputs.matrix }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -e
          mkdir -p staging

          # main/dev artifact download failures are fatal — a missing
          # main/dev index is a real deploy failure. PR artifact failures
          # are soft — a PR artifact deleted mid-deploy shouldn't abort
          # the whole deploy and take main/dev down with it.
          #
          # The loop body runs in a pipeline subshell; its `exit 1` becomes
          # the pipeline's exit status, which `set -e` then turns into a
          # step failure.
          echo "$MATRIX" | jq -c '.[]' | while read -r entry; do
            name=$(echo "$entry" | jq -r '.name')
            artifact=$(echo "$entry" | jq -r '.artifactName')
            runId=$(echo "$entry" | jq -r '.runId')
            target="staging/${name}/.gitnexus"
            echo "Downloading $artifact from run $runId -> $target"
            mkdir -p "$target"
            if ! gh run download "$runId" \
              --repo "${{ github.repository }}" \
              --name "$artifact" \
              --dir "$target"; then
              case "$name" in
                LibreChat|LibreChat-dev)
                  echo "::error::Failed to download critical artifact $artifact"
                  exit 1
                  ;;
                *)
                  # The name stays in active_names so the prune step
                  # won't remove the droplet's existing copy. The old
                  # index keeps being served instead of being wiped to
                  # nothing — stale beats empty — but observability
                  # requires an explicit notice since this path is
                  # invisible in the happy-path deploy log.
                  echo "::warning::Failed to download PR artifact $artifact — skipping fresh sync; previous index (if any) will continue being served from the droplet"
                  rm -rf "staging/${name}"
                  ;;
              esac
            fi
          done

          echo ""
          echo "Staged for rsync:"
          du -sh staging/*/.gitnexus/ 2>/dev/null || echo "(none)"

      - name: Setup SSH
        env:
          SSH_KEY: ${{ secrets.GITNEXUS_DO_SSH_KEY }}
          KNOWN_HOST: ${{ secrets.GITNEXUS_DO_KNOWN_HOST }}
        run: |
          set -e
          mkdir -p ~/.ssh
          chmod 700 ~/.ssh
          printf '%s\n' "$SSH_KEY" > ~/.ssh/deploy_key
          chmod 600 ~/.ssh/deploy_key
          # Pin the droplet's SSH host key from a repository secret instead
          # of trusting whatever ssh-keyscan returns at deploy time. The
          # secret is populated from `ssh-keyscan -H <host>` at bootstrap.
          if [ -z "$KNOWN_HOST" ]; then
            echo "::error::GITNEXUS_DO_KNOWN_HOST secret is empty. Run ssh-keyscan -H <droplet-host> and paste the output as this secret."
            exit 1
          fi
          printf '%s\n' "$KNOWN_HOST" > ~/.ssh/known_hosts
          chmod 600 ~/.ssh/known_hosts

      - name: Authenticate droplet with GHCR
        # GHCR packages pushed by GITHUB_TOKEN start private. The droplet
        # pulls the image on every deploy, so we re-authenticate it here
        # using the same short-lived token. If the package is public, this
        # step is redundant but harmless.
        #
        # The token MUST travel through SSH stdin (not as a command arg)
        # so it's never visible in the droplet's process table via
        # /proc/<pid>/cmdline. `printf '%s'` is preferred over `echo`
        # so the exact byte sequence sent is explicit — docker login
        # tolerates a trailing newline but `printf` makes the intent
        # obvious and portable across shells.
        env:
          SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
          SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GH_ACTOR: ${{ github.actor }}
        run: |
          printf '%s' "$GH_TOKEN" | ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
            "docker login ghcr.io -u '$GH_ACTOR' --password-stdin"

      - name: Upload config files
        env:
          SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
          SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
        run: |
          rsync -az -e "ssh -i ~/.ssh/deploy_key" \
            .do/gitnexus/docker-compose.yml \
            .do/gitnexus/Caddyfile \
            "$SSH_USER@$SSH_HOST:/opt/gitnexus/"

      - name: Prune stale indexes then sync fresh ones
        env:
          SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
          SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
          ACTIVE_NAMES: ${{ steps.resolve.outputs.active_names }}
        run: |
          set -e

          # ── Step 1: prune FIRST ────────────────────────────────
          # Remove any folders on the droplet that aren't in the active set.
          # This frees disk BEFORE rsyncing new data, which matters on a
          # 10GB disk where each index is ~130MB.
          echo "Pruning stale indexes (keeping: $ACTIVE_NAMES)"
          ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
            ACTIVE_NAMES="$ACTIVE_NAMES" bash <<'REMOTE'
          set -e
          cd /opt/gitnexus/indexes || exit 0
          shopt -s nullglob
          IFS=',' read -ra ACTIVE <<< "$ACTIVE_NAMES"
          for dir in */; do
            dir="${dir%/}"
            keep=false
            for a in "${ACTIVE[@]}"; do
              if [ "$dir" = "$a" ]; then keep=true; break; fi
            done
            if [ "$keep" = false ]; then
              echo "Removing stale index: $dir"
              rm -rf "$dir"
            fi
          done
          echo "Disk after prune:"
          df -h / | tail -1
          REMOTE

          # ── Step 2: rsync-then-swap ─────────────────────────────
          # Upload each index to a temp directory, then atomically swap
          # it into place. If rsync fails, the old index survives intact
          # and the partial temp dir is cleaned up — no production data
          # is lost. The brief period where both old + new exist costs
          # ~130MB of extra disk, but the prune step already freed
          # space from evicted PR indexes so this fits on a 10GB disk.
          for dir in staging/*/; do
            [ -d "$dir" ] || continue
            name=$(basename "$dir")
            echo "Syncing $name (rsync-then-swap)"
            ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
              "mkdir -p /opt/gitnexus/indexes/${name}.new"
            if rsync -az -e "ssh -i ~/.ssh/deploy_key" \
              "$dir" \
              "$SSH_USER@$SSH_HOST:/opt/gitnexus/indexes/${name}.new/"; then
              # Swap: remove old, rename new into place
              ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
                "rm -rf /opt/gitnexus/indexes/$name && mv /opt/gitnexus/indexes/${name}.new /opt/gitnexus/indexes/$name"
              echo " $name swapped successfully"
            else
              # Clean up the partial temp dir
              ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" \
                "rm -rf /opt/gitnexus/indexes/${name}.new"
              # main/dev are critical — abort the deploy so the failure
              # is visible and the container isn't restarted with stale
              # or missing data. PR indexes are best-effort.
              case "$name" in
                LibreChat|LibreChat-dev)
                  echo "::error::rsync failed for critical index $name — aborting deploy"
                  exit 1
                  ;;
                *)
                  echo "::warning::rsync failed for PR index $name — keeping previous index"
                  ;;
              esac
            fi
          done

      - name: Pull image, restart gitnexus, reload Caddy, wait for healthy
        env:
          SSH_USER: ${{ secrets.GITNEXUS_DO_USER }}
          SSH_HOST: ${{ secrets.GITNEXUS_DO_HOST }}
        run: |
          ssh -i ~/.ssh/deploy_key "$SSH_USER@$SSH_HOST" bash <<'REMOTE'
          set -e
          cd /opt/gitnexus

          # ── Disk cleanup ──────────────────────────────────────
          # Docker accumulates old image layers, dangling images, and
          # build cache across deploys. On a 60GB droplet with a 700MB+
          # gitnexus image, this fills the disk after ~40 deploys.
          # Prune everything not used by currently-running containers
          # BEFORE pulling the new image so the extract has room.
          # NOTE(review): other comments in this workflow assume a 10GB
          # disk — confirm the actual droplet size.
          echo "Disk before cleanup:"
          df -h / | tail -1
          # Omit --volumes: Caddy's caddy-data and caddy-config volumes
          # hold TLS certificates and ACME state. If Caddy happens to be
          # stopped when this runs (the workflow handles that case later),
          # --volumes would wipe them, forcing Let's Encrypt re-issuance
          # and risking rate-limit lockout (5 certs/domain/week).
          docker system prune -af 2>/dev/null || true
          echo "Disk after cleanup:"
          df -h / | tail -1

          # Fail fast if disk is critically low even after prune
          AVAIL_MB=$(df --output=avail -m / | tail -1 | tr -d ' ')
          if [ "$AVAIL_MB" -lt 2048 ]; then
            echo "::error::Disk critically low (${AVAIL_MB}MB free). Aborting deploy."
            exit 1
          fi

          docker compose pull gitnexus
          docker compose up -d --force-recreate gitnexus

          # Reload Caddy in-place so a changed Caddyfile takes effect
          # without losing TLS certs or restarting connections. If caddy
          # isn't running yet (first-time bootstrap), bring it up.
          if docker compose ps --status running caddy 2>/dev/null | grep -q caddy; then
            echo "Reloading Caddy config"
            docker compose exec -T caddy caddy reload --config /etc/caddy/Caddyfile || {
              echo "Caddy reload failed — forcing restart"
              docker compose up -d --force-recreate caddy
            }
          else
            echo "Caddy not running — starting"
            docker compose up -d caddy
          fi

          # Poll gitnexus health until ready or timeout. Docker's own
          # unhealthy detection takes up to 150s (start_period 60s +
          # retries 3 * interval 30s), so the poll ceiling must clear
          # that to avoid false negatives when gitnexus legitimately
          # takes ~2.5 min to warm up.
          # Max wait = 36 sleeps * 5s = 180s (final iteration exits
          # before its sleep on failure, so 37 iterations is the
          # correct upper bound for a true 180s ceiling).
          echo "Waiting for gitnexus to report healthy..."
          for i in $(seq 1 37); do
            STATUS=$(docker inspect --format='{{.State.Health.Status}}' gitnexus 2>/dev/null || echo unknown)
            echo "[$i/37] gitnexus health: $STATUS"
            if [ "$STATUS" = "healthy" ]; then
              echo "gitnexus is healthy"
              break
            fi
            if [ "$i" -eq 37 ]; then
              echo "ERROR: gitnexus failed to become healthy after 180s"
              docker compose ps
              docker compose logs --tail 80 gitnexus
              exit 1
            fi
            sleep 5
          done

          docker compose ps
          echo "--- Caddy logs (last 20 lines) ---"
          docker compose logs --tail 20 caddy || true
          echo "--- GitNexus logs (last 30 lines) ---"
          docker compose logs --tail 30 gitnexus || true
          REMOTE

      # When the deploy was triggered by a PR command path, post a
      # terminal status comment on that one PR only. Two sub-cases:
      #
      #   1. workflow_run trigger: the PR's native auto-index run fired
      #      workflow_run, so github.event.workflow_run.id is the trigger.
      #      Find the matching PR via the matrix entry whose runId matches.
      #
      #   2. workflow_dispatch trigger with inputs.pr_number set: the
      #      index workflow's bot-fallback path dispatched us directly
      #      because workflow_run is suppressed for GITHUB_TOKEN triggers.
      #      Use inputs.pr_number as the comment target.
      #
      # Broadcast-commenting on every active PR would be noise — only the
      # PR that asked for a fresh index gets a reply.
# Final `deploy` steps: report the outcome on the requesting PR, then remove the SSH key.
      - name: Comment on PR — deploy complete
        if: always()
        uses: actions/github-script@v7
        env:
          MATRIX: ${{ steps.resolve.outputs.matrix }}
          TRIGGER_RUN_ID: ${{ github.event.workflow_run.id }}
          DISPATCH_PR_NUMBER: ${{ github.event.inputs.pr_number }}
          DEPLOY_STATUS: ${{ job.status }}
        with:
          script: |
            const PR_INDEX_PREFIX = 'LibreChat-pr-';
            const {
              DISPATCH_PR_NUMBER: dispatchedPr,
              MATRIX: rawMatrix,
              TRIGGER_RUN_ID: rawTriggerId,
              DEPLOY_STATUS: deployStatus,
            } = process.env;

            // Work out which PR (if any) should receive the status comment.
            // Returns a number, NaN for an unparsable dispatch input, or null.
            const resolveTargetPr = () => {
              // A non-empty dispatch input wins (bot-fallback path).
              if (dispatchedPr) {
                return parseInt(dispatchedPr, 10);
              }
              // Otherwise only a workflow_run trigger can map to a PR.
              if (context.eventName !== 'workflow_run') {
                return null;
              }
              const served = JSON.parse(rawMatrix || '[]');
              const triggerId = Number(rawTriggerId);
              const entry = served.find(
                (item) => item.runId === triggerId && item.name.startsWith(PR_INDEX_PREFIX),
              );
              return entry ? parseInt(entry.name.replace(PR_INDEX_PREFIX, ''), 10) : null;
            };

            const targetPr = resolveTargetPr();
            // null, NaN and 0 all mean "nothing to comment on".
            if (!targetPr) {
              core.info('No PR to comment on (trigger was not a PR-scoped index); skipping.');
              return;
            }

            const { owner, repo } = context.repo;
            const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;
            const succeeded = deployStatus === 'success';

            const headline = `### GitNexus: ${succeeded ? '🚀 deployed' : '❌ deploy failed'}`;
            let detail;
            if (succeeded) {
              detail = `The \`LibreChat-pr-${targetPr}\` index is now live on the MCP server.`;
            } else {
              detail = `The deploy failed — the previous index (if any) continues to be served.`;
            }
            const runLink = `[Deploy run](${runUrl})`;

            await github.rest.issues.createComment({
              owner,
              repo,
              issue_number: targetPr,
              body: [headline, '', detail, runLink].join('\n'),
            });

      # Always remove the private key from the runner, even after a failure.
      - name: Cleanup SSH key
        if: always()
        run: rm -f ~/.ssh/deploy_key