From bba91bfdd38d2d64424a9f358b8b8cd1e4f4c296 Mon Sep 17 00:00:00 2001
From: compass-admin
Date: Thu, 23 Apr 2026 16:00:26 +0000
Subject: [PATCH] sync workflow from repo

---
Note: hunk edited by hand during review; the blob id on the index line
below was not regenerated.

 .gitea/workflows/ingest.yml | 115 +++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 .gitea/workflows/ingest.yml

diff --git a/.gitea/workflows/ingest.yml b/.gitea/workflows/ingest.yml
new file mode 100644
index 0000000..aed155a
--- /dev/null
+++ b/.gitea/workflows/ingest.yml
@@ -0,0 +1,115 @@
+name: ingest
+
+# Fires on every push to main. Computes the diff against the commit the
+# branch pointed at before the push, filters to supported extensions,
+# then invokes `compass-ingest run --files <paths>` inside a container
+# pulled from ${{ vars.COMPASS_APP_IMAGE }}.
+#
+# The CLI talks directly to postgres-rag + the raw Graphiti Neo4j —
+# there is no HTTP hop through the long-running app service.
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  ingest:
+    runs-on: ubuntu-latest
+    container:
+      image: ${{ vars.COMPASS_APP_IMAGE }}
+      # Same network override + volume allow-list as deploy.yml. See
+      # scripts/runner-config.yaml for the rationale.
+      network: ${{ vars.COMPASS_NETWORK || 'compass_default' }}
+    env:
+      # Pgvector chunk store.
+      POSTGRES_RAG_DSN: ${{ secrets.POSTGRES_RAG_DSN }}
+      # Raw Graphiti graph (7688). These are NOT the structured-graph
+      # write creds — those only exist in compass/migrations.
+      NEO4J_URI: ${{ secrets.NEO4J_RAW_URI }}
+      NEO4J_USER: ${{ secrets.NEO4J_RAW_USER }}
+      NEO4J_PASSWORD: ${{ secrets.NEO4J_RAW_PASSWORD }}
+
+    steps:
+      - name: Check out knowledge-base repo
+        uses: actions/checkout@v4
+        with:
+          # Depth 2 gives us $GITHUB_SHA and its parent, which covers a
+          # single-commit push. For larger pushes the next step fetches
+          # the "before" commit on demand.
+          fetch-depth: 2
+
+      - name: Compute changed files
+        id: changes
+        shell: bash
+        env:
+          # Handed over as environment variables rather than pasted
+          # into the script text, so event data is never parsed as
+          # shell source.
+          BEFORE: ${{ github.event.before }}
+          AFTER: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+
+          # Gitea (like GitHub) reports an all-zero SHA as the "before"
+          # of an initial push or a branch creation, and after a
+          # multi-commit or force push the "before" commit is not in
+          # the depth-2 clone. Use it as the diff base only if it is
+          # present locally or can be fetched.
+          base=""
+          if [[ -n "$BEFORE" && ! "$BEFORE" =~ ^0+$ ]]; then
+            if git cat-file -e "${BEFORE}^{commit}" 2>/dev/null \
+              || git fetch --no-tags --depth=1 origin "$BEFORE"; then
+              base="$BEFORE"
+            fi
+          fi
+
+          # core.quotepath=off makes git print non-ASCII paths verbatim
+          # instead of as C-quoted octal escapes, which would not match
+          # any file on disk.
+          if [ -z "$base" ]; then
+            # Fall back to the full file listing so we don't silently
+            # skip a batch.
+            echo "no usable base commit — ingesting full tree"
+            git -c core.quotepath=off ls-files > /tmp/changed-all.txt
+          else
+            # `--diff-filter=d` drops deletions: we don't want to ingest
+            # a file that no longer exists.
+            git -c core.quotepath=off diff --name-only --diff-filter=d \
+              "$base" "$AFTER" > /tmp/changed-all.txt
+          fi
+
+          # Filter to supported extensions. Keep this list in sync with
+          # the matcher in app/ingestion/pipeline.py.
+          grep -Ei '\.(md|txt|pdf|docx|xlsx)$' /tmp/changed-all.txt > /tmp/changed.txt || true
+
+          count=$(wc -l < /tmp/changed.txt | tr -d ' ')
+          echo "changed_count=$count" >> "$GITHUB_OUTPUT"
+          echo "changed files:"
+          cat /tmp/changed.txt || true
+
+      - name: Ingest changed files
+        if: steps.changes.outputs.changed_count != '0'
+        shell: bash
+        run: |
+          set -euo pipefail
+          # Read one path per line into an array (so names containing
+          # spaces survive) and hand the whole batch to a single
+          # compass-ingest invocation so the shared DB/graph pools are
+          # reused. A very large push can still exceed the kernel's
+          # argument-size limit.
+          mapfile -t files < /tmp/changed.txt
+          compass-ingest run --files "${files[@]}"
+
+      - name: No-op summary
+        if: steps.changes.outputs.changed_count == '0'
+        run: echo "No supported files changed — nothing to ingest."
+
+      - name: Upload ingest log
+        if: always()
+        # v4 uses GHES Artifacts API v2 which Gitea doesn't implement yet.
+        uses: actions/upload-artifact@v3
+        with:
+          name: ingest-${{ github.sha }}
+          path: |
+            /tmp/changed.txt