name: ingest

# Fires on every push to main. Computes the diff between the previous branch
# head (`github.event.before`) and the pushed commit, filters it to supported
# extensions, then invokes `compass-ingest run --files <paths>` inside a
# container pulled from ${{ vars.COMPASS_APP_IMAGE }}.
#
# The CLI talks directly to postgres-rag + the raw Graphiti Neo4j —
# there is no HTTP hop through the long-running app service.

on:
  push:
    branches:
      - main

jobs:
  ingest:
    runs-on: ubuntu-latest
    container:
      image: ${{ vars.COMPASS_APP_IMAGE }}
      # Same network override + volume allow-list as deploy.yml. See
      # scripts/runner-config.yaml for the rationale.
      network: ${{ vars.COMPASS_NETWORK || 'compass_default' }}
    env:
      # Pgvector chunk store.
      POSTGRES_RAG_DSN: ${{ secrets.POSTGRES_RAG_DSN }}
      # Raw Graphiti graph (7688). These are NOT the structured-graph
      # write creds — those only exist in compass/migrations.
      NEO4J_URI: ${{ secrets.NEO4J_RAW_URI }}
      NEO4J_USER: ${{ secrets.NEO4J_RAW_USER }}
      NEO4J_PASSWORD: ${{ secrets.NEO4J_RAW_PASSWORD }}
    steps:
      - name: Check out knowledge-base repo
        uses: actions/checkout@v4
        with:
          # Depth 2 gives us $GITHUB_SHA and its first parent, which covers
          # the common single-commit push. For larger pushes the previous
          # branch head is fetched on demand in the next step. The
          # initial-commit path (before == 000…) is handled there too.
          fetch-depth: 2

      - name: Compute changed files
        id: changes
        shell: bash
        env:
          # Handed over via the environment rather than interpolated into
          # the script text, so the values are never parsed as shell code.
          BEFORE: ${{ github.event.before }}
          AFTER: ${{ github.sha }}
        run: |
          set -euo pipefail

          ZERO_SHA="0000000000000000000000000000000000000000"
          full_tree=false

          # Gitea (like GitHub) reports 40 zeros as the "before" of an
          # initial push or a branch creation. Fall back to the full
          # file listing so we don't silently skip the first batch.
          if [ -z "$BEFORE" ] || [ "$BEFORE" = "$ZERO_SHA" ]; then
            echo "initial commit or branch creation — ingesting full tree"
            full_tree=true
          # A push of several commits (or a force-push) leaves "before"
          # outside the depth-2 clone. Try to fetch just that commit; if
          # the server no longer has it, ingest the full tree instead of
          # failing the run.
          elif ! git cat-file -e "${BEFORE}^{commit}" 2>/dev/null \
            && ! git fetch --no-tags --depth=1 origin "$BEFORE"; then
            echo "previous head $BEFORE is unavailable — ingesting full tree"
            full_tree=true
          fi

          # core.quotePath=false keeps non-ASCII paths verbatim; by default
          # git C-quotes them, which would break the extension filter and
          # the paths handed to compass-ingest.
          if [ "$full_tree" = true ]; then
            git -c core.quotePath=false ls-files > /tmp/changed-all.txt
          else
            # `--diff-filter=d` drops deletions: we don't want to ingest
            # a file that no longer exists.
            git -c core.quotePath=false diff --name-only --diff-filter=d \
              "$BEFORE" "$AFTER" > /tmp/changed-all.txt
          fi

          # Filter to supported extensions. Keep this list in sync with
          # the matcher in app/ingestion/pipeline.py.
          # grep exits 1 when nothing matches (fine) and 2 on a real error
          # (must fail the step), so only status 1 is tolerated.
          grep -Ei '\.(md|txt|pdf|docx|xlsx)$' /tmp/changed-all.txt \
            > /tmp/changed.txt || [ "$?" -eq 1 ]

          count=$(wc -l < /tmp/changed.txt | tr -d ' ')
          echo "changed_count=$count" >> "$GITHUB_OUTPUT"
          echo "changed files:"
          cat /tmp/changed.txt || true

      - name: Ingest changed files
        if: steps.changes.outputs.changed_count != '0'
        shell: bash
        run: |
          set -euo pipefail
          # All paths go to compass-ingest in a single invocation so the
          # shared DB/graph pools are reused. NOTE: the whole list is
          # expanded into argv, so an extremely large push can still hit
          # the kernel's "argument list too long" limit.
          mapfile -t files < /tmp/changed.txt
          compass-ingest run --files "${files[@]}"

      - name: No-op summary
        if: steps.changes.outputs.changed_count == '0'
        run: echo "No supported files changed — nothing to ingest."

      - name: Upload ingest log
        if: always()
        # v4 uses GHES Artifacts API v2 which Gitea doesn't implement yet.
        uses: actions/upload-artifact@v3
        with:
          name: ingest-${{ github.sha }}
          path: |
            /tmp/changed.txt