This commit is contained in:
parent
a46e4813a8
commit
bba91bfdd3
94
.gitea/workflows/ingest.yml
Normal file
94
.gitea/workflows/ingest.yml
Normal file
@ -0,0 +1,94 @@
|
||||
name: ingest

# Trigger: any push that lands on main. The job below diffs the pushed
# range, keeps only files with a supported extension, and hands that list
# to `compass-ingest run --files <list>` inside a container started from
# ${{ vars.COMPASS_APP_IMAGE }}.
#
# The CLI connects straight to postgres-rag and the raw Graphiti Neo4j;
# nothing is routed over HTTP through the long-running app service.

on:
  push:
    branches: [main]
|
||||
|
||||
jobs:
  # Single job: work out which knowledge-base files the push touched and
  # feed the supported ones to `compass-ingest`.
  ingest:
    runs-on: ubuntu-latest
    container:
      image: ${{ vars.COMPASS_APP_IMAGE }}
      # Same network override + volume allow-list as deploy.yml. See
      # scripts/runner-config.yaml for the rationale.
      network: ${{ vars.COMPASS_NETWORK || 'compass_default' }}
    env:
      # Pgvector chunk store.
      POSTGRES_RAG_DSN: ${{ secrets.POSTGRES_RAG_DSN }}
      # Raw Graphiti graph (7688). These are NOT the structured-graph
      # write creds — those only exist in compass/migrations.
      NEO4J_URI: ${{ secrets.NEO4J_RAW_URI }}
      NEO4J_USER: ${{ secrets.NEO4J_RAW_USER }}
      NEO4J_PASSWORD: ${{ secrets.NEO4J_RAW_PASSWORD }}

    steps:
      - name: Check out knowledge-base repo
        uses: actions/checkout@v4
        with:
          # Full history. The diff in the next step is taken against
          # github.event.before (the branch tip before the push), which is
          # more than one commit behind HEAD whenever a push carries
          # several commits. A depth-2 clone would not contain that commit
          # and `git diff` would abort.
          fetch-depth: 0

      - name: Compute changed files
        id: changes
        shell: bash
        env:
          # Passed through the environment instead of being spliced into
          # the script text, so the values are never parsed as shell code.
          BEFORE: ${{ github.event.before }}
          AFTER: ${{ github.sha }}
        run: |
          set -euo pipefail

          zero_sha="0000000000000000000000000000000000000000"

          # Emit every tracked path, one per line. `-z` makes git print
          # paths verbatim (no C-style quoting of non-ASCII names), and
          # `tr` turns the NUL separators back into newlines.
          list_full_tree() {
            git ls-files -z | tr '\0' '\n'
          }

          # Gitea (like GitHub) reports 40 zeros as the "before" of an
          # initial push or a branch creation. Fall back to the full
          # file listing so we don't silently skip the first batch.
          if [ -z "$BEFORE" ] || [ "$BEFORE" = "$zero_sha" ]; then
            echo "initial commit or branch creation — ingesting full tree"
            list_full_tree > /tmp/changed-all.txt
          elif ! git cat-file -e "${BEFORE}^{commit}" 2>/dev/null; then
            # The previous tip is not in the clone (e.g. it was discarded
            # by a force-push), so there is nothing to diff against.
            # Ingest everything rather than fail or skip files.
            echo "previous tip $BEFORE not found in clone — ingesting full tree"
            list_full_tree > /tmp/changed-all.txt
          else
            # `--diff-filter=d` drops deletions: we don't want to ingest
            # a file that no longer exists. `-z` keeps unusual path names
            # unquoted so the extension filter below can match them.
            git diff -z --name-only --diff-filter=d "$BEFORE" "$AFTER" \
              | tr '\0' '\n' > /tmp/changed-all.txt
          fi

          # Filter to supported extensions. Keep this list in sync with
          # the matcher in app/ingestion/pipeline.py.
          # grep exits 1 when nothing matches, which is a normal outcome
          # here; any other non-zero status is a real error and must
          # fail the step.
          grep -Ei '\.(md|txt|pdf|docx|xlsx)$' /tmp/changed-all.txt \
            > /tmp/changed.txt || [ "$?" -eq 1 ]

          count=$(wc -l < /tmp/changed.txt | tr -d ' ')
          echo "changed_count=$count" >> "$GITHUB_OUTPUT"
          echo "changed files:"
          cat /tmp/changed.txt || true

      - name: Ingest changed files
        if: steps.changes.outputs.changed_count != '0'
        shell: bash
        run: |
          set -euo pipefail
          # Load the newline-delimited list into an array so each path is
          # passed as exactly one argument (embedded spaces survive) to a
          # single compass-ingest invocation, which lets the shared
          # DB/graph pools be reused.
          # NOTE: a single invocation is still subject to the kernel's
          # argument-size limit; an extremely large push would need the
          # list split into batches.
          mapfile -t files < /tmp/changed.txt
          compass-ingest run --files "${files[@]}"

      - name: No-op summary
        if: steps.changes.outputs.changed_count == '0'
        run: echo "No supported files changed — nothing to ingest."

      - name: Upload ingest log
        # Runs even when an earlier step failed, so the computed file
        # list is available for debugging.
        if: always()
        # v4 uses GHES Artifacts API v2 which Gitea doesn't implement yet.
        uses: actions/upload-artifact@v3
        with:
          name: ingest-${{ github.sha }}
          path: |
            /tmp/changed.txt
|
||||
Loading…
Reference in New Issue
Block a user