curator: chain compass-curate after ingest
This commit is contained in:
parent
4a67004069
commit
7531e31961
@ -21,6 +21,11 @@ jobs:
|
|||||||
# Same network override + volume allow-list as deploy.yml. See
|
# Same network override + volume allow-list as deploy.yml. See
|
||||||
# scripts/runner-config.yaml for the rationale.
|
# scripts/runner-config.yaml for the rationale.
|
||||||
network: ${{ vars.COMPASS_NETWORK || 'compass_default' }}
|
network: ${{ vars.COMPASS_NETWORK || 'compass_default' }}
|
||||||
|
outputs:
|
||||||
|
# Space-separated list of UUIDs written by `compass-ingest`; the
|
||||||
|
# `curate` job fans out over these. Empty on no-op pushes.
|
||||||
|
document_ids: ${{ steps.run-ingest.outputs.document_ids }}
|
||||||
|
changed_count: ${{ steps.changes.outputs.changed_count }}
|
||||||
env:
|
env:
|
||||||
# Pgvector chunk store. The CLI reads DATABASE_URL (ottomator
|
# Pgvector chunk store. The CLI reads DATABASE_URL (ottomator
|
||||||
# legacy name); POSTGRES_RAG_DSN is kept for symmetry with the
|
# legacy name); POSTGRES_RAG_DSN is kept for symmetry with the
|
||||||
@ -79,6 +84,7 @@ jobs:
|
|||||||
cat /tmp/changed.txt || true
|
cat /tmp/changed.txt || true
|
||||||
|
|
||||||
- name: Ingest changed files
|
- name: Ingest changed files
|
||||||
|
id: run-ingest
|
||||||
if: steps.changes.outputs.changed_count != '0'
|
if: steps.changes.outputs.changed_count != '0'
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -87,7 +93,14 @@ jobs:
|
|||||||
# while still giving compass-ingest a single invocation so
|
# while still giving compass-ingest a single invocation so
|
||||||
# the shared DB/graph pools are reused.
|
# the shared DB/graph pools are reused.
|
||||||
mapfile -t files < /tmp/changed.txt
|
mapfile -t files < /tmp/changed.txt
|
||||||
compass-ingest run --files "${files[@]}"
|
# `tee` so the human-readable log still shows the JSON summary
|
||||||
|
# while the file feeds the downstream id extraction.
|
||||||
|
compass-ingest run --files "${files[@]}" | tee /tmp/ingest-summary.json
|
||||||
|
# Extract UUIDs as space-separated string for the curate job
|
||||||
|
# fan-out. python3 is present on the compass-app image.
|
||||||
|
doc_ids="$(python3 -c 'import json,sys; d=json.load(open("/tmp/ingest-summary.json")); print(" ".join(r["document_id"] for r in d["results"] if not r["errors"]))')"
|
||||||
|
echo "document_ids=$doc_ids" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "captured document_ids: $doc_ids"
|
||||||
|
|
||||||
- name: No-op summary
|
- name: No-op summary
|
||||||
if: steps.changes.outputs.changed_count == '0'
|
if: steps.changes.outputs.changed_count == '0'
|
||||||
@ -101,3 +114,68 @@ jobs:
|
|||||||
name: ingest-${{ github.sha }}
|
name: ingest-${{ github.sha }}
|
||||||
path: |
|
path: |
|
||||||
/tmp/changed.txt
|
/tmp/changed.txt
|
||||||
|
/tmp/ingest-summary.json
|
||||||
|
|
||||||
|
# Curator stage — chained synchronously after ingest. Runs the
|
||||||
|
# RegulatoryObligation spec against each newly-ingested document_id,
|
||||||
|
# and opens a PR against compass/migrations per accepted document. Skips
|
||||||
|
# (classified_skip / extracted) are non-fatal; only a `failed` curator
|
||||||
|
# run fails the workflow.
|
||||||
|
curate:
  # Runs `compass-curate` once per document id exported by the `ingest`
  # job. The job is skipped when ingest exported no ids.
  needs: ingest
  if: needs.ingest.outputs.document_ids != ''
  runs-on: ubuntu-latest
  container:
    image: ${{ vars.COMPASS_MCP_IMAGE }}
    network: ${{ vars.COMPASS_NETWORK || 'compass_default' }}
  env:
    # Curator reads chunks + writes agent_runs into postgres-rag.
    POSTGRES_RAG_DSN: ${{ secrets.POSTGRES_RAG_DSN }}
    # Raw Graphiti graph (7688) — `gather_for_document` reads
    # episodes/entities traced back to the document.
    NEO4J_RAW_URI: ${{ secrets.NEO4J_RAW_URI }}
    NEO4J_RAW_USER: ${{ secrets.NEO4J_RAW_USER }}
    NEO4J_RAW_PASSWORD: ${{ secrets.NEO4J_RAW_PASSWORD }}
    # Structured graph (7687) — conflict-check reads the live schema.
    # `client_from_env` reads NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD.
    NEO4J_URI: ${{ secrets.NEO4J_STRUCTURED_URI }}
    NEO4J_USER: ${{ secrets.NEO4J_STRUCTURED_USER }}
    NEO4J_PASSWORD: ${{ secrets.NEO4J_STRUCTURED_PASSWORD }}
    # OpenAI for classify + extract + score LLM calls.
    LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    LLM_BASE_URL: ${{ vars.LLM_BASE_URL || 'https://api.openai.com/v1' }}
    CURATOR_CLASSIFY_MODEL: ${{ vars.CURATOR_CLASSIFY_MODEL || 'gpt-4.1-mini' }}
    CURATOR_EXTRACT_MODEL: ${{ vars.CURATOR_EXTRACT_MODEL || 'gpt-4.1-mini' }}
    CURATOR_MIN_CONFIDENCE: ${{ vars.CURATOR_MIN_CONFIDENCE || '0.7' }}
    # Gitea — the curator opens PRs against compass/migrations.
    # Gitea rejects secret AND variable names prefixed with GITEA_ /
    # GITHUB_, so both are stored as COMPASS_GITEA_*. We remap them
    # back to GITEA_* here, which is what `GiteaConfig.from_env` reads.
    GITEA_URL: ${{ vars.COMPASS_GITEA_URL || 'http://gitea:3000' }}
    GITEA_TOKEN: ${{ secrets.COMPASS_GITEA_TOKEN }}
    GITEA_OWNER: ${{ vars.COMPASS_GITEA_OWNER || 'compass' }}
    GITEA_MIGRATIONS_REPO: ${{ vars.COMPASS_MIGRATIONS_REPO || 'migrations' }}

  steps:
    - name: Run curator per document
      shell: bash
      env:
        # Hand the id list to the script through the environment instead
        # of interpolating the expression into the script text, so the
        # value is always treated as data and never parsed as shell code.
        DOCUMENT_IDS: ${{ needs.ingest.outputs.document_ids }}
      run: |
        set -euo pipefail
        # Split the space-separated id list into an array. `read -a`
        # word-splits on whitespace without performing pathname
        # expansion on the individual ids.
        read -r -a doc_ids <<< "$DOCUMENT_IDS"
        any_failed=0
        for doc_id in "${doc_ids[@]}"; do
          echo "::group::curate $doc_id"
          # Don't let a single-document failure abort the loop — we
          # want all docs attempted so a poisoned PDF doesn't block
          # the rest of the batch. The job exits non-zero at the end
          # if any run returned 1 (status=failed).
          set +e
          compass-curate run --document-id "$doc_id"
          rc=$?
          set -e
          # NOTE(review): only exit code 1 marks the batch as failed;
          # any other non-zero code (e.g. 127 when the binary is
          # missing) is treated as non-fatal — confirm that is intended.
          if [ "$rc" = "1" ]; then
            echo "::error::curator failed for $doc_id"
            any_failed=1
          fi
          echo "::endgroup::"
        done
        exit "$any_failed"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user