#!/usr/bin/env bash
# Generates docs/specs/INDEX.md and .context/decisions/INDEX.md from frontmatter.
#
# Why this exists: Aucert's shared knowledge base (specs + ADRs) is markdown,
# not embeddings. Agents consult the INDEX first (cheap to read), then Read the
# specific file by path. See docs/internal/docs/agents/shared-kb-bifurcation-decision-2026-05-07.md
# for why this is preferred over vector-RAG at our current scale.
#
# Hooked via .pre-commit-config.yaml — regenerated whenever any spec or ADR
# changes. CI parity via .github/workflows/pre-commit.yml.
#
# Idempotent: writes only when content changes (mtime-stable for unchanged runs).
# Exit code 1 with "regenerated" message if INDEX.md was rewritten — pre-commit
# uses this to fail the commit and prompt the user to re-stage.

# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# The repository root is two directories above the one holding this script.
# Everything below uses paths relative to it, so run from there.
REPO_ROOT=$(cd "$(dirname "$0")/../.." && pwd)
cd "$REPO_ROOT"

# Print the value of one top-level YAML key from a markdown file's frontmatter.
#
# Arguments: $1 - path to the markdown file
#            $2 - key name (used as part of an awk regex)
# Outputs:   the text after "<key>:" with leading spaces removed and one leading
#            and/or trailing double quote stripped; nothing if the key is not
#            found between the first and second "---" lines.
#
# Only the outermost double quotes are touched, so quoting inside arrays
# (e.g. tags) is preserved and stays readable in the table.
extract_field() {
  local file=$1
  local field=$2
  awk -v field="$field" '
    # A line that is exactly "---" opens or closes the frontmatter block.
    $0 == "---" {
      if (++seen == 2) exit
      next
    }
    # Ignore everything that comes before the opening delimiter.
    seen != 1 { next }
    # Only the first matching key is reported.
    $0 ~ ("^" field ":") {
      value = $0
      sub("^" field ": *", "", value)
      gsub(/^"|"$/, "", value)
      print value
      exit
    }
  ' "$file"
}

# Backslash-escape every "|" in $1 so the value can sit inside a markdown table
# cell without being read as a column separator. The result goes to stdout.
escape_pipe() {
  local cell=$1
  printf '%s' "$cell" \
    | sed 's/|/\\|/g'
}

# Print one markdown table row per SPEC-*.md file found directly in directory
# $1, or a single "_(none)_" placeholder row when nothing matches.
# Private helper of build_specs_index.
_emit_spec_rows() {
  local dir=$1
  local f id title status lu tags
  local found=0

  # Iterate the glob directly instead of parsing `ls` output: paths containing
  # whitespace stay in one piece, and bash returns glob matches already sorted.
  for f in "$dir"/SPEC-*.md; do
    [[ -e "$f" ]] || continue # an unmatched glob is left as the literal pattern
    found=1
    id=$(extract_field "$f" id)
    title=$(extract_field "$f" title)
    status=$(extract_field "$f" status)
    lu=$(extract_field "$f" last_updated)
    tags=$(extract_field "$f" tags)
    printf '| %s | %s | %s | %s | %s | %s |\n' \
      "$(escape_pipe "$id")" \
      "$(escape_pipe "$title")" \
      "$(escape_pipe "$status")" \
      "$(escape_pipe "$lu")" \
      "$(escape_pipe "$tags")" \
      "$(escape_pipe "$f")"
  done

  if ((found == 0)); then
    echo '| _(none)_ | | | | | |'
  fi
}

# Build docs/specs/INDEX.md from approved/ + drafts/.
#
# The index is rendered into a temp file and compared with the existing index;
# the index is only written when the content differs, so unchanged runs do not
# touch it.
#
# Outputs: "Regenerated <path>" on stdout when the index was rewritten;
#          a diagnostic on stderr if the temp file or the index cannot be written.
# Returns: 0 if the existing index already matched; 1 if it was rewritten or
#          the rewrite failed.
build_specs_index() {
  local out=docs/specs/INDEX.md
  local tmp
  local rc=0

  tmp=$(mktemp) || {
    echo "error: mktemp failed; cannot build $out" >&2
    return 1
  }

  {
    printf '%s\n' \
      '<!-- Generated by tools/scripts/build-kb-index.sh — do not edit manually.' \
      '     Frontmatter source: docs/specs/{approved,drafts}/SPEC-*.md.' \
      '     Regenerated by the build-kb-index pre-commit hook on any change. -->' \
      '' \
      '# Aucert specs — index' \
      '' \
      '**Agents:** when you need spec context, Read this index first to identify' \
      'the relevant spec(s) by id / title / tags, then `Read` the specific file at' \
      '`path`. Do not load the full content of every spec — the index is the' \
      'cheap-to-read entrypoint, individual specs are the expensive-but-detailed targets.' \
      '' \
      '## Approved' \
      '' \
      '| id | title | status | last_updated | tags | path |' \
      '|---|---|---|---|---|---|'
    _emit_spec_rows docs/specs/approved

    printf '%s\n' \
      '' \
      '## Drafts' \
      '' \
      '| id | title | status | last_updated | tags | path |' \
      '|---|---|---|---|---|---|'
    _emit_spec_rows docs/specs/drafts
  } > "$tmp"

  # cmp also reports "different" when $out does not exist yet.
  if ! cmp -s "$tmp" "$out" 2>/dev/null; then
    rc=1
    # Copy the contents rather than `mv` the temp file: mktemp creates it
    # readable by the owner only, and moving it into place would carry those
    # permissions over to the index.
    if cat "$tmp" > "$out"; then
      echo "Regenerated $out"
    else
      echo "error: could not write $out" >&2
    fi
  fi

  rm -f -- "$tmp"
  return "$rc"
}

# Build .context/decisions/INDEX.md from .context/decisions/ADR-*.md.
#
# Each ADR contributes one table row: id comes from the filename, title from the
# first line starting with "# " in the file, and status / date / deciders / tags
# from frontmatter via extract_field. Every cell goes through escape_pipe.
#
# The index is rendered into a temp file and compared with the existing index;
# the index is only written when the content differs, so unchanged runs do not
# touch it.
#
# Outputs: "Regenerated <path>" on stdout when the index was rewritten;
#          a diagnostic on stderr if the temp file or the index cannot be written.
# Returns: 0 if the existing index already matched; 1 if it was rewritten or
#          the rewrite failed.
build_adrs_index() {
  local out=.context/decisions/INDEX.md
  local tmp f id title status date deciders tags
  local found=0
  local rc=0

  tmp=$(mktemp) || {
    echo "error: mktemp failed; cannot build $out" >&2
    return 1
  }

  {
    printf '%s\n' \
      '<!-- Generated by tools/scripts/build-kb-index.sh — do not edit manually.' \
      '     Frontmatter source: .context/decisions/ADR-*.md.' \
      '     ADR frontmatter is sparser than SPEC frontmatter — title comes from the H1.' \
      '     Regenerated by the build-kb-index pre-commit hook on any change. -->' \
      '' \
      '# Aucert ADRs — index' \
      '' \
      '**Agents:** when you need ADR context, Read this index first, then `Read`' \
      'the specific ADR file at `path`. ADRs are typically short — once you have' \
      'narrowed by id and tags, reading the full file is cheap.' \
      '' \
      '| id | title | status | date | deciders | tags | path |' \
      '|---|---|---|---|---|---|---|'

    # Iterate the glob directly instead of parsing `ls` output: paths containing
    # whitespace stay in one piece, and bash returns glob matches already sorted.
    for f in .context/decisions/ADR-*.md; do
      [[ -e "$f" ]] || continue # an unmatched glob is left as the literal pattern
      found=1
      # Keep only the leading "ADR-<digits>" part of the filename.
      id=$(basename "$f" | sed -E 's/^(ADR-[0-9]+).*/\1/')
      # Title: first H1 line. Strip "ADR-NNN: " or "Decision: " prefix if present.
      title=$(grep -m1 '^# ' "$f" | sed -E 's/^# (ADR-[0-9]+: |Decision: )?//')
      status=$(extract_field "$f" status)
      date=$(extract_field "$f" date)
      deciders=$(extract_field "$f" deciders)
      tags=$(extract_field "$f" tags)
      printf '| %s | %s | %s | %s | %s | %s | %s |\n' \
        "$(escape_pipe "$id")" \
        "$(escape_pipe "$title")" \
        "$(escape_pipe "$status")" \
        "$(escape_pipe "$date")" \
        "$(escape_pipe "$deciders")" \
        "$(escape_pipe "$tags")" \
        "$(escape_pipe "$f")"
    done

    if ((found == 0)); then
      echo '| _(none)_ | | | | | | |'
    fi
  } > "$tmp"

  # cmp also reports "different" when $out does not exist yet.
  if ! cmp -s "$tmp" "$out" 2>/dev/null; then
    rc=1
    # Copy the contents rather than `mv` the temp file: mktemp creates it
    # readable by the owner only, and moving it into place would carry those
    # permissions over to the index.
    if cat "$tmp" > "$out"; then
      echo "Regenerated $out"
    else
      echo "error: could not write $out" >&2
    fi
  fi

  rm -f -- "$tmp"
  return "$rc"
}

# Run both builders. Each returns non-zero when it rewrote its index, so the
# calls sit in `if !` guards to keep `set -e` from aborting on that status.
CHANGED=0
if ! build_specs_index; then
  CHANGED=1
fi
if ! build_adrs_index; then
  CHANGED=1
fi

# Nothing rewritten: report and succeed.
if [ "$CHANGED" -ne 1 ]; then
  echo 'INDEX.md files up to date.'
  exit 0
fi

# At least one index was rewritten: exit non-zero and tell the user to re-stage.
echo
echo 'INDEX.md files were regenerated. Stage them with `git add` and re-commit.'
exit 1
