Files
cortex/docs/milestones/10-smart-retrieval.md
omigamedev d484f61b29 Add development plan with 13 milestone specifications
- docs/plan.md: Master roadmap with phases and priorities
- docs/milestones/01-13: Detailed specs for each feature
- Updated CLAUDE.md with plan references and build commands

Milestones cover:
- Phase 1: Temporal versioning, auto-capture, context injection, codebase indexing
- Phase 2: Daily journal, content ingestion, graph visualization, import/export
- Phase 3: Multi-graph, smart retrieval, TUI dashboard, browser extension, shell completions
2026-02-03 09:36:08 +01:00

7.5 KiB

Milestone 10: Smart Retrieval

Overview

Context-aware, git-integrated search that understands what you're working on and retrieves the most relevant memories.

Motivation

  • Current search requires explicit queries
  • Relevance should consider current context
  • Git changes indicate what's important right now
  • Reduce cognitive load of "what should I search for?"

Features

10.1 Context-Aware Search

# Search based on current context (files, git status)
cortex smart-search
cortex ss  # Alias

# Combine with explicit query
cortex smart-search "authentication"

10.2 Git-Integrated Relevance

/**
 * Snapshot of the git state of the working directory, used as a relevance
 * signal for smart search.
 */
interface GitContext {
  // Name of the current branch
  branch: string;
  recentCommits: string[];    // Last 5 commit messages
  modifiedFiles: string[];    // Uncommitted changes
  // Files that have been staged
  stagedFiles: string[];
  recentlyTouched: string[];  // Files in recent commits
}

10.3 File-Based Relevance

// Gather the file-level signals used to boost nodes related to
// currently open/modified files.
function getFileContext(): FileContext {
  // Collected one by one, in the same order as the returned fields.
  const cwd = process.cwd();
  const modifiedFiles = getGitModified();
  const recentFiles = getRecentlyAccessed();
  const projectType = detectProjectType();

  return { cwd, modifiedFiles, recentFiles, projectType };
}

10.4 Time-Aware Boosting

/**
 * Score multipliers applied according to how recently a node was accessed.
 * The members are numeric literal types, so the interface pins the exact
 * multiplier values at the type level.
 */
interface TimeBoost {
  lastHour: 1.5;      // Accessed in last hour
  lastDay: 1.3;       // Accessed today
  lastWeek: 1.1;      // Accessed this week
  older: 1.0;         // No boost
}

/**
 * Runtime counterpart of {@link TimeBoost}. An interface is erased at compile
 * time, so code that needs the actual multipliers must read them from this
 * constant.
 */
const TIME_BOOSTS: TimeBoost = {
  lastHour: 1.5,
  lastDay: 1.3,
  lastWeek: 1.1,
  older: 1.0,
};

10.5 Graph Expansion

# Find nodes and expand to related
cortex query "auth" --expand

# Returns auth nodes + nodes they link to

10.6 "What Should I Know?" Command

# Proactive: what's relevant right now?
cortex context
cortex what  # Alias

# Returns:
# - Memories related to current git branch
# - Decisions about modified files
# - Open tasks for this project
# - Recent work in this area

Implementation

Smart Search Pipeline

// src/core/search/smart.ts
/**
 * Context-aware search: derives an implicit query from the current git, file
 * and project context, optionally combines it with an explicit query, runs
 * the hybrid search and re-ranks the hits using the same context signals.
 *
 * @param explicitQuery - Optional user-supplied query; placed before the
 *   implicit, context-derived terms.
 * @returns The 20 highest-scoring results after re-ranking.
 */
export async function smartSearch(explicitQuery?: string): Promise<SearchResult[]> {
  // Size of the candidate pool fetched before re-ranking, and of the final page.
  const candidateLimit = 50;
  const resultLimit = 20;

  // The three context sources do not depend on each other, so gather them
  // concurrently instead of awaiting them one after another.
  const [gitContext, fileContext, projectContext] = await Promise.all([
    getGitContext(),
    getFileContext(),
    getProjectContext(),
  ]);

  // Build implicit query from context
  const implicitQuery = buildImplicitQuery(gitContext, fileContext, projectContext);

  // Combine with explicit query, skipping whichever half is empty so the
  // search never receives leading/trailing or doubled spaces.
  const combinedQuery = [explicitQuery, implicitQuery]
    .map(part => (part ?? '').trim())
    .filter(Boolean)
    .join(' ');

  // Run hybrid search
  const results = await query(combinedQuery, { limit: candidateLimit });

  // Re-rank based on context signals
  const reranked = rerankResults(results, {
    gitContext,
    fileContext,
    timeBoosts: TIME_BOOSTS,
  });

  return reranked.slice(0, resultLimit);
}

/**
 * Builds a free-text query from the ambient context.
 *
 * Terms are emitted in this order: branch name (unless it is `main` or
 * `master`), up to three recent commit messages, the base names of modified
 * files without their extension, and the project name.
 *
 * @param git - Git state; `branch` and `recentCommits` are read.
 * @param file - File context; `modifiedFiles` is read.
 * @param project - Project context; `name` is read.
 * @returns The terms joined by single spaces, or `''` when no term is available.
 */
function buildImplicitQuery(git: GitContext, file: FileContext, project: ProjectContext): string {
  const parts: string[] = [];

  // Add branch name (often contains feature/ticket info)
  if (git.branch && git.branch !== 'main' && git.branch !== 'master') {
    parts.push(git.branch.replace(/[-_\/]/g, ' '));
  }

  // Add recent commit messages
  parts.push(...git.recentCommits.slice(0, 3));

  // Add modified file names (without extension)
  parts.push(...file.modifiedFiles.map(f => path.basename(f, path.extname(f))));

  // Add project name
  parts.push(project.name);

  // Drop blank terms (empty commit subject, missing project name, ...) so the
  // result has no doubled or trailing spaces.
  return parts
    .map(part => (part ?? '').trim())
    .filter(Boolean)
    .join(' ');
}

Git Context Extractor

// src/core/search/git-context.ts
/**
 * Collects git signals for the current working directory.
 *
 * Never throws: outside a git work tree (or when git is unavailable) every
 * field is empty, and a single failing git command only empties the field it
 * feeds instead of discarding the whole context.
 *
 * @returns Branch name, the subjects of the last 5 commits, unstaged and
 *   staged file paths, and the de-duplicated files touched by the last 5 commits.
 */
export async function getGitContext(): Promise<GitContext> {
  // Runs one git command and returns its non-empty output lines.
  // stderr is discarded so git's own error text never reaches the terminal.
  const runGit = (args: string): string[] => {
    try {
      return execSync(`git ${args}`, {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'ignore'],
      })
        .split(/\r?\n/)
        .filter(Boolean);
    } catch {
      return [];
    }
  };

  // Bail out early when not inside a work tree rather than running (and
  // failing) every command below.
  if (runGit('rev-parse --is-inside-work-tree')[0] !== 'true') {
    return { branch: '', recentCommits: [], modifiedFiles: [], stagedFiles: [], recentlyTouched: [] };
  }

  // Empty on a detached HEAD, where there is no current branch.
  const branch = (runGit('branch --show-current')[0] ?? '').trim();
  // %s prints only the commit subject, so no hash has to be stripped.
  const recentCommits = runGit('log --format=%s -5');
  const modifiedFiles = runGit('diff --name-only');
  const stagedFiles = runGit('diff --staged --name-only');
  // An empty pretty format leaves just the file names; a file changed in
  // several commits is listed once.
  const recentlyTouched = [...new Set(runGit('log --name-only --pretty=format: -5'))];

  return { branch, recentCommits, modifiedFiles, stagedFiles, recentlyTouched };
}

Re-ranking Algorithm

/**
 * Re-scores search results with context signals and sorts them by the new
 * score, highest first. The input array and its elements are not mutated.
 *
 * Multipliers applied to each result's score:
 * - recency of `node.lastAccessedAt` (last hour / day / week / older),
 * - `1 + 0.2 * n`, where `n` is the number of the node's `metadata.files`
 *   that are currently modified,
 * - `1.3` when the node is tagged with the current branch name.
 *
 * @param results - Candidate results to re-rank.
 * @param context - Git context plus optional recency multipliers.
 *   NOTE(review): `context.timeBoosts` is assumed to carry `lastHour`,
 *   `lastDay`, `lastWeek` and `older`; confirm against `RerankContext`.
 * @returns A new array of re-scored results in descending score order.
 */
function rerankResults(
  results: SearchResult[],
  context: RerankContext
): SearchResult[] {
  const HOUR_MS = 60 * 60 * 1000;
  const DAY_MS = 24 * HOUR_MS;
  const WEEK_MS = 7 * DAY_MS;

  // Honour the multipliers supplied by the caller; fall back to the
  // documented defaults when they are absent.
  const lastHourBoost = context.timeBoosts?.lastHour ?? 1.5;
  const lastDayBoost = context.timeBoosts?.lastDay ?? 1.3;
  const lastWeekBoost = context.timeBoosts?.lastWeek ?? 1.1;
  const olderBoost = context.timeBoosts?.older ?? 1.0;

  // Loop-invariant lookups, built once instead of once per result.
  const modified = new Set(context.gitContext.modifiedFiles);
  const branch = context.gitContext.branch;
  const now = Date.now();

  return results
    .map(result => {
      let boost = 1.0;

      // Time boost
      const age = now - result.node.lastAccessedAt;
      if (age < HOUR_MS) boost *= lastHourBoost;
      else if (age < DAY_MS) boost *= lastDayBoost;
      else if (age < WEEK_MS) boost *= lastWeekBoost;
      else boost *= olderBoost;

      // File relevance boost
      const nodeFiles = result.node.metadata?.files || [];
      const overlap = nodeFiles.filter(f => modified.has(f)).length;
      if (overlap > 0) boost *= 1.0 + (0.2 * overlap);

      // Branch relevance boost. An empty branch means "no git context"
      // (non-git directory or detached HEAD) and must never match a tag.
      if (branch && result.node.tags.includes(branch)) {
        boost *= 1.3;
      }

      return { ...result, score: result.score * boost };
    })
    .sort((a, b) => b.score - a.score);
}

"What Should I Know?" Command

// src/cli/commands/what.ts
/**
 * CLI handler for the "what should I know?" command: gathers the full
 * context and prints up to three titles for each non-empty category
 * (branch-related nodes, nodes related to changes, open tasks, recent
 * decisions).
 */
export async function whatCommand(): Promise<void> {
  const ctx = await gatherFullContext();

  // Prints one category: a cyan heading, at most three bullet lines and an
  // optional trailing blank line. Categories with no entries print nothing.
  // The heading is a thunk so it is only evaluated for non-empty categories.
  const printSection = (
    entries: ReadonlyArray<{ title: string }>,
    heading: () => string,
    trailingBlank: boolean
  ): void => {
    if (!(entries.length > 0)) {
      return;
    }
    console.log(chalk.cyan(heading()));
    entries.slice(0, 3).forEach(entry => {
      console.log(`  • ${entry.title}`);
    });
    if (trailingBlank) {
      console.log();
    }
  };

  console.log(chalk.bold('📚 What you should know:\n'));

  // Related to current branch
  printSection(ctx.branchRelated, () => 'Branch: ' + ctx.gitContext.branch, true);

  // Related to modified files
  printSection(ctx.fileRelated, () => 'Related to changes:', true);

  // Open tasks
  printSection(ctx.tasks, () => 'Open tasks:', true);

  // Recent decisions (last category, so no blank line after it)
  printSection(ctx.decisions, () => 'Recent decisions:', false);
}

CLI Commands

| Command | Description |
| --- | --- |
| cortex smart-search [query] | Context-aware search |
| cortex ss [query] | Alias for smart-search |
| cortex what | What should I know right now? |
| cortex query --expand | Expand to related nodes |

MCP Tools

memory_smart_search   // Context-aware search
memory_what           // Proactive context retrieval

Testing

  • Git context extraction works
  • Modified files boost relevant nodes
  • Branch name improves relevance
  • Time boosting works correctly
  • "What" command returns useful results
  • Works when not in git repo (graceful fallback)

Acceptance Criteria

  • Smart search outperforms basic search in relevance
  • Git integration provides useful signals
  • "What" command surfaces actionable info
  • No performance regression
  • Works in non-git directories

Estimated Effort

  • Git context extraction: 2 hours
  • Smart search pipeline: 4 hours
  • Re-ranking algorithm: 3 hours
  • "What" command: 3 hours
  • MCP tools: 2 hours
  • Testing: 3 hours
  • Total: ~17 hours

Dependencies

  • Git (optional, graceful fallback)

References