Add development plan with 13 milestone specifications
- docs/plan.md: Master roadmap with phases and priorities - docs/milestones/01-13: Detailed specs for each feature - Updated CLAUDE.md with plan references and build commands Milestones cover: - Phase 1: Temporal versioning, auto-capture, context injection, codebase indexing - Phase 2: Daily journal, content ingestion, graph visualization, import/export - Phase 3: Multi-graph, smart retrieval, TUI dashboard, browser extension, shell completions
This commit is contained in:
299
docs/milestones/10-smart-retrieval.md
Normal file
299
docs/milestones/10-smart-retrieval.md
Normal file
@@ -0,0 +1,299 @@
|
||||
# Milestone 10: Smart Retrieval
|
||||
|
||||
## Overview
|
||||
|
||||
Context-aware, git-integrated search that understands what you're working on and retrieves the most relevant memories.
|
||||
|
||||
## Motivation
|
||||
|
||||
- Current search requires explicit queries
|
||||
- Relevance should consider current context
|
||||
- Git changes indicate what's important right now
|
||||
- Reduce cognitive load of "what should I search for?"
|
||||
|
||||
## Features
|
||||
|
||||
### 10.1 Context-Aware Search
|
||||
|
||||
```bash
|
||||
# Search based on current context (files, git status)
|
||||
cortex smart-search
|
||||
cortex ss # Alias
|
||||
|
||||
# Combine with explicit query
|
||||
cortex smart-search "authentication"
|
||||
```
|
||||
|
||||
### 10.2 Git-Integrated Relevance
|
||||
|
||||
```typescript
|
||||
/** Repository state used as a relevance signal for git-integrated search. */
interface GitContext {
  /** Name of the current branch. */
  branch: string;

  /** Last 5 commit messages. */
  recentCommits: string[];

  /** Files with uncommitted changes. */
  modifiedFiles: string[];

  /** Files currently staged. */
  stagedFiles: string[];

  /** Files in recent commits. */
  recentlyTouched: string[];
}
|
||||
```
|
||||
|
||||
### 10.3 File-Based Relevance
|
||||
|
||||
```typescript
|
||||
/**
 * Collects the file-level signals used to boost nodes related to
 * currently open/modified files.
 *
 * @returns The working directory, git-modified files, recently accessed
 *          files and detected project type.
 */
function getFileContext(): FileContext {
  const cwd = process.cwd();
  const modifiedFiles = getGitModified();
  const recentFiles = getRecentlyAccessed();
  const projectType = detectProjectType();

  return { cwd, modifiedFiles, recentFiles, projectType };
}
|
||||
```
|
||||
|
||||
### 10.4 Time-Aware Boosting
|
||||
|
||||
```typescript
|
||||
/** Score multipliers keyed by how recently a node was last accessed. */
interface TimeBoost {
  /** Accessed in the last hour. */
  lastHour: 1.5;

  /** Accessed within the last day. */
  lastDay: 1.3;

  /** Accessed within the last week. */
  lastWeek: 1.1;

  /** Anything older: no boost. */
  older: 1.0;
}
|
||||
```
|
||||
|
||||
### 10.5 Related Node Expansion
|
||||
|
||||
```bash
|
||||
# Find nodes and expand to related
|
||||
cortex query "auth" --expand
|
||||
|
||||
# Returns auth nodes + nodes they link to
|
||||
```
|
||||
|
||||
### 10.6 "What Should I Know?" Command
|
||||
|
||||
```bash
|
||||
# Proactive: what's relevant right now?
|
||||
cortex context
|
||||
cortex what # Alias
|
||||
|
||||
# Returns:
|
||||
# - Memories related to current git branch
|
||||
# - Decisions about modified files
|
||||
# - Open tasks for this project
|
||||
# - Recent work in this area
|
||||
```
|
||||
|
||||
## Implementation
|
||||
|
||||
### Smart Search Pipeline
|
||||
|
||||
```typescript
|
||||
// src/core/search/smart.ts
|
||||
/**
 * Runs a context-aware search.
 *
 * Gathers git, file and project context, derives an implicit query from it,
 * prepends the caller's explicit query when one is given, runs the hybrid
 * `query` for a wider candidate set and returns the top results after
 * context-based re-ranking.
 *
 * @param explicitQuery Optional user-supplied search terms.
 * @returns At most 20 re-ranked search results.
 */
export async function smartSearch(explicitQuery?: string): Promise<SearchResult[]> {
  const CANDIDATE_LIMIT = 50; // fetched before re-ranking
  const RESULT_LIMIT = 20; // returned to the caller

  // The three context sources do not depend on each other, so gather them
  // concurrently instead of awaiting them one after another.
  const [gitContext, fileContext, projectContext] = await Promise.all([
    getGitContext(),
    getFileContext(),
    getProjectContext(),
  ]);

  // Build implicit query from context
  const implicitQuery = buildImplicitQuery(gitContext, fileContext, projectContext);

  // Explicit terms come first. Blank sides are dropped so the combined query
  // never carries leading/trailing whitespace when one side is empty.
  const combinedQuery = [explicitQuery, implicitQuery]
    .map(part => part?.trim() ?? '')
    .filter(Boolean)
    .join(' ');

  // Run hybrid search
  const results = await query(combinedQuery, { limit: CANDIDATE_LIMIT });

  // Re-rank based on context signals
  const reranked = rerankResults(results, {
    gitContext,
    fileContext,
    timeBoosts: TIME_BOOSTS,
  });

  return reranked.slice(0, RESULT_LIMIT);
}
|
||||
|
||||
/**
 * Builds the implicit search query from ambient context.
 *
 * Terms are collected in this order: the branch name (unless it is `main` or
 * `master`) with `-`, `_` and `/` turned into spaces, the first three recent
 * commit messages, the base names of modified files without their extension,
 * and the project name. Blank terms and exact repeats are dropped so the
 * query contains no doubled spaces or duplicated noise.
 *
 * @param git Git context supplying the branch and recent commit messages.
 * @param file File context supplying the modified file paths.
 * @param project Project context supplying the project name.
 * @returns The terms joined by single spaces; `''` when no term is available.
 */
function buildImplicitQuery(git: GitContext, file: FileContext, project: ProjectContext): string {
  const candidates: Array<string | null | undefined> = [];

  // Add branch name (often contains feature/ticket info)
  if (git.branch && git.branch !== 'main' && git.branch !== 'master') {
    candidates.push(git.branch.replace(/[-_\/]/g, ' '));
  }

  // Add recent commit messages
  candidates.push(...git.recentCommits.slice(0, 3));

  // Add modified file names (without extension)
  candidates.push(...file.modifiedFiles.map(f => path.basename(f, path.extname(f))));

  // Add project name
  candidates.push(project.name);

  // A Set keeps first-seen order while removing exact duplicates.
  const terms = new Set<string>();
  for (const candidate of candidates) {
    const term = candidate?.trim();
    if (term) terms.add(term);
  }

  return [...terms].join(' ');
}
|
||||
```
|
||||
|
||||
### Git Context Extractor
|
||||
|
||||
```typescript
|
||||
// src/core/search/git-context.ts
|
||||
/**
 * Runs one git command and returns its non-empty, trimmed output lines.
 * Any failure (git missing, not a repository, non-zero exit) yields `[]`,
 * and git's stderr is discarded so nothing leaks to the user's terminal.
 */
function readGitLines(command: string): string[] {
  try {
    return execSync(command, {
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'ignore'],
    })
      .split('\n')
      .map(line => line.trim())
      .filter(Boolean);
  } catch {
    return [];
  }
}

/**
 * Collects the git signals used for relevance: current branch, the last five
 * commit subjects, unstaged and staged file names, and the files touched by
 * the last five commits.
 *
 * Each signal is gathered independently, so one failing command (for example
 * `git log` in a repository without commits) does not discard the others.
 * Outside a git work tree every field is empty.
 *
 * @returns A fully populated `GitContext`; never rejects because of git errors.
 */
export async function getGitContext(): Promise<GitContext> {
  // Probe once so a non-git directory costs a single failed command.
  const insideWorkTree = readGitLines('git rev-parse --is-inside-work-tree')[0] === 'true';
  if (!insideWorkTree) {
    return {
      branch: '',
      recentCommits: [],
      modifiedFiles: [],
      stagedFiles: [],
      recentlyTouched: [],
    };
  }

  // Empty on a detached HEAD, where there is no current branch name.
  const branch = readGitLines('git branch --show-current')[0] ?? '';

  // %s prints only the subject line, so no hash has to be stripped.
  const recentCommits = readGitLines('git log -5 --format=%s');
  const modifiedFiles = readGitLines('git diff --name-only');
  const stagedFiles = readGitLines('git diff --staged --name-only');

  // An empty format leaves just the file names; a file changed in several
  // commits is reported once.
  const recentlyTouched = [...new Set(readGitLines('git log -5 --name-only --format='))];

  return { branch, recentCommits, modifiedFiles, stagedFiles, recentlyTouched };
}
|
||||
```
|
||||
|
||||
### Re-ranking Algorithm
|
||||
|
||||
```typescript
|
||||
/**
 * Re-scores search results with context signals and sorts them best-first.
 *
 * Each result's score is multiplied by:
 *  - a recency multiplier chosen from `context.timeBoosts` by how long ago the
 *    node was last accessed (defaults: 1.5 / 1.3 / 1.1 / 1.0 for the last
 *    hour / day / week / older),
 *  - `1 + 0.2 * n`, where `n` is the number of the node's `metadata.files`
 *    that are currently git-modified,
 *  - 1.3 when one of the node's tags equals the current branch name.
 *
 * The input array and its elements are not mutated.
 *
 * @param results Candidate results to re-rank.
 * @param context Git context plus the optional recency multipliers.
 * @returns New result objects with adjusted scores, highest score first.
 */
function rerankResults(
  results: SearchResult[],
  context: RerankContext
): SearchResult[] {
  const HOUR_MS = 60 * 60 * 1000;
  const DAY_MS = 24 * HOUR_MS;
  const WEEK_MS = 7 * DAY_MS;

  // Honour the multipliers supplied by the caller; fall back to the
  // documented defaults when none are given.
  const timeBoosts = context.timeBoosts ?? {
    lastHour: 1.5,
    lastDay: 1.3,
    lastWeek: 1.1,
    older: 1.0,
  };

  // Hoisted: one clock reading and one lookup set for the whole batch.
  const now = Date.now();
  const modifiedFiles = new Set(context.gitContext.modifiedFiles);
  const branch = context.gitContext.branch;

  return results
    .map(result => {
      let boost = 1.0;

      // Time boost. A missing timestamp makes `age` NaN, which fails every
      // comparison and therefore lands in the `older` bucket.
      const age = now - result.node.lastAccessedAt;
      if (age < HOUR_MS) boost *= timeBoosts.lastHour;
      else if (age < DAY_MS) boost *= timeBoosts.lastDay;
      else if (age < WEEK_MS) boost *= timeBoosts.lastWeek;
      else boost *= timeBoosts.older;

      // File relevance boost
      const nodeFiles = result.node.metadata?.files ?? [];
      const overlap = nodeFiles.filter(f => modifiedFiles.has(f)).length;
      if (overlap > 0) boost *= 1.0 + (0.2 * overlap);

      // Branch relevance boost. Skipped when the branch is empty (no
      // repository or detached HEAD) so an empty tag cannot match it.
      if (branch && (result.node.tags ?? []).includes(branch)) {
        boost *= 1.3;
      }

      return { ...result, score: result.score * boost };
    })
    .sort((a, b) => b.score - a.score);
}
|
||||
```
|
||||
|
||||
### "What Should I Know?" Command
|
||||
|
||||
```typescript
|
||||
// src/cli/commands/what.ts
|
||||
/**
 * Prints one section: a cyan heading followed by the titles of at most the
 * first three items. Prints nothing when `items` is empty.
 *
 * @param heading Text of the section heading.
 * @param items Entries whose `title` is listed.
 * @param trailingBlankLine Whether to emit an empty line after the section.
 */
function printWhatSection(
  heading: string,
  items: ReadonlyArray<{ title: string }>,
  trailingBlankLine: boolean
): void {
  if (items.length === 0) return;

  console.log(chalk.cyan(heading));
  for (const item of items.slice(0, 3)) {
    console.log(` • ${item.title}`);
  }
  if (trailingBlankLine) console.log();
}

/**
 * Implements the "what should I know?" command: gathers the full context and
 * prints up to three entries each for the current branch, the modified
 * files, open tasks and recent decisions. Empty sections are omitted.
 */
export async function whatCommand(): Promise<void> {
  const context = await gatherFullContext();

  console.log(chalk.bold('📚 What you should know:\n'));

  // Related to current branch
  printWhatSection('Branch: ' + context.gitContext.branch, context.branchRelated, true);

  // Related to modified files
  printWhatSection('Related to changes:', context.fileRelated, true);

  // Open tasks
  printWhatSection('Open tasks:', context.tasks, true);

  // Recent decisions (last section, so no blank line follows it)
  printWhatSection('Recent decisions:', context.decisions, false);
}
|
||||
```
|
||||
|
||||
## CLI Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `cortex smart-search [query]` | Context-aware search |
|
||||
| `cortex ss [query]` | Alias for `cortex smart-search` |
|
||||
| `cortex context` (alias: `cortex what`) | What should I know right now? |
|
||||
| `cortex query <query> --expand` | Expand results to related (linked) nodes |
|
||||
|
||||
## MCP Tools
|
||||
|
||||
```typescript
|
||||
memory_smart_search // Context-aware search
|
||||
memory_what // Proactive context retrieval
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
- [ ] Git context extraction works
|
||||
- [ ] Modified files boost relevant nodes
|
||||
- [ ] Branch name improves relevance
|
||||
- [ ] Time boosting works correctly
|
||||
- [ ] "What" command returns useful results
|
||||
- [ ] Works when not in git repo (graceful fallback)
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Smart search outperforms basic search in relevance
|
||||
- [ ] Git integration provides useful signals
|
||||
- [ ] "What" command surfaces actionable info
|
||||
- [ ] No performance regression
|
||||
- [ ] Works in non-git directories
|
||||
|
||||
## Estimated Effort
|
||||
|
||||
- Git context extraction: 2 hours
|
||||
- Smart search pipeline: 4 hours
|
||||
- Re-ranking algorithm: 3 hours
|
||||
- "What" command: 3 hours
|
||||
- MCP tools: 2 hours
|
||||
- Testing: 3 hours
|
||||
- **Total: ~17 hours**
|
||||
|
||||
## Dependencies
|
||||
|
||||
- Git (optional, graceful fallback)
|
||||
|
||||
## References
|
||||
|
||||
- [Semantic search best practices](https://www.anthropic.com/research/rag)
|
||||
- [Learning to rank](https://en.wikipedia.org/wiki/Learning_to_rank)
|
||||
Reference in New Issue
Block a user