Add development plan with 13 milestone specifications
- docs/plan.md: Master roadmap with phases and priorities - docs/milestones/01-13: Detailed specs for each feature - Updated CLAUDE.md with plan references and build commands Milestones cover: - Phase 1: Temporal versioning, auto-capture, context injection, codebase indexing - Phase 2: Daily journal, content ingestion, graph visualization, import/export - Phase 3: Multi-graph, smart retrieval, TUI dashboard, browser extension, shell completions
This commit is contained in:
260
docs/milestones/04-codebase-indexing.md
Normal file
260
docs/milestones/04-codebase-indexing.md
Normal file
@@ -0,0 +1,260 @@
|
||||
# Milestone 4: Codebase Indexing
|
||||
|
||||
## Overview
|
||||
|
||||
Automatically scan and index project structure, creating component nodes for modules, services, and architectural patterns. Claude understands your codebase from day one.
|
||||
|
||||
## Motivation
|
||||
|
||||
- New projects require extensive explanation to Claude
|
||||
- Architecture decisions are scattered across files
|
||||
- Component relationships aren't captured anywhere
|
||||
- Supermemory's `/index` command is highly valued
|
||||
|
||||
## Features
|
||||
|
||||
### 4.1 Project Scanner
|
||||
|
||||
```bash
|
||||
# Index current project
|
||||
cortex index .
|
||||
|
||||
# Index specific directory
|
||||
cortex index ./src
|
||||
|
||||
# Re-index (update existing)
|
||||
cortex index . --update
|
||||
|
||||
# Index with specific depth
|
||||
cortex index . --depth 3
|
||||
```
|
||||
|
||||
### 4.2 Auto-Detection
|
||||
|
||||
Detect project type and extract relevant info:
|
||||
|
||||
| Project Type | Detection | Extracts |
|
||||
|--------------|-----------|----------|
|
||||
| Node.js | `package.json` | Dependencies, scripts, name |
|
||||
| Python | `pyproject.toml`, `setup.py` | Dependencies, entry points |
|
||||
| Rust | `Cargo.toml` | Crates, features |
|
||||
| Go | `go.mod` | Modules, dependencies |
|
||||
| Generic | `README.md` | Description, setup |
|
||||
|
||||
### 4.3 Component Extraction
|
||||
|
||||
Create nodes for discovered components:
|
||||
|
||||
```typescript
|
||||
interface IndexedComponent {
|
||||
kind: 'component';
|
||||
title: string; // e.g., "UserService"
|
||||
content: string; // Description + key exports
|
||||
tags: string[]; // ['backend', 'service', 'auth']
|
||||
metadata: {
|
||||
filePath: string;
|
||||
language: string;
|
||||
exports: string[];
|
||||
imports: string[];
|
||||
loc: number;
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### 4.4 Relationship Mapping
|
||||
|
||||
Auto-create edges based on imports/dependencies:
|
||||
|
||||
```typescript
|
||||
// File A imports from File B
|
||||
addEdge(componentA.id, componentB.id, 'depends_on');
|
||||
|
||||
// Directory contains files
|
||||
addEdge(directoryNode.id, fileNode.id, 'contains');
|
||||
|
||||
// Module implements interface
|
||||
addEdge(impl.id, iface.id, 'implements');
|
||||
```
|
||||
|
||||
### 4.5 Architecture Summary
|
||||
|
||||
Generate high-level architecture node:
|
||||
|
||||
```typescript
|
||||
const architectureNode = {
|
||||
kind: 'component',
|
||||
title: `${projectName} Architecture`,
|
||||
content: `
|
||||
## Overview
|
||||
${projectDescription}
|
||||
|
||||
## Tech Stack
|
||||
- Runtime: ${runtime}
|
||||
- Framework: ${framework}
|
||||
- Database: ${database}
|
||||
|
||||
## Key Components
|
||||
${components.map(c => `- **${c.title}**: ${c.summary}`).join('\n')}
|
||||
|
||||
## Directory Structure
|
||||
${directoryTree}
|
||||
`,
|
||||
tags: ['architecture', 'index', projectName],
|
||||
};
|
||||
```
|
||||
|
||||
### 4.6 Incremental Updates
|
||||
|
||||
Track indexed files and only re-process changes:
|
||||
|
||||
```typescript
|
||||
interface IndexState {
|
||||
projectPath: string;
|
||||
lastIndexed: number;
|
||||
fileHashes: Record<string, string>; // path -> content hash
|
||||
nodeIds: Record<string, string>; // path -> node ID
|
||||
}
|
||||
```
|
||||
|
||||
## Implementation
|
||||
|
||||
### Scanner Architecture
|
||||
|
||||
```typescript
|
||||
// src/core/indexer/index.ts
|
||||
export async function indexProject(root: string, options: IndexOptions): Promise<IndexResult> {
|
||||
// Detect project type
|
||||
const projectType = await detectProjectType(root);
|
||||
|
||||
// Load existing index state
|
||||
const state = await loadIndexState(root);
|
||||
|
||||
// Scan files
|
||||
const files = await scanFiles(root, {
|
||||
ignore: [...DEFAULT_IGNORE, ...(options.ignore ?? [])],
|
||||
maxDepth: options.depth,
|
||||
});
|
||||
|
||||
// Process each file
|
||||
const components: IndexedComponent[] = [];
|
||||
for (const file of files) {
|
||||
if (shouldSkip(file, state)) continue;
|
||||
|
||||
const component = await extractComponent(file, projectType);
|
||||
if (component) {
|
||||
components.push(component);
|
||||
}
|
||||
}
|
||||
|
||||
// Create/update nodes
|
||||
const nodes = await upsertComponents(components, state);
|
||||
|
||||
// Map relationships
|
||||
const edges = await mapRelationships(nodes, files);
|
||||
|
||||
// Generate architecture summary
|
||||
await generateArchitectureSummary(root, projectType, nodes);
|
||||
|
||||
// Save state
|
||||
await saveIndexState(root, state);
|
||||
|
||||
return { indexed: nodes.length, relationships: edges.length };
|
||||
}
|
||||
```
|
||||
|
||||
### Language Parsers
|
||||
|
||||
```typescript
|
||||
// src/core/indexer/parsers/typescript.ts
|
||||
export async function parseTypeScript(file: string): Promise<ParsedFile> {
|
||||
// Use TypeScript compiler API or tree-sitter
|
||||
const content = await fs.readFile(file, 'utf8'); // fs from 'node:fs/promises'
const ast = ts.createSourceFile(file, content, ts.ScriptTarget.Latest);
|
||||
|
||||
return {
|
||||
exports: extractExports(ast),
|
||||
imports: extractImports(ast),
|
||||
classes: extractClasses(ast),
|
||||
functions: extractFunctions(ast),
|
||||
interfaces: extractInterfaces(ast),
|
||||
};
|
||||
}
|
||||
|
||||
// Parsers for: JavaScript, Python, Rust, Go, etc.
|
||||
```
|
||||
|
||||
### Ignore Patterns
|
||||
|
||||
```typescript
|
||||
const DEFAULT_IGNORE = [
|
||||
'node_modules',
|
||||
'.git',
|
||||
'dist',
|
||||
'build',
|
||||
'__pycache__',
|
||||
'.env*',
|
||||
'*.min.js',
|
||||
'*.map',
|
||||
'coverage',
|
||||
'.next',
|
||||
'target', // Rust
|
||||
'vendor', // Go
|
||||
];
|
||||
```
|
||||
|
||||
## CLI Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `cortex index [path]` | Index project at path |
|
||||
| `cortex index --update` | Update existing index |
|
||||
| `cortex index --dry-run` | Preview what would be indexed |
|
||||
| `cortex index --depth <n>` | Limit directory depth |
|
||||
| `cortex index --lang <lang>` | Only index specific language |
|
||||
|
||||
## MCP Tools
|
||||
|
||||
```typescript
|
||||
memory_index // Index current project
|
||||
memory_reindex // Force re-index
|
||||
memory_components // List indexed components
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
- [ ] Detects Node.js, Python, Rust, Go projects
|
||||
- [ ] Creates component nodes for modules
|
||||
- [ ] Maps import relationships correctly
|
||||
- [ ] Respects .gitignore patterns
|
||||
- [ ] Incremental update only processes changes
|
||||
- [ ] Architecture summary is accurate
|
||||
- [ ] Performance: a full index of a 10,000-file project completes in under 30 seconds
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `cortex index .` creates meaningful component nodes
|
||||
- [ ] Relationships reflect actual code dependencies
|
||||
- [ ] Architecture summary provides useful overview
|
||||
- [ ] Incremental updates are fast
|
||||
- [ ] Works with monorepos
|
||||
- [ ] MCP tool enables Claude to trigger indexing
|
||||
|
||||
## Estimated Effort
|
||||
|
||||
- Project detection: 2 hours
|
||||
- File scanner: 3 hours
|
||||
- TypeScript parser: 4 hours
|
||||
- Python parser: 3 hours
|
||||
- Relationship mapping: 4 hours
|
||||
- Architecture summary: 3 hours
|
||||
- Incremental updates: 3 hours
|
||||
- Testing: 3 hours
|
||||
- **Total: ~25 hours**
|
||||
|
||||
## Dependencies
|
||||
|
||||
- None. This milestone enhances Milestone 3 but can be implemented independently of it.
|
||||
|
||||
## References
|
||||
|
||||
- [tree-sitter](https://tree-sitter.github.io/tree-sitter/) for parsing
|
||||
- [Sourcebot architecture](https://github.com/sourcebot-dev/sourcebot)
|
||||
Reference in New Issue
Block a user