- docs/plan.md: Master roadmap with phases and priorities
- docs/milestones/01-13: Detailed specs for each feature
- Updated CLAUDE.md with plan references and build commands

Milestones cover:
- Phase 1: Temporal versioning, auto-capture, context injection, codebase indexing
- Phase 2: Daily journal, content ingestion, graph visualization, import/export
- Phase 3: Multi-graph, smart retrieval, TUI dashboard, browser extension, shell completions
261 lines
6.0 KiB
Markdown
261 lines
6.0 KiB
Markdown
# Milestone 4: Codebase Indexing
|
|
|
|
## Overview
|
|
|
|
Automatically scan and index project structure, creating component nodes for modules, services, and architectural patterns. Claude understands your codebase from day one.
|
|
|
|
## Motivation
|
|
|
|
- New projects require extensive explanation to Claude
|
|
- Architecture decisions are scattered across files
|
|
- Component relationships aren't captured anywhere
|
|
- Supermemory's `/index` command is highly valued
|
|
|
|
## Features
|
|
|
|
### 4.1 Project Scanner
|
|
|
|
```bash
|
|
# Index current project
|
|
cortex index .
|
|
|
|
# Index specific directory
|
|
cortex index ./src
|
|
|
|
# Re-index (update existing)
|
|
cortex index . --update
|
|
|
|
# Index with specific depth
|
|
cortex index . --depth 3
|
|
```
|
|
|
|
### 4.2 Auto-Detection
|
|
|
|
Detect project type and extract relevant info:
|
|
|
|
| Project Type | Detection | Extracts |
|
|
|--------------|-----------|----------|
|
|
| Node.js | `package.json` | Dependencies, scripts, name |
|
|
| Python | `pyproject.toml`, `setup.py` | Dependencies, entry points |
|
|
| Rust | `Cargo.toml` | Crates, features |
|
|
| Go | `go.mod` | Modules, dependencies |
|
|
| Generic | `README.md` | Description, setup |
|
|
|
|
### 4.3 Component Extraction
|
|
|
|
Create nodes for discovered components:
|
|
|
|
```typescript
|
|
interface IndexedComponent {
|
|
kind: 'component';
|
|
title: string; // e.g., "UserService"
|
|
content: string; // Description + key exports
|
|
tags: string[]; // ['backend', 'service', 'auth']
|
|
metadata: {
|
|
filePath: string;
|
|
language: string;
|
|
exports: string[];
|
|
imports: string[];
|
|
loc: number;
|
|
};
|
|
}
|
|
```
|
|
|
|
### 4.4 Relationship Mapping
|
|
|
|
Auto-create edges based on imports/dependencies:
|
|
|
|
```typescript
|
|
// File A imports from File B
|
|
addEdge(componentA.id, componentB.id, 'depends_on');
|
|
|
|
// Directory contains files
|
|
addEdge(directoryNode.id, fileNode.id, 'contains');
|
|
|
|
// Module implements interface
|
|
addEdge(impl.id, iface.id, 'implements');
|
|
```
|
|
|
|
### 4.5 Architecture Summary
|
|
|
|
Generate high-level architecture node:
|
|
|
|
```typescript
|
|
const architectureNode = {
|
|
kind: 'component',
|
|
title: `${projectName} Architecture`,
|
|
content: `
|
|
## Overview
|
|
${projectDescription}
|
|
|
|
## Tech Stack
|
|
- Runtime: ${runtime}
|
|
- Framework: ${framework}
|
|
- Database: ${database}
|
|
|
|
## Key Components
|
|
${components.map(c => `- **${c.title}**: ${c.summary}`).join('\n')}
|
|
|
|
## Directory Structure
|
|
${directoryTree}
|
|
`,
|
|
tags: ['architecture', 'index', projectName],
|
|
};
|
|
```
|
|
|
|
### 4.6 Incremental Updates
|
|
|
|
Track indexed files and only re-process changes:
|
|
|
|
```typescript
|
|
interface IndexState {
|
|
projectPath: string;
|
|
lastIndexed: number;
|
|
fileHashes: Record<string, string>; // path -> content hash
|
|
nodeIds: Record<string, string>; // path -> node ID
|
|
}
|
|
```
|
|
|
|
## Implementation
|
|
|
|
### Scanner Architecture
|
|
|
|
```typescript
|
|
// src/core/indexer/index.ts
|
|
export async function indexProject(root: string, options: IndexOptions): Promise<IndexResult> {
|
|
// Detect project type
|
|
const projectType = await detectProjectType(root);
|
|
|
|
// Load existing index state
|
|
const state = await loadIndexState(root);
|
|
|
|
// Scan files
|
|
const files = await scanFiles(root, {
|
|
ignore: [...DEFAULT_IGNORE, ...options.ignore],
|
|
maxDepth: options.depth,
|
|
});
|
|
|
|
// Process each file
|
|
const components: IndexedComponent[] = [];
|
|
for (const file of files) {
|
|
if (shouldSkip(file, state)) continue;
|
|
|
|
const component = await extractComponent(file, projectType);
|
|
if (component) {
|
|
components.push(component);
|
|
}
|
|
}
|
|
|
|
// Create/update nodes
|
|
const nodes = await upsertComponents(components, state);
|
|
|
|
// Map relationships
|
|
const edges = await mapRelationships(nodes, files);
|
|
|
|
// Generate architecture summary
|
|
await generateArchitectureSummary(root, projectType, nodes);
|
|
|
|
// Save state
|
|
await saveIndexState(root, state);
|
|
|
|
return { indexed: nodes.length, relationships: edges.length };
|
|
}
|
|
```
|
|
|
|
### Language Parsers
|
|
|
|
```typescript
|
|
// src/core/indexer/parsers/typescript.ts
|
|
export async function parseTypeScript(file: string): Promise<ParsedFile> {
|
|
// Use TypeScript compiler API or tree-sitter
|
|
const content = await readFile(file, 'utf8'); // readFile from 'node:fs/promises'
const ast = ts.createSourceFile(file, content, ts.ScriptTarget.Latest);
|
|
|
|
return {
|
|
exports: extractExports(ast),
|
|
imports: extractImports(ast),
|
|
classes: extractClasses(ast),
|
|
functions: extractFunctions(ast),
|
|
interfaces: extractInterfaces(ast),
|
|
};
|
|
}
|
|
|
|
// Parsers for: JavaScript, Python, Rust, Go, etc.
|
|
```
|
|
|
|
### Ignore Patterns
|
|
|
|
```typescript
|
|
const DEFAULT_IGNORE = [
|
|
'node_modules',
|
|
'.git',
|
|
'dist',
|
|
'build',
|
|
'__pycache__',
|
|
'.env*',
|
|
'*.min.js',
|
|
'*.map',
|
|
'coverage',
|
|
'.next',
|
|
'target', // Rust
|
|
'vendor', // Go
|
|
];
|
|
```
|
|
|
|
## CLI Commands
|
|
|
|
| Command | Description |
|
|
|---------|-------------|
|
|
| `cortex index [path]` | Index project at path |
|
|
| `cortex index --update` | Update existing index |
|
|
| `cortex index --dry-run` | Preview what would be indexed |
|
|
| `cortex index --depth <n>` | Limit directory depth |
|
|
| `cortex index --lang <lang>` | Only index specific language |
|
|
|
|
## MCP Tools
|
|
|
|
```typescript
|
|
memory_index // Index current project
|
|
memory_reindex // Force re-index
|
|
memory_components // List indexed components
|
|
```
|
|
|
|
## Testing
|
|
|
|
- [ ] Detects Node.js, Python, Rust, Go projects
|
|
- [ ] Creates component nodes for modules
|
|
- [ ] Maps import relationships correctly
|
|
- [ ] Respects .gitignore patterns
|
|
- [ ] Incremental update only processes changes
|
|
- [ ] Architecture summary is accurate
|
|
- [ ] Performance: <30s for 10k file project
|
|
|
|
## Acceptance Criteria
|
|
|
|
- [ ] `cortex index .` creates meaningful component nodes
|
|
- [ ] Relationships reflect actual code dependencies
|
|
- [ ] Architecture summary provides useful overview
|
|
- [ ] Incremental updates are fast
|
|
- [ ] Works with monorepos
|
|
- [ ] MCP tool enables Claude to trigger indexing
|
|
|
|
## Estimated Effort
|
|
|
|
- Project detection: 2 hours
|
|
- File scanner: 3 hours
|
|
- TypeScript parser: 4 hours
|
|
- Python parser: 3 hours
|
|
- Relationship mapping: 4 hours
|
|
- Architecture summary: 3 hours
|
|
- Incremental updates: 3 hours
|
|
- Testing: 3 hours
|
|
- **Total: ~25 hours**
|
|
|
|
## Dependencies
|
|
|
|
- None (enhances Milestone 3 but does not depend on it)
|
|
|
|
## References
|
|
|
|
- [tree-sitter](https://tree-sitter.github.io/tree-sitter/) for parsing
|
|
- [Sourcebot architecture](https://github.com/sourcebot-dev/sourcebot)
|