Add smart retrieval with git context (Milestone 10)

- Git context extraction: branch, commits, modified files
- Smart search with context-based re-ranking
- Time boosting for recently accessed nodes
- File relevance boosting for modified files
- Branch keyword matching
- CLI: smart-search, ss, what, now commands
- MCP tools: memory_smart_search, memory_what
This commit is contained in:
2026-02-03 11:28:39 +01:00
parent aea3e93ff7
commit f891f37bde
5 changed files with 704 additions and 0 deletions

166
src/cli/commands/smart.ts Normal file
View File

@@ -0,0 +1,166 @@
import { Command } from 'commander';
import chalk from 'chalk';
import { smartSearch, gatherWhatContext, formatWhatContext } from '../../core/search/smart';
import { NodeKind } from '../../types';
/**
 * `smart-search` CLI command.
 *
 * Runs smartSearch() with an optional explicit query and prints the re-ranked
 * results either as JSON (`--format json`) or as a human-readable list that
 * shows the final score, the base score and every boost that was applied.
 * Any failure is reported on stderr and the process exits with code 1.
 */
export const smartSearchCommand = new Command('smart-search')
  .description('Context-aware search using git and file signals')
  .argument('[query]', 'Optional explicit search query')
  .option('--kind <kind>', 'Filter by node kind')
  .option('--limit <n>', 'Max results', '10')
  .option('--expand', 'Include related nodes')
  .option('--format <fmt>', 'Output format: text or json', 'text')
  .action(async (queryText: string | undefined, opts) => {
    try {
      // Reject garbage such as `--limit abc` up front; a NaN limit would
      // otherwise silently produce an empty result list.
      const limit = Number.parseInt(opts.limit, 10);
      if (!Number.isInteger(limit) || limit <= 0) {
        throw new Error(`Invalid --limit value "${opts.limit}": expected a positive integer`);
      }

      const results = await smartSearch(queryText, {
        kind: opts.kind as NodeKind | undefined,
        limit,
        includeRelated: opts.expand,
      });

      if (results.length === 0) {
        console.log(chalk.yellow('No relevant results found.'));
        console.log(chalk.dim('Try adding more context or using a specific query.'));
        return;
      }

      if (opts.format === 'json') {
        console.log(JSON.stringify(results.map(r => ({
          ...r.node,
          // Setting the key to undefined makes JSON.stringify drop the embedding.
          embedding: undefined,
          score: r.score,
          originalScore: r.originalScore,
          boosts: r.boosts,
        })), null, 2));
        return;
      }

      console.log(chalk.cyan(`Found ${results.length} relevant results:\n`));
      for (const r of results) {
        const n = r.node;
        // Render only the boosts that were actually applied, e.g. "time:1.30 branch:1.30".
        const boostInfo = Object.entries(r.boosts)
          .filter(([, v]) => v !== undefined)
          .map(([k, v]) => `${k}:${(v as number).toFixed(2)}`)
          .join(' ');
        console.log(`${chalk.cyan(n.id.slice(0, 8))} [${chalk.magenta(n.kind)}] ${chalk.bold(n.title)}`);
        console.log(chalk.dim(` Score: ${r.score.toFixed(3)} (base: ${r.originalScore.toFixed(3)}) ${boostInfo ? `[${boostInfo}]` : ''}`));
        if (n.content) {
          // One-line preview capped at 100 characters.
          const preview = n.content.slice(0, 100).replace(/\n/g, ' ');
          console.log(chalk.dim(` ${preview}${n.content.length > 100 ? '...' : ''}`));
        }
        if (n.tags.length) {
          console.log(` ${chalk.yellow(n.tags.join(', '))}`);
        }
        console.log();
      }
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      console.error(chalk.red(`Error: ${message}`));
      process.exit(1);
    }
  });
/**
 * `ss` CLI command: short alias for `smart-search` with a compact output.
 *
 * Accepts the same arguments and options as `smart-search`, but the text
 * output is a single line per result and the JSON output carries only the
 * node fields plus the final score. Failures are reported on stderr and the
 * process exits with code 1, matching `smart-search`.
 */
export const ssCommand = new Command('ss')
  .description('Alias for smart-search')
  .argument('[query]', 'Optional search query')
  .option('--kind <kind>', 'Filter by node kind')
  .option('--limit <n>', 'Max results', '10')
  .option('--expand', 'Include related nodes')
  .option('--format <fmt>', 'Output format: text or json', 'text')
  .action(async (queryText: string | undefined, opts) => {
    try {
      // A NaN limit would silently yield an empty result list, so validate first.
      const limit = Number.parseInt(opts.limit, 10);
      if (!Number.isInteger(limit) || limit <= 0) {
        throw new Error(`Invalid --limit value "${opts.limit}": expected a positive integer`);
      }

      const results = await smartSearch(queryText, {
        kind: opts.kind as NodeKind | undefined,
        limit,
        includeRelated: opts.expand,
      });

      if (results.length === 0) {
        console.log(chalk.yellow('No relevant results found.'));
        return;
      }

      if (opts.format === 'json') {
        console.log(JSON.stringify(results.map(r => ({
          ...r.node,
          // Setting the key to undefined makes JSON.stringify drop the embedding.
          embedding: undefined,
          score: r.score,
        })), null, 2));
        return;
      }

      for (const r of results) {
        const n = r.node;
        console.log(`${chalk.cyan(n.id.slice(0, 8))} [${chalk.magenta(n.kind)}] ${chalk.bold(n.title)} ${chalk.dim(`(${r.score.toFixed(3)})`)}`);
      }
    } catch (err: unknown) {
      // Without this handler a failed search surfaces as an unhandled rejection.
      const message = err instanceof Error ? err.message : String(err);
      console.error(chalk.red(`Error: ${message}`));
      process.exit(1);
    }
  });
/**
 * `what` CLI command: prints the context gathered by gatherWhatContext().
 *
 * With `--format json` a trimmed summary is emitted (file lists are reduced
 * to counts, nodes to a few identifying fields); otherwise the text produced
 * by formatWhatContext() is printed under a heading. Errors go to stderr and
 * terminate the process with exit code 1.
 */
export const whatCommand = new Command('what')
  .description('What should I know right now? Shows relevant context.')
  .option('--format <fmt>', 'Output format: text or json', 'text')
  .action(async (opts) => {
    try {
      const ctx = await gatherWhatContext();

      if (opts.format !== 'json') {
        const text = formatWhatContext(ctx);
        if (text.trim()) {
          console.log(chalk.bold.cyan('\n📚 What you should know:\n'));
          console.log(text);
        } else {
          console.log(chalk.yellow('No relevant context found.'));
          console.log(chalk.dim('Add some memories or open tasks to see context here.'));
        }
        return;
      }

      // JSON mode: report counts for the file lists and a short projection of every node.
      const git = ctx.gitContext;
      const summary = {
        gitContext: {
          branch: git.branch,
          modifiedFiles: git.modifiedFiles.length,
          stagedFiles: git.stagedFiles.length,
          isGitRepo: git.isGitRepo,
        },
        projectName: ctx.fileContext.projectName,
        branchRelated: ctx.branchRelated.map(({ id, title, kind }) => ({ id, title, kind })),
        fileRelated: ctx.fileRelated.map(({ id, title, kind }) => ({ id, title, kind })),
        tasks: ctx.tasks.map(({ id, title, status }) => ({ id, title, status })),
        decisions: ctx.decisions.map(({ id, title }) => ({ id, title })),
        recentMemories: ctx.recentMemories.map(({ id, title }) => ({ id, title })),
      };
      console.log(JSON.stringify(summary, null, 2));
    } catch (err: any) {
      console.error(chalk.red(`Error: ${err.message}`));
      process.exit(1);
    }
  });
/**
 * `now` CLI command: condensed variant of `what`.
 *
 * The text output is the same formatWhatContext() rendering under a
 * "Current context" heading; the JSON output is a brief summary holding the
 * project name, the branch and the number of tasks and decisions. Failures
 * are reported on stderr and the process exits with code 1, matching `what`.
 */
export const contextAwareCommand = new Command('now')
  .description('Show current context (alias for what)')
  .option('--format <fmt>', 'Output format: text or json', 'text')
  .action(async (opts) => {
    try {
      const context = await gatherWhatContext();

      if (opts.format === 'json') {
        console.log(JSON.stringify({
          projectName: context.fileContext.projectName,
          branch: context.gitContext.branch,
          tasks: context.tasks.length,
          decisions: context.decisions.length,
        }, null, 2));
        return;
      }

      const formatted = formatWhatContext(context);
      if (!formatted.trim()) {
        console.log(chalk.yellow('No relevant context found.'));
        return;
      }

      console.log(chalk.bold.cyan('\n📚 Current context:\n'));
      console.log(formatted);
    } catch (err: unknown) {
      // Without this handler a failure surfaces as an unhandled rejection.
      const message = err instanceof Error ? err.message : String(err);
      console.error(chalk.red(`Error: ${message}`));
      process.exit(1);
    }
  });

View File

@@ -23,6 +23,7 @@ import { exportCommand, vizCommand } from './commands/export';
import { importCommand } from './commands/import';
import { backupCommand, restoreDbCommand, listBackupsCommand } from './commands/backup-cmd';
import { graphsCommand, useCommand, initCommand } from './commands/graphs';
import { smartSearchCommand, ssCommand, whatCommand, contextAwareCommand } from './commands/smart';
import { closeDb } from '../core/db';
import { migrateOldDatabase } from '../core/db';
@@ -67,6 +68,10 @@ program.addCommand(listBackupsCommand);
program.addCommand(graphsCommand);
program.addCommand(useCommand);
program.addCommand(initCommand);
// Context-aware retrieval commands from ./commands/smart: smart-search, ss, what, now.
program.addCommand(smartSearchCommand);
program.addCommand(ssCommand);
program.addCommand(whatCommand);
program.addCommand(contextAwareCommand);
// Check for old database migration
migrateOldDatabase();

View File

@@ -0,0 +1,175 @@
import { execSync } from 'child_process';
import * as path from 'path';
/**
 * Snapshot of the git state of the current working directory, as produced by
 * getGitContext(). Every list is empty and `branch` is '' when the directory
 * is not inside a git work tree.
 */
export interface GitContext {
  /** Name of the currently checked-out branch; '' when outside a repository. */
  branch: string;
  /** Messages of the latest commits (from `git log --oneline -5`, hash prefix removed). */
  recentCommits: string[];
  /** Paths reported by `git diff --name-only` (unstaged changes). */
  modifiedFiles: string[];
  /** Paths reported by `git diff --staged --name-only`. */
  stagedFiles: string[];
  /** Paths changed across the last few commits (diff of HEAD~5..HEAD, or against HEAD if that fails). */
  recentlyTouched: string[];
  /** False when the git work-tree probe failed, i.e. no usable repository was found. */
  isGitRepo: boolean;
}
/**
 * Working-directory oriented context, as produced by getFileContext().
 */
export interface FileContext {
  /** The process working directory (`process.cwd()`). */
  cwd: string;
  /** Base name of `cwd`, used as the project name. */
  projectName: string;
  /** Copy of GitContext.modifiedFiles (unstaged changes). */
  modifiedFiles: string[];
  /** Copy of GitContext.stagedFiles. */
  stagedFiles: string[];
}
/**
 * Run a git sub-command and return its trimmed stdout.
 * stderr is piped (captured) so git diagnostics never leak to the terminal;
 * throws when git exits with a non-zero status or cannot be started.
 */
function runGit(args: string): string {
  return execSync(`git ${args}`, {
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
  }).trim();
}

/** Split command output into non-empty lines, tolerating CRLF line endings. */
function splitGitLines(output: string): string[] {
  return output.split(/\r?\n/).filter(Boolean);
}

/** Best-effort variant of runGit: returns the output lines, or [] when the command fails. */
function gitLines(args: string): string[] {
  try {
    return splitGitLines(runGit(args));
  } catch {
    return [];
  }
}

/**
 * Resolve the current branch name, or '' when it cannot be determined
 * (detached HEAD, or both lookups failing).
 */
function readCurrentBranch(): string {
  try {
    return runGit('branch --show-current');
  } catch {
    // `--show-current` is missing from older git releases; fall through.
  }
  try {
    const ref = runGit('rev-parse --abbrev-ref HEAD');
    // A detached HEAD is reported literally as "HEAD"; treat it as "no branch".
    return ref === 'HEAD' ? '' : ref;
  } catch {
    return '';
  }
}

/**
 * Extract git context from current working directory.
 *
 * Never throws: when the directory is not inside a git work tree (or git is
 * unavailable) an empty context with `isGitRepo: false` is returned, and each
 * individual lookup degrades to an empty value on failure.
 *
 * @returns Branch, recent commit messages and changed-file lists for the cwd.
 */
export function getGitContext(): GitContext {
  try {
    // Check if we're in a git repo
    runGit('rev-parse --is-inside-work-tree');
  } catch {
    // Not a git repo
    return {
      branch: '',
      recentCommits: [],
      modifiedFiles: [],
      stagedFiles: [],
      recentlyTouched: [],
      isGitRepo: false,
    };
  }

  const branch = readCurrentBranch();

  // Recent commit messages: drop the abbreviated hash that prefixes each line.
  // Empty when the repository has no commits yet.
  const recentCommits = gitLines('log --oneline -5').map(line =>
    line.split(' ').slice(1).join(' ')
  );

  // Modified (unstaged) files
  const modifiedFiles = gitLines('diff --name-only');

  // Staged files
  const stagedFiles = gitLines('diff --staged --name-only');

  // Recently touched files (from recent commits). HEAD~5 does not exist in
  // short histories, so fall back to a plain diff against HEAD. The fallback
  // is done here rather than with shell operators to stay portable.
  let recentlyTouched: string[];
  try {
    recentlyTouched = splitGitLines(runGit('diff --name-only HEAD~5..HEAD'));
  } catch {
    recentlyTouched = gitLines('diff --name-only HEAD');
  }

  return {
    branch,
    recentCommits,
    modifiedFiles,
    stagedFiles,
    recentlyTouched,
    isGitRepo: true,
  };
}
/**
 * Get file-based context for the current working directory.
 *
 * @param gitContext Optional, already-collected git context. When omitted the
 *   context is gathered with getGitContext(), which spawns several git
 *   processes; callers that already hold a GitContext can pass it in to avoid
 *   running them a second time.
 * @returns The cwd, its base name as project name, and the modified/staged
 *   file lists taken from the git context.
 */
export function getFileContext(gitContext: GitContext = getGitContext()): FileContext {
  const cwd = process.cwd();
  return {
    cwd,
    projectName: path.basename(cwd),
    modifiedFiles: gitContext.modifiedFiles,
    stagedFiles: gitContext.stagedFiles,
  };
}
/**
 * Derive search keywords from a git context.
 *
 * Three sources are combined, in this order, and duplicates are removed:
 *  1. segments of the branch name (skipped on `main` / `master`),
 *  2. up to five significant words from each of the three newest commit messages,
 *  3. name fragments of the first five modified/staged files.
 *
 * @param context Git context to mine for keywords.
 * @returns Unique keywords in first-seen order.
 */
export function extractGitKeywords(context: GitContext): string[] {
  const { branch, recentCommits, modifiedFiles, stagedFiles } = context;
  const collected: string[] = [];

  // 1. Branch segments: branch names often carry feature or ticket identifiers.
  const onDefaultBranch = branch === 'main' || branch === 'master';
  if (branch && !onDefaultBranch) {
    for (const segment of branch.split(/[-_\/]/)) {
      if (segment.length > 2) {
        collected.push(segment);
      }
    }
  }

  // 2. Commit words: lower-cased, punctuation stripped, short and stop words dropped.
  for (const message of recentCommits.slice(0, 3)) {
    const tokens = message
      .toLowerCase()
      .replace(/[^\w\s]/g, ' ')
      .split(/\s+/);
    const significant = tokens.filter(token => token.length > 3 && !STOP_WORDS.has(token));
    collected.push(...significant.slice(0, 5));
  }

  // 3. File name fragments: extension removed, camelCase / kebab-case / snake_case split.
  const changedFiles = modifiedFiles.concat(stagedFiles).slice(0, 5);
  for (const filePath of changedFiles) {
    const stem = path.basename(filePath, path.extname(filePath));
    if (stem.length <= 2) {
      continue;
    }
    const fragments = stem
      .replace(/([a-z])([A-Z])/g, '$1 $2')
      .toLowerCase()
      .split(/[-_\s]+/);
    collected.push(...fragments.filter(fragment => fragment.length > 2));
  }

  return Array.from(new Set(collected));
}
/** Words ignored when mining commit messages for keywords. */
const STOP_WORDS = new Set<string>([
  // Common English function words
  'the',
  'and',
  'for',
  'with',
  'this',
  'that',
  'from',
  'have',
  'been',
  // Generic change verbs and git vocabulary
  'added',
  'updated',
  'fixed',
  'removed',
  'changed',
  'merge',
  'commit',
  // Commit-type prefixes
  'feat',
  'fix',
  'chore',
  'docs',
  'style',
  'refactor',
  'test',
  'build',
]);

306
src/core/search/smart.ts Normal file
View File

@@ -0,0 +1,306 @@
import { query, listNodes } from '../store';
import { getGitContext, getFileContext, extractGitKeywords, GitContext, FileContext } from './git-context';
import { Node, NodeKind } from '../../types';
/** Options accepted by smartSearch(). */
export interface SmartSearchOptions {
  /** Maximum number of results to return; smartSearch() defaults this to 20. */
  limit?: number;
  /** Restrict results to a single node kind. */
  kind?: NodeKind;
  /** Request expansion to related nodes. NOTE(review): smartSearch() currently does nothing with this flag. */
  includeRelated?: boolean;
}
/** A search hit after context-based re-ranking. */
export interface SmartSearchResult {
  /** The matched node. */
  node: Node;
  /** Final score: `originalScore` multiplied by every applied boost. */
  score: number;
  /** Score as returned by the underlying search, before boosting. */
  originalScore: number;
  /** Multipliers that were applied; a key is absent when that boost did not apply. */
  boosts: {
    /** Recency multiplier taken from TIME_BOOSTS. */
    time?: number;
    /** 1.0 + 0.2 per node file that overlaps a modified or staged file. */
    file?: number;
    /** 1.3 when a tag or the title contains a branch-name keyword. */
    branch?: number;
    /** 1.2 when the node is tagged with the lower-cased project name. */
    project?: number;
  };
  /** Optional explanation; not populated by the re-ranking code in this file. */
  reason?: string;
}
/** Everything gatherWhatContext() collects for the "what should I know?" view. */
export interface WhatContext {
  /** Git state of the working directory. */
  gitContext: GitContext;
  /** Working directory and project name information. */
  fileContext: FileContext;
  /** Nodes found by searching for the branch name (left empty on main/master). */
  branchRelated: Node[];
  /** Nodes found by searching for the names of modified and staged files. */
  fileRelated: Node[];
  /** Tasks with status `todo`, followed by tasks with status `in_progress`. */
  tasks: Node[];
  /** Decision nodes returned by listNodes (at most 5). */
  decisions: Node[];
  /** Up to 5 memory nodes, most recently accessed (or updated) first. */
  recentMemories: Node[];
}
// Time boost factors: score multipliers chosen by how long ago a node was
// last accessed (falling back to its last update).
const TIME_BOOSTS = {
  // Accessed less than one hour ago
  lastHour: 1.5,
  // Accessed less than one day ago
  lastDay: 1.3,
  // Accessed less than one week ago
  lastWeek: 1.1,
  // Neutral factor for anything older; the re-ranking code simply applies no time boost in that case
  older: 1.0,
};
/**
 * Smart search that combines explicit query with context signals.
 *
 * Keywords derived from the git context and the project name are appended to
 * the explicit query (at most five of them), or used on their own when no
 * query is given. The hybrid search is run with three times the requested
 * limit so that context-based re-ranking has candidates to promote, and the
 * top `limit` re-ranked results are returned. When neither a query nor any
 * context keyword is available, the most recent nodes are returned instead.
 *
 * @param explicitQuery Optional user-supplied search text.
 * @param options Result limit (default 20), kind filter and `includeRelated`.
 *   `includeRelated` is accepted for forward compatibility but expansion to
 *   related nodes is not implemented yet, so it has no effect.
 * @returns Re-ranked results, best first.
 */
export async function smartSearch(
  explicitQuery?: string,
  options: SmartSearchOptions = {}
): Promise<SmartSearchResult[]> {
  const { kind } = options;

  // A NaN/Infinity limit (e.g. from an unparsable CLI value) would poison both
  // `limit * 3` and `slice(0, limit)`, so fall back to the default; negative
  // or fractional values are clamped to a sane non-negative integer.
  const requestedLimit = options.limit;
  const limit =
    requestedLimit === undefined || !Number.isFinite(requestedLimit)
      ? 20
      : Math.max(0, Math.floor(requestedLimit));

  // Gather context
  const gitContext = getGitContext();
  const fileContext = getFileContext();

  // Build implicit query from context; skip a blank or already-present project name.
  const contextKeywords = extractGitKeywords(gitContext);
  const { projectName } = fileContext;
  if (projectName && !contextKeywords.includes(projectName)) {
    contextKeywords.push(projectName);
  }

  // Combine queries
  const searchQuery = explicitQuery
    ? `${explicitQuery} ${contextKeywords.slice(0, 5).join(' ')}`
    : contextKeywords.join(' ');

  if (!searchQuery.trim()) {
    // No context, fall back to recent
    const recent = listNodes({ limit, kind, includeStale: false });
    return recent.map(node => ({
      node,
      score: 1.0,
      originalScore: 1.0,
      boosts: {},
    }));
  }

  // Run hybrid search with higher limit for re-ranking
  const results = await query(searchQuery, { limit: limit * 3, kind });

  // Re-rank based on context signals
  const reranked = rerankResults(results, gitContext, fileContext);

  return reranked.slice(0, limit);
}
/**
 * Re-rank search results based on context signals.
 *
 * Each result's score is multiplied by the product of the boosts that apply:
 *  - time:    recency of last access (or last update), see TIME_BOOSTS;
 *  - file:    1.0 + 0.2 per node file overlapping a modified/staged file;
 *  - branch:  1.3 when a tag or the title contains a branch-name keyword
 *             (never on `main` / `master`, consistent with keyword extraction);
 *  - project: 1.2 when the node is tagged with the lower-cased project name.
 *
 * @param results Raw search hits with their base score.
 * @param gitContext Git state used for the file and branch boosts.
 * @param fileContext Supplies the project name for the project boost.
 * @returns A new array sorted by boosted score, highest first.
 */
function rerankResults(
  results: Array<{ node: Node; score: number }>,
  gitContext: GitContext,
  fileContext: FileContext
): SmartSearchResult[] {
  const now = Date.now();
  const HOUR = 60 * 60 * 1000;
  const DAY = 24 * HOUR;
  const WEEK = 7 * DAY;

  // Everything below is identical for every result, so compute it once.
  const changedFiles = [...gitContext.modifiedFiles, ...gitContext.stagedFiles].filter(Boolean);
  const onDefaultBranch = gitContext.branch === 'main' || gitContext.branch === 'master';
  // Default branch names say nothing about the work in progress; without this
  // guard every title containing "main" (e.g. "Domain") would be boosted.
  const branchKeywords = onDefaultBranch
    ? []
    : gitContext.branch
        .split(/[-_\/]/)
        .filter(k => k.length > 2)
        .map(k => k.toLowerCase());
  const projectTag = fileContext.projectName.toLowerCase();

  return results
    .map(result => {
      const { node } = result;
      const boosts: SmartSearchResult['boosts'] = {};
      let totalBoost = 1.0;

      // Time boost based on last access
      const lastAccess = node.lastAccessedAt || node.updatedAt;
      const age = now - lastAccess;
      if (age < HOUR) {
        boosts.time = TIME_BOOSTS.lastHour;
      } else if (age < DAY) {
        boosts.time = TIME_BOOSTS.lastDay;
      } else if (age < WEEK) {
        boosts.time = TIME_BOOSTS.lastWeek;
      }
      if (boosts.time !== undefined) {
        totalBoost *= boosts.time;
      }

      // File relevance boost. Metadata is free-form, so only keep non-empty
      // strings: an empty path would "match" every changed file.
      const metaFiles: unknown = node.metadata?.files;
      const nodeFiles: string[] = Array.isArray(metaFiles)
        ? metaFiles.filter((f): f is string => typeof f === 'string' && f.length > 0)
        : [];
      const nodePath: unknown = node.metadata?.filePath;
      if (typeof nodePath === 'string' && nodePath.length > 0) {
        nodeFiles.push(nodePath);
      }
      const fileOverlap = nodeFiles.filter(f =>
        changedFiles.some(cf => f.includes(cf) || cf.includes(f))
      ).length;
      if (fileOverlap > 0) {
        boosts.file = 1.0 + (0.2 * fileOverlap);
        totalBoost *= boosts.file;
      }

      // Branch relevance boost
      if (branchKeywords.length > 0) {
        const title = node.title.toLowerCase();
        const tagMatch = node.tags.some((tag: string) =>
          branchKeywords.some(bk => tag.toLowerCase().includes(bk))
        );
        const titleMatch = branchKeywords.some(bk => title.includes(bk));
        if (tagMatch || titleMatch) {
          boosts.branch = 1.3;
          totalBoost *= 1.3;
        }
      }

      // Project name boost
      if (node.tags.includes(projectTag)) {
        boosts.project = 1.2;
        totalBoost *= 1.2;
      }

      return {
        node,
        score: result.score * totalBoost,
        originalScore: result.score,
        boosts,
      };
    })
    .sort((a, b) => b.score - a.score);
}
/**
 * Collect everything the "what should I know?" view needs.
 *
 * Combines the git/file context of the working directory with: nodes matching
 * the branch name (skipped on main/master), nodes matching the names of
 * changed files, open tasks (todo first, then in progress), recent decisions
 * and the most recently accessed memories.
 *
 * @returns The assembled context.
 */
export async function gatherWhatContext(): Promise<WhatContext> {
  const gitContext = getGitContext();
  const fileContext = getFileContext();
  const { branch, modifiedFiles, stagedFiles } = gitContext;

  // Nodes related to the branch name; default branches carry no useful keywords.
  const branchRelated: Node[] = [];
  const onFeatureBranch = Boolean(branch) && branch !== 'main' && branch !== 'master';
  if (onFeatureBranch) {
    const branchHits = await query(branch.replace(/[-_\/]/g, ' '), { limit: 5 });
    for (const hit of branchHits) {
      branchRelated.push(hit.node);
    }
  }

  // Nodes related to the first five changed files: drop the extension and
  // turn path separators into spaces to form the search text.
  const fileRelated: Node[] = [];
  const changedFiles = [...modifiedFiles, ...stagedFiles];
  if (changedFiles.length > 0) {
    const fileQuery = changedFiles
      .slice(0, 5)
      .map(filePath => filePath.replace(/\.[^.]+$/, '').replace(/[\/\\]/g, ' '))
      .join(' ');
    if (fileQuery) {
      const fileHits = await query(fileQuery, { limit: 5 });
      for (const hit of fileHits) {
        fileRelated.push(hit.node);
      }
    }
  }

  // Open tasks: todo entries followed by in-progress entries.
  const todoTasks = listNodes({
    kind: 'task',
    status: 'todo',
    limit: 10,
    includeStale: false,
  });
  const activeTasks = listNodes({
    kind: 'task',
    status: 'in_progress',
    limit: 5,
    includeStale: false,
  });
  const tasks = todoTasks.concat(activeTasks);

  // Recent decisions
  const decisions = listNodes({ kind: 'decision', limit: 5, includeStale: false });

  // Recent memories: newest access (or update) first, top five.
  const memoryCandidates = listNodes({ kind: 'memory', limit: 10, includeStale: false });
  memoryCandidates.sort(
    (a, b) => (b.lastAccessedAt || b.updatedAt) - (a.lastAccessedAt || a.updatedAt)
  );
  const recentMemories = memoryCandidates.slice(0, 5);

  return {
    gitContext,
    fileContext,
    branchRelated,
    fileRelated,
    tasks,
    decisions,
    recentMemories,
  };
}
/**
 * Format "what" context for display.
 *
 * Produces a plain-text report with one section per non-empty part of the
 * context: project/branch summary (git repositories only), branch-related
 * nodes, change-related nodes, open tasks, recent decisions and recently
 * accessed memories. Sections are truncated (3 nodes, 5 tasks).
 *
 * @param context Context gathered by gatherWhatContext().
 * @returns The report; an empty string when there is nothing to show.
 */
export function formatWhatContext(context: WhatContext): string {
  const lines: string[] = [];
  const { gitContext, fileContext } = context;

  // Git context summary
  if (gitContext.isGitRepo) {
    lines.push(`📁 Project: ${fileContext.projectName}`);
    if (gitContext.branch) {
      lines.push(`🌿 Branch: ${gitContext.branch}`);
    }
    if (gitContext.modifiedFiles.length > 0) {
      lines.push(`📝 Modified: ${gitContext.modifiedFiles.length} files`);
    }
    lines.push('');
  }

  // Branch-related
  if (context.branchRelated.length > 0) {
    lines.push('📚 Related to current branch:');
    for (const node of context.branchRelated.slice(0, 3)) {
      lines.push(` • [${node.kind}] ${node.title}`);
    }
    lines.push('');
  }

  // File-related
  if (context.fileRelated.length > 0) {
    lines.push('🔗 Related to changes:');
    for (const node of context.fileRelated.slice(0, 3)) {
      lines.push(` • [${node.kind}] ${node.title}`);
    }
    lines.push('');
  }

  // Open tasks. Only five are shown, so list in-progress work first;
  // otherwise five or more todo entries would hide it completely.
  if (context.tasks.length > 0) {
    lines.push('✅ Open tasks:');
    const inProgress = context.tasks.filter(task => task.status === 'in_progress');
    const remaining = context.tasks.filter(task => task.status !== 'in_progress');
    for (const task of [...inProgress, ...remaining].slice(0, 5)) {
      const status = task.status === 'in_progress' ? '🔄' : '⬜';
      lines.push(` ${status} ${task.title}`);
    }
    lines.push('');
  }

  // Recent decisions (bulleted like the other node sections)
  if (context.decisions.length > 0) {
    lines.push('🎯 Recent decisions:');
    for (const decision of context.decisions.slice(0, 3)) {
      lines.push(` • ${decision.title}`);
    }
    lines.push('');
  }

  // Recent memories (bulleted like the other node sections)
  if (context.recentMemories.length > 0) {
    lines.push('💭 Recently accessed:');
    for (const memory of context.recentMemories.slice(0, 3)) {
      lines.push(` • ${memory.title}`);
    }
  }

  return lines.join('\n');
}

View File

@@ -923,6 +923,58 @@ server.tool(
}
);
// --- memory_smart_search ---
import { smartSearch, gatherWhatContext, formatWhatContext } from '../core/search/smart';
// MCP tool `memory_smart_search`: runs smartSearch() and returns a compact,
// serialized summary (id, kind, title, scores, boosts, tags) of every hit.
server.tool(
  'memory_smart_search',
  'Context-aware search that uses git and file signals for relevance boosting',
  {
    query: z.string().optional().describe('Optional explicit search query'),
    kind: z.enum(['memory', 'component', 'task', 'decision']).optional().describe('Filter by kind'),
    limit: z.number().optional().describe('Max results (default: 10)'),
  },
  async ({ query: searchQuery, kind, limit }) => {
    // A missing (or zero) limit falls back to 10.
    const hits = await smartSearch(searchQuery, {
      kind: kind as NodeKind,
      limit: limit || 10,
    });

    const summaries = hits.map(({ node, score, originalScore, boosts }) => ({
      id: node.id,
      kind: node.kind,
      title: node.title,
      score,
      originalScore,
      boosts,
      tags: node.tags,
    }));

    const text = serialize({ count: hits.length, results: summaries });
    return {
      content: [{ type: 'text' as const, text }],
    };
  }
);
// MCP tool `memory_what`: returns the formatted "what should I know?" report,
// or a fixed placeholder sentence when the report is empty.
server.tool(
  'memory_what',
  'Get relevant context for current work: branch-related nodes, file-related nodes, open tasks, recent decisions',
  {},
  async () => {
    const report = formatWhatContext(await gatherWhatContext());
    const text = report || 'No relevant context found.';
    return {
      content: [{ type: 'text' as const, text }],
    };
  }
);
// --- memory_index ---
import { indexProject } from '../core/indexer';