Add auto-capture system (Milestone 2)

- Add capture configuration system with modes: always, manual, decisions, off
- Add Ollama-based conversation summarization and extraction
- Add deduplication via embedding similarity (merge >0.90, link 0.75-0.90)
- Add CLI commands: capture, capture-hook, config
- Add MCP tools: memory_capture, memory_remember, memory_capture_config
- Include summary.ts (previously uncommitted)
This commit is contained in:
2026-02-03 09:59:49 +01:00
parent 999c748d3d
commit 7a4dc07038
8 changed files with 941 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
import { getDb } from '../db';
export type CaptureMode = 'always' | 'manual' | 'decisions' | 'off';
export interface CaptureConfig {
mode: CaptureMode;
minLength: number;
excludePatterns: string[];
autoTag: boolean;
linkRelated: boolean;
similarityThreshold: number;
mergeThreshold: number;
}
const DEFAULT_CONFIG: CaptureConfig = {
mode: 'always',
minLength: 100,
excludePatterns: [],
autoTag: true,
linkRelated: true,
similarityThreshold: 0.75,
mergeThreshold: 0.90,
};
function ensureConfigTable(): void {
const db = getDb();
db.exec(`
CREATE TABLE IF NOT EXISTS system_config (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at INTEGER NOT NULL
)
`);
}
export function getCaptureConfig(): CaptureConfig {
ensureConfigTable();
const db = getDb();
const row = db.prepare('SELECT value FROM system_config WHERE key = ?').get('capture') as { value: string } | undefined;
if (!row) return DEFAULT_CONFIG;
try {
return { ...DEFAULT_CONFIG, ...JSON.parse(row.value) };
} catch {
return DEFAULT_CONFIG;
}
}
export function setCaptureConfig(updates: Partial<CaptureConfig>): CaptureConfig {
ensureConfigTable();
const db = getDb();
const current = getCaptureConfig();
const updated = { ...current, ...updates };
db.prepare(`
INSERT INTO system_config (key, value, updated_at) VALUES (?, ?, ?)
ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at
`).run('capture', JSON.stringify(updated), Date.now());
return updated;
}
export function getConfigValue<K extends keyof CaptureConfig>(key: K): CaptureConfig[K] {
return getCaptureConfig()[key];
}
export function setConfigValue<K extends keyof CaptureConfig>(key: K, value: CaptureConfig[K]): void {
setCaptureConfig({ [key]: value } as Partial<CaptureConfig>);
}

113
src/core/capture/dedupe.ts Normal file
View File

@@ -0,0 +1,113 @@
import { listNodes, addEdge, updateNode } from '../store';
import { getEmbedding } from '../search/ollama';
import { cosineSimilarity } from '../search/vector';
import { Node } from '../../types';
import { getCaptureConfig } from './config';
export interface SimilarNode {
node: Node;
similarity: number;
}
export interface DedupeResult {
action: 'create' | 'merge' | 'link';
existingNode?: Node;
similarity?: number;
}
export async function findSimilarNodes(
text: string,
limit: number = 5
): Promise<SimilarNode[]> {
const embedding = await getEmbedding(text);
if (!embedding) return [];
const nodes = listNodes({ includeStale: false });
const withEmbeddings = nodes.filter(n => n.embedding && n.embedding.length > 0);
const scored: SimilarNode[] = [];
for (const node of withEmbeddings) {
const similarity = cosineSimilarity(embedding, node.embedding!);
if (similarity > 0.5) {
scored.push({ node, similarity });
}
}
return scored
.sort((a, b) => b.similarity - a.similarity)
.slice(0, limit);
}
export async function checkDuplicate(
summary: string,
content: string
): Promise<DedupeResult> {
const config = getCaptureConfig();
const textToCompare = `${summary} ${content}`;
const similar = await findSimilarNodes(textToCompare, 1);
if (similar.length === 0) {
return { action: 'create' };
}
const { node, similarity } = similar[0];
if (similarity >= config.mergeThreshold) {
return {
action: 'merge',
existingNode: node,
similarity,
};
}
if (similarity >= config.similarityThreshold) {
return {
action: 'link',
existingNode: node,
similarity,
};
}
return { action: 'create' };
}
export async function mergeIntoNode(
existingId: string,
newSummary: string,
newContent: string,
newTags: string[]
): Promise<Node | null> {
const existing = listNodes({ includeStale: false }).find(n => n.id === existingId);
if (!existing) return null;
// Append new content with timestamp
const timestamp = new Date().toISOString().slice(0, 10);
const mergedContent = existing.content
? `${existing.content}\n\n---\n[${timestamp}]\n${newContent}`
: newContent;
// Merge tags (dedupe)
const mergedTags = [...new Set([...existing.tags, ...newTags])];
// Update the existing node
return updateNode(existingId, {
content: mergedContent,
tags: mergedTags,
metadata: {
...existing.metadata,
lastMergedAt: Date.now(),
mergeCount: (existing.metadata.mergeCount || 0) + 1,
},
});
}
export async function linkRelatedNode(
newNodeId: string,
existingNodeId: string
): Promise<void> {
addEdge(newNodeId, existingNodeId, 'relates_to', {
reason: 'auto-capture-similarity',
linkedAt: Date.now(),
});
}

192
src/core/capture/index.ts Normal file
View File

@@ -0,0 +1,192 @@
import { addNode } from '../store';
import { getCaptureConfig, CaptureConfig } from './config';
import { extractMemoryData, shouldCapture, ExtractedMemory } from './summarize';
import { checkDuplicate, mergeIntoNode, linkRelatedNode } from './dedupe';
import { Node } from '../../types';
export { getCaptureConfig, setCaptureConfig, CaptureMode, CaptureConfig } from './config';
export { extractMemoryData, shouldCapture, ExtractedMemory } from './summarize';
export { findSimilarNodes, checkDuplicate, mergeIntoNode } from './dedupe';
export interface CaptureInput {
conversation: string;
sessionId?: string;
filesChanged?: string[];
source?: string;
}
export interface CaptureResult {
captured: boolean;
action: 'created' | 'merged' | 'linked' | 'skipped';
node?: Node;
reason?: string;
}
export async function captureConversation(input: CaptureInput): Promise<CaptureResult> {
const config = getCaptureConfig();
// Check if capture is enabled
if (config.mode === 'off') {
return { captured: false, action: 'skipped', reason: 'capture disabled' };
}
// Check minimum length
if (!shouldCapture(input.conversation, config.minLength)) {
return { captured: false, action: 'skipped', reason: 'conversation too short or trivial' };
}
// Check exclude patterns
for (const pattern of config.excludePatterns) {
try {
if (new RegExp(pattern, 'i').test(input.conversation)) {
return { captured: false, action: 'skipped', reason: `matched exclude pattern: ${pattern}` };
}
} catch {
// Invalid regex, skip
}
}
// Extract memory data using Ollama
const extracted = await extractMemoryData(input.conversation);
if (!extracted) {
return { captured: false, action: 'skipped', reason: 'failed to extract memory data' };
}
// For "decisions" mode, only capture if decisions were found
if (config.mode === 'decisions' && extracted.decisions.length === 0) {
return { captured: false, action: 'skipped', reason: 'no decisions found (decisions mode)' };
}
// Build content
const contentParts: string[] = [extracted.summary];
if (extracted.decisions.length > 0) {
contentParts.push('\n## Decisions');
for (const d of extracted.decisions) {
contentParts.push(`- ${d}`);
}
}
if (extracted.filesDiscussed.length > 0 || input.filesChanged?.length) {
const files = [...new Set([...extracted.filesDiscussed, ...(input.filesChanged || [])])];
contentParts.push('\n## Files');
for (const f of files) {
contentParts.push(`- ${f}`);
}
}
const content = contentParts.join('\n');
// Check for duplicates
const dedupeResult = await checkDuplicate(extracted.summary, content);
// Build tags
const tags = ['auto-capture'];
if (config.autoTag && extracted.topics.length > 0) {
tags.push(...extracted.topics);
}
if (input.source) {
tags.push(`source:${input.source}`);
}
if (dedupeResult.action === 'merge' && dedupeResult.existingNode) {
// Merge into existing node
const merged = await mergeIntoNode(
dedupeResult.existingNode.id,
extracted.summary,
content,
tags
);
return {
captured: true,
action: 'merged',
node: merged || undefined,
reason: `merged with existing node (similarity: ${(dedupeResult.similarity! * 100).toFixed(1)}%)`,
};
}
// Create new node
const node = await addNode({
kind: 'memory',
title: extracted.summary.slice(0, 100),
content,
tags,
status: 'active',
metadata: {
sessionId: input.sessionId,
filesChanged: input.filesChanged,
source: input.source || 'claude-code',
capturedAt: Date.now(),
decisions: extracted.decisions,
},
});
// Link to related node if found
if (dedupeResult.action === 'link' && dedupeResult.existingNode && config.linkRelated) {
await linkRelatedNode(node.id, dedupeResult.existingNode.id);
return {
captured: true,
action: 'linked',
node,
reason: `linked to related node (similarity: ${(dedupeResult.similarity! * 100).toFixed(1)}%)`,
};
}
return {
captured: true,
action: 'created',
node,
};
}
export async function captureText(
text: string,
options: { tags?: string[]; source?: string } = {}
): Promise<CaptureResult> {
const config = getCaptureConfig();
if (config.mode === 'off') {
return { captured: false, action: 'skipped', reason: 'capture disabled' };
}
// Simple text capture - no summarization needed
const dedupeResult = await checkDuplicate(text, text);
const tags = ['manual-capture', ...(options.tags || [])];
if (options.source) {
tags.push(`source:${options.source}`);
}
if (dedupeResult.action === 'merge' && dedupeResult.existingNode) {
const merged = await mergeIntoNode(
dedupeResult.existingNode.id,
text.slice(0, 100),
text,
tags
);
return {
captured: true,
action: 'merged',
node: merged || undefined,
};
}
const node = await addNode({
kind: 'memory',
title: text.slice(0, 100),
content: text,
tags,
status: 'active',
metadata: {
source: options.source || 'manual',
capturedAt: Date.now(),
},
});
if (dedupeResult.action === 'link' && dedupeResult.existingNode && config.linkRelated) {
await linkRelatedNode(node.id, dedupeResult.existingNode.id);
return { captured: true, action: 'linked', node };
}
return { captured: true, action: 'created', node };
}

View File

@@ -0,0 +1,160 @@
import { generate, isGenAvailable } from '../search/ollamaGen';
export interface ExtractedMemory {
summary: string;
topics: string[];
decisions: string[];
filesDiscussed: string[];
}
const SUMMARIZE_PROMPT = `Summarize this Claude Code conversation in 1-2 sentences.
Focus on: what was accomplished, decisions made, problems solved.
Do NOT include greetings or meta-discussion.
Conversation:
{conversation}
Summary:`;
const EXTRACT_PROMPT = `Extract from this conversation:
1. Main topics (as tags, lowercase, hyphenated, max 5)
2. Decisions made (if any, max 3)
3. Code files discussed or modified (if any)
Conversation:
{conversation}
Output as JSON only, no explanation:
{"topics": [], "decisions": [], "files": []}`;
export async function summarizeConversation(conversation: string): Promise<string | null> {
if (!(await isGenAvailable())) return null;
const prompt = SUMMARIZE_PROMPT.replace('{conversation}', conversation);
return generate(prompt);
}
export async function extractMemoryData(conversation: string): Promise<ExtractedMemory | null> {
const available = await isGenAvailable();
// Get summary
const summary = available
? await summarizeConversation(conversation)
: createFallbackSummary(conversation);
if (!summary) return null;
// Extract structured data
let topics: string[] = [];
let decisions: string[] = [];
let filesDiscussed: string[] = [];
if (available) {
const extractPrompt = EXTRACT_PROMPT.replace('{conversation}', conversation);
const extracted = await generate(extractPrompt);
if (extracted) {
try {
// Find JSON in response (handle cases where model adds explanation)
const jsonMatch = extracted.match(/\{[\s\S]*\}/);
if (jsonMatch) {
const data = JSON.parse(jsonMatch[0]);
topics = Array.isArray(data.topics) ? data.topics.slice(0, 5) : [];
decisions = Array.isArray(data.decisions) ? data.decisions.slice(0, 3) : [];
filesDiscussed = Array.isArray(data.files) ? data.files : [];
}
} catch {
// Fall back to basic extraction
topics = extractTopicsBasic(conversation);
filesDiscussed = extractFilesBasic(conversation);
}
}
} else {
// Basic extraction without AI
topics = extractTopicsBasic(conversation);
filesDiscussed = extractFilesBasic(conversation);
}
return {
summary,
topics: sanitizeTags(topics),
decisions,
filesDiscussed,
};
}
function createFallbackSummary(conversation: string): string {
// Take first meaningful line as summary
const lines = conversation.split('\n').filter(l => l.trim().length > 20);
if (lines.length === 0) return 'Conversation captured';
const first = lines[0].trim();
return first.length > 150 ? first.slice(0, 147) + '...' : first;
}
function extractTopicsBasic(conversation: string): string[] {
const topics: string[] = [];
const lower = conversation.toLowerCase();
// Common programming topics
const keywords = [
'typescript', 'javascript', 'python', 'rust', 'go',
'react', 'vue', 'angular', 'node', 'express',
'database', 'sql', 'api', 'auth', 'authentication',
'bug', 'fix', 'error', 'refactor', 'test', 'deploy',
'git', 'docker', 'kubernetes', 'aws', 'cloud',
];
for (const kw of keywords) {
if (lower.includes(kw) && topics.length < 5) {
topics.push(kw);
}
}
return topics;
}
function extractFilesBasic(conversation: string): string[] {
const files: string[] = [];
// Match file paths
const filePatterns = [
/[\w\-\/]+\.(ts|js|tsx|jsx|py|rs|go|md|json|yaml|yml|toml|sql)/gi,
/src\/[\w\-\/]+/gi,
];
for (const pattern of filePatterns) {
const matches = conversation.match(pattern);
if (matches) {
for (const m of matches) {
if (!files.includes(m) && files.length < 10) {
files.push(m);
}
}
}
}
return files;
}
function sanitizeTags(tags: string[]): string[] {
return tags
.map(t => t.toLowerCase().trim().replace(/\s+/g, '-').replace(/[^a-z0-9\-]/g, ''))
.filter(t => t.length > 0 && t.length < 30);
}
export function shouldCapture(conversation: string, minLength: number): boolean {
// Skip very short conversations
if (conversation.length < minLength) return false;
// Skip if mostly greetings/pleasantries
const lower = conversation.toLowerCase();
const greetings = ['hello', 'hi ', 'hey', 'thanks', 'thank you', 'goodbye', 'bye'];
const greetingCount = greetings.filter(g => lower.includes(g)).length;
// If more than half the "content" is greetings, skip
const words = conversation.split(/\s+/).length;
if (words < 20 && greetingCount > 2) return false;
return true;
}

165
src/core/summary.ts Normal file
View File

@@ -0,0 +1,165 @@
import { getDb } from './db';
import { isGenAvailable, generate } from './search/ollamaGen';
export interface SummaryData {
generatedAt: number;
overview: string;
components: { title: string; status?: string; summary?: string }[];
decisions: { title: string; status?: string }[];
tasks: { todo: string[]; in_progress: string[]; done: string[] };
memories: { byTag: Record<string, string[]> };
stats: { total: number; stale: number; orphans: number; edges: number };
}
// Ensure cache table exists
function ensureCacheTable(): void {
const db = getDb();
db.exec(`
CREATE TABLE IF NOT EXISTS system_cache (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at INTEGER NOT NULL
)
`);
}
export function getCachedSummary(): SummaryData | null {
ensureCacheTable();
const db = getDb();
const row = db.prepare('SELECT value FROM system_cache WHERE key = ?').get('summary') as { value: string } | undefined;
if (!row) return null;
try {
return JSON.parse(row.value);
} catch {
return null;
}
}
export function cacheSummary(data: SummaryData): void {
ensureCacheTable();
const db = getDb();
db.prepare(`
INSERT INTO system_cache (key, value, updated_at) VALUES (?, ?, ?)
ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at
`).run('summary', JSON.stringify(data), Date.now());
}
export async function generateSummary(): Promise<SummaryData> {
const db = getDb();
const now = Date.now();
// Gather all active nodes
const rows = db.prepare(`
SELECT id, kind, title, status, tags, metadata, content
FROM nodes WHERE is_stale = 0
ORDER BY kind, title
`).all() as any[];
// Stats
const totalNodes = rows.length;
const staleCount = (db.prepare('SELECT COUNT(*) as c FROM nodes WHERE is_stale = 1').get() as any).c;
const edgeCount = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as any).c;
const edgeRows = db.prepare('SELECT from_id, to_id FROM edges').all() as any[];
const hasEdge = new Set<string>();
for (const e of edgeRows) {
hasEdge.add(e.from_id);
hasEdge.add(e.to_id);
}
const orphanCount = rows.filter(r => !hasEdge.has(r.id)).length;
// Categorize nodes
const components: SummaryData['components'] = [];
const decisions: SummaryData['decisions'] = [];
const tasks: SummaryData['tasks'] = { todo: [], in_progress: [], done: [] };
const memoryTags: Record<string, string[]> = {};
for (const row of rows) {
const meta = JSON.parse(row.metadata || '{}');
if (row.kind === 'component') {
components.push({
title: row.title,
status: row.status,
summary: meta.summary || (row.content?.slice(0, 100) + (row.content?.length > 100 ? '...' : '')),
});
} else if (row.kind === 'decision') {
decisions.push({ title: row.title, status: row.status });
} else if (row.kind === 'task') {
const status = (row.status || 'todo').toLowerCase();
if (status === 'done' || status === 'completed') {
tasks.done.push(row.title);
} else if (status === 'in_progress' || status === 'in-progress') {
tasks.in_progress.push(row.title);
} else {
tasks.todo.push(row.title);
}
} else if (row.kind === 'memory') {
const tags: string[] = JSON.parse(row.tags || '[]');
// Group by first tag, or 'untagged'
const primaryTag = tags[0] || 'untagged';
if (!memoryTags[primaryTag]) memoryTags[primaryTag] = [];
memoryTags[primaryTag].push(row.title);
}
}
// Sort tags by count descending, limit to top 10
const sortedTags = Object.entries(memoryTags)
.sort((a, b) => b[1].length - a[1].length)
.slice(0, 10);
const limitedMemoryTags: Record<string, string[]> = {};
for (const [tag, titles] of sortedTags) {
// Limit to 5 titles per tag
limitedMemoryTags[tag] = titles.slice(0, 5);
if (titles.length > 5) {
limitedMemoryTags[tag].push(`+${titles.length - 5} more`);
}
}
// Generate overview
let overview: string;
const aiAvailable = await isGenAvailable();
if (aiAvailable) {
const prompt = `Write a single sentence (max 30 words) summarizing this knowledge graph for a developer starting a coding session.
Stats: ${totalNodes} nodes (${components.length} components, ${decisions.length} decisions, ${tasks.todo.length + tasks.in_progress.length + tasks.done.length} tasks, ${totalNodes - components.length - decisions.length - tasks.todo.length - tasks.in_progress.length - tasks.done.length} memories), ${edgeCount} edges.
Components: ${components.map(c => c.title).join(', ')}
Active tasks: ${[...tasks.todo, ...tasks.in_progress].join(', ') || 'none'}
Output ONLY the summary sentence.`;
const aiOverview = await generate(prompt);
overview = aiOverview || buildFallbackOverview(totalNodes, components.length, decisions.length, tasks, edgeCount);
} else {
overview = buildFallbackOverview(totalNodes, components.length, decisions.length, tasks, edgeCount);
}
const summary: SummaryData = {
generatedAt: now,
overview,
components,
decisions,
tasks,
memories: { byTag: limitedMemoryTags },
stats: { total: totalNodes, stale: staleCount, orphans: orphanCount, edges: edgeCount },
};
cacheSummary(summary);
return summary;
}
function buildFallbackOverview(
total: number,
components: number,
decisions: number,
tasks: SummaryData['tasks'],
edges: number
): string {
const pending = tasks.todo.length + tasks.in_progress.length;
const parts = [`${total} nodes`, `${components} components`, `${decisions} decisions`];
if (pending > 0) parts.push(`${pending} open tasks`);
parts.push(`${edges} edges`);
return parts.join(', ') + '.';
}