Add auto-capture system (Milestone 2)
- Add capture configuration system with modes: always, manual, decisions, off
- Add Ollama-based conversation summarization and extraction
- Add deduplication via embedding similarity (merge at ≥ 0.90, link at 0.75–0.90)
- Add CLI commands: capture, capture-hook, config
- Add MCP tools: memory_capture, memory_remember, memory_capture_config
- Include summary.ts (previously uncommitted)
This commit is contained in:
68
src/core/capture/config.ts
Normal file
68
src/core/capture/config.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import { getDb } from '../db';
|
||||
|
||||
/**
 * Capture policy stored in the capture configuration.
 *
 * The capture entry points treat `'off'` as "skip everything" and
 * `'decisions'` as "skip conversations in which no decisions were extracted".
 */
export type CaptureMode =
  | 'always'
  | 'manual'
  | 'decisions'
  | 'off';
|
||||
|
||||
/** Settings that control automatic memory capture and de-duplication. */
export interface CaptureConfig {
  /** Capture policy. */
  mode: CaptureMode;

  /** Minimum conversation length, in characters; shorter conversations are skipped. */
  minLength: number;

  /** Regular-expression sources; a conversation matching any of them (case-insensitively) is skipped. */
  excludePatterns: string[];

  /** When true, extracted topics are added as tags on captured memories. */
  autoTag: boolean;

  /** When true, a newly created memory is linked to a similar existing node. */
  linkRelated: boolean;

  /** Similarity at or above which a new memory is linked to the closest existing node. */
  similarityThreshold: number;

  /** Similarity at or above which new content is merged into the closest existing node. */
  mergeThreshold: number;
}
|
||||
|
||||
/**
 * Configuration used when nothing has been persisted yet; stored values are
 * layered on top of these defaults.
 */
const DEFAULT_CONFIG: CaptureConfig = {
  // Capture every conversation that passes the length and exclusion checks.
  mode: 'always',
  // Conversations shorter than this many characters are skipped.
  minLength: 100,
  // No exclusion patterns by default.
  excludePatterns: [],
  autoTag: true,
  linkRelated: true,
  // Link at >= 0.75 similarity, merge at >= 0.90.
  similarityThreshold: 0.75,
  mergeThreshold: 0.9,
};
|
||||
|
||||
/**
 * Creates the `system_config` key/value table when it does not exist yet.
 * Safe to call repeatedly because the statement uses `IF NOT EXISTS`.
 */
function ensureConfigTable(): void {
  const createTableSql = `
    CREATE TABLE IF NOT EXISTS system_config (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL,
      updated_at INTEGER NOT NULL
    )
  `;
  getDb().exec(createTableSql);
}
|
||||
|
||||
/**
 * Loads the capture configuration stored under the `capture` key of
 * `system_config`, layered over the built-in defaults.
 *
 * A fresh object (with its own `excludePatterns` array) is returned on every
 * call, so callers may mutate the result without corrupting the shared
 * defaults.
 *
 * @returns The effective configuration. Defaults are returned when no row is
 *   stored, when the stored value is not valid JSON, or when it is valid JSON
 *   but not a plain object.
 */
export function getCaptureConfig(): CaptureConfig {
  // Never hand out DEFAULT_CONFIG itself: it is module-level shared state.
  const freshDefaults = (): CaptureConfig => ({
    ...DEFAULT_CONFIG,
    excludePatterns: [...DEFAULT_CONFIG.excludePatterns],
  });

  ensureConfigTable();
  const row = getDb()
    .prepare('SELECT value FROM system_config WHERE key = ?')
    .get('capture') as { value: string } | undefined;
  if (!row) return freshDefaults();

  try {
    const stored: unknown = JSON.parse(row.value);
    // Only a plain object can be layered over the defaults; spreading a
    // string or an array would add index keys to the configuration.
    if (typeof stored !== 'object' || stored === null || Array.isArray(stored)) {
      return freshDefaults();
    }
    return { ...freshDefaults(), ...(stored as Partial<CaptureConfig>) };
  } catch {
    // Corrupt JSON in the stored row: behave as if nothing was stored.
    return freshDefaults();
  }
}
|
||||
|
||||
/**
 * Applies a partial update to the capture configuration and persists the
 * merged result as JSON under the `capture` key of `system_config`.
 *
 * @param updates Fields to overwrite; fields not present keep their current value.
 * @returns The merged configuration that was written.
 */
export function setCaptureConfig(updates: Partial<CaptureConfig>): CaptureConfig {
  ensureConfigTable();
  const database = getDb();
  const merged = { ...getCaptureConfig(), ...updates };

  // Upsert: insert the row on first use, overwrite value and timestamp afterwards.
  const upsert = database.prepare(`
    INSERT INTO system_config (key, value, updated_at) VALUES (?, ?, ?)
    ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at
  `);
  upsert.run('capture', JSON.stringify(merged), Date.now());

  return merged;
}
|
||||
|
||||
/**
 * Reads a single field of the effective capture configuration.
 *
 * @param key Name of the configuration field.
 * @returns The current value of that field.
 */
export function getConfigValue<K extends keyof CaptureConfig>(key: K): CaptureConfig[K] {
  const config = getCaptureConfig();
  return config[key];
}
|
||||
|
||||
/**
 * Persists a single field of the capture configuration.
 *
 * @param key Name of the configuration field.
 * @param value New value for that field.
 */
export function setConfigValue<K extends keyof CaptureConfig>(key: K, value: CaptureConfig[K]): void {
  // A computed key widens the literal's type, hence the assertion.
  const patch = { [key]: value } as Partial<CaptureConfig>;
  setCaptureConfig(patch);
}
|
||||
113
src/core/capture/dedupe.ts
Normal file
113
src/core/capture/dedupe.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import { listNodes, addEdge, updateNode } from '../store';
|
||||
import { getEmbedding } from '../search/ollama';
|
||||
import { cosineSimilarity } from '../search/vector';
|
||||
import { Node } from '../../types';
|
||||
import { getCaptureConfig } from './config';
|
||||
|
||||
/** An existing node paired with its similarity score against a query text. */
export interface SimilarNode {
  /** The stored node that was compared. */
  node: Node;
  /** Cosine similarity between the query embedding and the node's embedding. */
  similarity: number;
}
|
||||
|
||||
/** Outcome of a duplicate check for content that is about to be captured. */
export interface DedupeResult {
  /** What the caller should do: create a new node, merge into an existing one, or create and link. */
  action: 'create' | 'merge' | 'link';
  /** The closest existing node; set for `'merge'` and `'link'`. */
  existingNode?: Node;
  /** Similarity score of `existingNode`; set for `'merge'` and `'link'`. */
  similarity?: number;
}
|
||||
|
||||
/**
 * Finds stored, non-stale nodes whose embedding is similar to the given text.
 *
 * @param text Text to embed and compare against stored nodes.
 * @param limit Maximum number of results; values below zero are treated as zero.
 * @param minSimilarity Exclusive lower bound on cosine similarity for a node
 *   to be considered at all. Defaults to 0.5, the previously hard-coded value.
 * @returns Matches sorted by descending similarity; empty when no embedding
 *   could be obtained for `text`.
 */
export async function findSimilarNodes(
  text: string,
  limit: number = 5,
  minSimilarity: number = 0.5
): Promise<SimilarNode[]> {
  const queryEmbedding = await getEmbedding(text);
  if (!queryEmbedding) return [];

  const matches: SimilarNode[] = [];
  for (const node of listNodes({ includeStale: false })) {
    // Nodes without a usable embedding cannot be scored.
    const nodeEmbedding = node.embedding;
    if (!nodeEmbedding || nodeEmbedding.length === 0) continue;

    const similarity = cosineSimilarity(queryEmbedding, nodeEmbedding);
    if (similarity > minSimilarity) {
      matches.push({ node, similarity });
    }
  }

  matches.sort((a, b) => b.similarity - a.similarity);
  // Clamp so a negative limit yields no results instead of "all but the last".
  return matches.slice(0, Math.max(0, limit));
}
|
||||
|
||||
/**
 * Decides how new content relates to what is already stored.
 *
 * The summary and content are concatenated, the single closest node is looked
 * up, and its similarity is compared with the configured thresholds.
 *
 * @param summary Short summary of the new content.
 * @param content Full body of the new content.
 * @returns `'merge'` at or above `mergeThreshold`, `'link'` at or above
 *   `similarityThreshold`, otherwise `'create'`.
 */
export async function checkDuplicate(
  summary: string,
  content: string
): Promise<DedupeResult> {
  const { mergeThreshold, similarityThreshold } = getCaptureConfig();

  const [closest] = await findSimilarNodes(`${summary} ${content}`, 1);
  if (!closest) {
    return { action: 'create' };
  }

  const { node: existingNode, similarity } = closest;
  if (similarity >= mergeThreshold) {
    return { action: 'merge', existingNode, similarity };
  }
  if (similarity >= similarityThreshold) {
    return { action: 'link', existingNode, similarity };
  }
  return { action: 'create' };
}
|
||||
|
||||
/**
 * Appends new content and tags to an existing, non-stale node.
 *
 * The new content is added below a `---` separator stamped with today's date
 * (YYYY-MM-DD); tags are unioned; `lastMergedAt` and `mergeCount` are recorded
 * in the node's metadata.
 *
 * @param existingId Id of the node to merge into.
 * @param newSummary Summary of the new content; accepted but not used by this implementation.
 * @param newContent Content to append.
 * @param newTags Tags to add to the node's existing tags.
 * @returns The result of `updateNode`, or `null` when no non-stale node has that id.
 */
export async function mergeIntoNode(
  existingId: string,
  newSummary: string,
  newContent: string,
  newTags: string[]
): Promise<Node | null> {
  const candidates = listNodes({ includeStale: false });
  const target = candidates.find(candidate => candidate.id === existingId);
  if (!target) return null;

  // Date-only stamp, e.g. "2024-01-31".
  const dateStamp = new Date().toISOString().slice(0, 10);
  let content = newContent;
  if (target.content) {
    content = `${target.content}\n\n---\n[${dateStamp}]\n${newContent}`;
  }

  // Set-based union keeps first-seen order and drops repeats.
  const tags = Array.from(new Set([...target.tags, ...newTags]));

  const metadata = {
    ...target.metadata,
    lastMergedAt: Date.now(),
    mergeCount: (target.metadata.mergeCount || 0) + 1,
  };

  return updateNode(existingId, { content, tags, metadata });
}
|
||||
|
||||
/**
 * Records a `relates_to` edge from a newly captured node to a similar existing node.
 *
 * @param newNodeId Id of the node that was just created.
 * @param existingNodeId Id of the similar node it should point at.
 */
export async function linkRelatedNode(
  newNodeId: string,
  existingNodeId: string
): Promise<void> {
  const linkedAt = Date.now();
  addEdge(newNodeId, existingNodeId, 'relates_to', {
    reason: 'auto-capture-similarity',
    linkedAt,
  });
}
|
||||
192
src/core/capture/index.ts
Normal file
192
src/core/capture/index.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
import { addNode } from '../store';
|
||||
import { getCaptureConfig, CaptureConfig } from './config';
|
||||
import { extractMemoryData, shouldCapture, ExtractedMemory } from './summarize';
|
||||
import { checkDuplicate, mergeIntoNode, linkRelatedNode } from './dedupe';
|
||||
import { Node } from '../../types';
|
||||
|
||||
export { getCaptureConfig, setCaptureConfig, CaptureMode, CaptureConfig } from './config';
|
||||
export { extractMemoryData, shouldCapture, ExtractedMemory } from './summarize';
|
||||
export { findSimilarNodes, checkDuplicate, mergeIntoNode } from './dedupe';
|
||||
|
||||
/** Input to conversation capture. */
export interface CaptureInput {
  /** Raw conversation text to summarize and store. */
  conversation: string;
  /** Identifier of the originating session; stored in the node metadata. */
  sessionId?: string;
  /** Files changed during the conversation; listed in the content and stored in metadata. */
  filesChanged?: string[];
  /** Origin label; added as a `source:<value>` tag and stored in metadata. */
  source?: string;
}
|
||||
|
||||
/** Outcome of a capture attempt. */
export interface CaptureResult {
  /** True when something was stored (created, merged or linked). */
  captured: boolean;
  /** What happened to the input. */
  action: 'created' | 'merged' | 'linked' | 'skipped';
  /** The node that was created or updated, when there is one. */
  node?: Node;
  /** Human-readable explanation, e.g. why the input was skipped. */
  reason?: string;
}
|
||||
|
||||
/**
 * Captures a conversation as a memory node.
 *
 * Steps, in order:
 *  1. Skip when capture is off, the conversation fails `shouldCapture`, or it
 *     matches one of the configured exclude patterns.
 *  2. Extract a summary, topics, decisions and files from the conversation.
 *  3. In `'decisions'` mode, skip when no decisions were extracted.
 *  4. Run the duplicate check and either merge into the closest node, or
 *     create a new node and optionally link it to the closest one.
 *
 * If a merge is requested but the target can no longer be updated, the
 * conversation is stored as a new node instead of being dropped.
 *
 * @param input Conversation text plus optional session, file and source information.
 * @returns What was done and, when something was stored, the affected node.
 */
export async function captureConversation(input: CaptureInput): Promise<CaptureResult> {
  const config = getCaptureConfig();
  const skip = (reason: string): CaptureResult => ({ captured: false, action: 'skipped', reason });
  const percent = (similarity: number | undefined): string => ((similarity ?? 0) * 100).toFixed(1);

  if (config.mode === 'off') {
    return skip('capture disabled');
  }
  if (!shouldCapture(input.conversation, config.minLength)) {
    return skip('conversation too short or trivial');
  }

  for (const pattern of config.excludePatterns) {
    let matcher: RegExp;
    try {
      matcher = new RegExp(pattern, 'i');
    } catch {
      // Deliberately best-effort: an invalid user-supplied pattern must not block capture.
      continue;
    }
    if (matcher.test(input.conversation)) {
      return skip(`matched exclude pattern: ${pattern}`);
    }
  }

  const extracted = await extractMemoryData(input.conversation);
  if (!extracted) {
    return skip('failed to extract memory data');
  }
  if (config.mode === 'decisions' && extracted.decisions.length === 0) {
    return skip('no decisions found (decisions mode)');
  }

  // Build the node body: summary, then optional "Decisions" and "Files" sections.
  const contentParts: string[] = [extracted.summary];
  if (extracted.decisions.length > 0) {
    contentParts.push('\n## Decisions', ...extracted.decisions.map(d => `- ${d}`));
  }
  const files = [...new Set([...extracted.filesDiscussed, ...(input.filesChanged || [])])];
  if (files.length > 0) {
    contentParts.push('\n## Files', ...files.map(f => `- ${f}`));
  }
  const content = contentParts.join('\n');

  const dedupeResult = await checkDuplicate(extracted.summary, content);

  const tags = ['auto-capture'];
  if (config.autoTag && extracted.topics.length > 0) {
    tags.push(...extracted.topics);
  }
  if (input.source) {
    tags.push(`source:${input.source}`);
  }

  if (dedupeResult.action === 'merge' && dedupeResult.existingNode) {
    const merged = await mergeIntoNode(
      dedupeResult.existingNode.id,
      extracted.summary,
      content,
      tags
    );
    if (merged) {
      return {
        captured: true,
        action: 'merged',
        node: merged,
        reason: `merged with existing node (similarity: ${percent(dedupeResult.similarity)}%)`,
      };
    }
    // The merge target could not be updated; fall through and create a new
    // node so the captured content is not lost.
  }

  const node = await addNode({
    kind: 'memory',
    title: extracted.summary.slice(0, 100),
    content,
    tags,
    status: 'active',
    metadata: {
      sessionId: input.sessionId,
      filesChanged: input.filesChanged,
      source: input.source || 'claude-code',
      capturedAt: Date.now(),
      decisions: extracted.decisions,
    },
  });

  if (dedupeResult.action === 'link' && dedupeResult.existingNode && config.linkRelated) {
    await linkRelatedNode(node.id, dedupeResult.existingNode.id);
    return {
      captured: true,
      action: 'linked',
      node,
      reason: `linked to related node (similarity: ${percent(dedupeResult.similarity)}%)`,
    };
  }

  return { captured: true, action: 'created', node };
}
|
||||
|
||||
/**
 * Stores a piece of text as a memory node without summarization.
 *
 * The text is still run through the duplicate check: it is merged into a
 * near-identical node, or created and optionally linked to a similar one.
 * Capture mode `'off'` disables this function as well, and blank text is
 * skipped. If a merge is requested but the target can no longer be updated,
 * the text is stored as a new node instead of being dropped.
 *
 * @param text Text to store; the first 100 characters become the title.
 * @param options Optional extra tags and a source label.
 * @returns What was done and, when something was stored, the affected node.
 */
export async function captureText(
  text: string,
  options: { tags?: string[]; source?: string } = {}
): Promise<CaptureResult> {
  const config = getCaptureConfig();

  if (config.mode === 'off') {
    return { captured: false, action: 'skipped', reason: 'capture disabled' };
  }
  // Nothing useful can be embedded or stored for blank input.
  if (text.trim().length === 0) {
    return { captured: false, action: 'skipped', reason: 'empty text' };
  }

  // The text serves as both summary and content for the duplicate check.
  const dedupeResult = await checkDuplicate(text, text);

  const tags = ['manual-capture', ...(options.tags || [])];
  if (options.source) {
    tags.push(`source:${options.source}`);
  }

  if (dedupeResult.action === 'merge' && dedupeResult.existingNode) {
    const merged = await mergeIntoNode(
      dedupeResult.existingNode.id,
      text.slice(0, 100),
      text,
      tags
    );
    if (merged) {
      return { captured: true, action: 'merged', node: merged };
    }
    // The merge target could not be updated; fall through and create a new
    // node so the text is not lost.
  }

  const node = await addNode({
    kind: 'memory',
    title: text.slice(0, 100),
    content: text,
    tags,
    status: 'active',
    metadata: {
      source: options.source || 'manual',
      capturedAt: Date.now(),
    },
  });

  if (dedupeResult.action === 'link' && dedupeResult.existingNode && config.linkRelated) {
    await linkRelatedNode(node.id, dedupeResult.existingNode.id);
    return { captured: true, action: 'linked', node };
  }

  return { captured: true, action: 'created', node };
}
|
||||
160
src/core/capture/summarize.ts
Normal file
160
src/core/capture/summarize.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import { generate, isGenAvailable } from '../search/ollamaGen';
|
||||
|
||||
/** Structured data extracted from a conversation. */
export interface ExtractedMemory {
  /** Short summary of the conversation. */
  summary: string;
  /** Topic tags, already sanitized. */
  topics: string[];
  /** Decisions made in the conversation, if any. */
  decisions: string[];
  /** Files discussed or modified in the conversation, if any. */
  filesDiscussed: string[];
}
|
||||
|
||||
/**
 * Prompt template for the summary request. The `{conversation}` placeholder
 * is substituted with the conversation text before generation.
 */
const SUMMARIZE_PROMPT = [
  'Summarize this Claude Code conversation in 1-2 sentences.',
  'Focus on: what was accomplished, decisions made, problems solved.',
  'Do NOT include greetings or meta-discussion.',
  '',
  'Conversation:',
  '{conversation}',
  '',
  'Summary:',
].join('\n');
|
||||
|
||||
/**
 * Prompt template for the structured-extraction request. The `{conversation}`
 * placeholder is substituted with the conversation text; the model is asked
 * to answer with a JSON object holding `topics`, `decisions` and `files`.
 */
const EXTRACT_PROMPT = [
  'Extract from this conversation:',
  '1. Main topics (as tags, lowercase, hyphenated, max 5)',
  '2. Decisions made (if any, max 3)',
  '3. Code files discussed or modified (if any)',
  '',
  'Conversation:',
  '{conversation}',
  '',
  'Output as JSON only, no explanation:',
  '{"topics": [], "decisions": [], "files": []}',
].join('\n');
|
||||
|
||||
/**
 * Asks the generation backend for a short summary of a conversation.
 *
 * @param conversation Raw conversation text.
 * @returns The generated summary, or `null` when generation is unavailable
 *   (or when `generate` itself yields `null`).
 */
export async function summarizeConversation(conversation: string): Promise<string | null> {
  if (!(await isGenAvailable())) return null;

  // Use a replacer function so the conversation is inserted verbatim. With a
  // plain string replacement, sequences such as `$&`, `$'` or `$1` inside the
  // conversation would be expanded as replacement patterns and corrupt the prompt.
  const prompt = SUMMARIZE_PROMPT.replace('{conversation}', () => conversation);
  return generate(prompt);
}
|
||||
|
||||
/**
 * Extracts a summary, topics, decisions and files from a conversation.
 *
 * When the generation backend is available it produces the summary and a JSON
 * object with `topics`, `decisions` and `files`. Whenever structured data
 * cannot be obtained (backend unavailable, empty reply, no JSON in the reply,
 * or invalid JSON), topics and files fall back to keyword/regex heuristics and
 * decisions stay empty.
 *
 * @param conversation Raw conversation text.
 * @returns The extracted data, or `null` when no summary could be produced.
 */
export async function extractMemoryData(conversation: string): Promise<ExtractedMemory | null> {
  const available = await isGenAvailable();

  const summary = available
    ? await summarizeConversation(conversation)
    : createFallbackSummary(conversation);
  if (!summary) return null;

  // Model output is untrusted: keep only string items of array fields.
  const stringsOf = (value: unknown): string[] =>
    Array.isArray(value)
      ? value.filter((item): item is string => typeof item === 'string')
      : [];

  let structured: { topics: string[]; decisions: string[]; files: string[] } | null = null;

  if (available) {
    // Replacer function: insert the conversation verbatim (no `$&` / `$1` expansion).
    const extractPrompt = EXTRACT_PROMPT.replace('{conversation}', () => conversation);
    const response = await generate(extractPrompt);

    // The model may wrap the JSON in explanation text; take the outermost braces.
    const jsonMatch = response ? response.match(/\{[\s\S]*\}/) : null;
    if (jsonMatch) {
      try {
        const data = JSON.parse(jsonMatch[0]) as {
          topics?: unknown;
          decisions?: unknown;
          files?: unknown;
        };
        structured = {
          topics: stringsOf(data.topics).slice(0, 5),
          decisions: stringsOf(data.decisions).slice(0, 3),
          files: stringsOf(data.files),
        };
      } catch {
        // Invalid JSON: leave `structured` unset so the heuristics below apply.
      }
    }
  }

  const topics = structured ? structured.topics : extractTopicsBasic(conversation);
  const decisions = structured ? structured.decisions : [];
  const filesDiscussed = structured ? structured.files : extractFilesBasic(conversation);

  return {
    summary,
    topics: sanitizeTags(topics),
    decisions,
    filesDiscussed,
  };
}
|
||||
|
||||
/**
 * Builds a summary without the generation backend: the first line whose
 * trimmed length exceeds 20 characters, cut to 150 characters (147 plus
 * `...`) when longer.
 *
 * @param conversation Raw conversation text.
 * @returns The chosen line, or `'Conversation captured'` when no line qualifies.
 */
function createFallbackSummary(conversation: string): string {
  const firstMeaningful = conversation
    .split('\n')
    .find(line => line.trim().length > 20);
  if (firstMeaningful === undefined) {
    return 'Conversation captured';
  }

  const candidate = firstMeaningful.trim();
  if (candidate.length <= 150) {
    return candidate;
  }
  return candidate.slice(0, 147) + '...';
}
|
||||
|
||||
/**
 * Heuristic topic detection used when the generation backend is unavailable.
 *
 * Keywords are matched as whole words, case-insensitively, so that "good" is
 * not tagged `go`, "latest" is not tagged `test`, "laws" is not tagged `aws`,
 * and so on. Ordinary nouns and verbs also match with a common inflection
 * suffix (`s`, `es`, `ed`, `ing`), e.g. "fixed" matches `fix`. Names of
 * languages and tools must match exactly.
 *
 * @param conversation Raw conversation text.
 * @returns Up to five keywords, in the order of the keyword table.
 */
function extractTopicsBasic(conversation: string): string[] {
  const MAX_TOPICS = 5;

  // [keyword, whether an inflection suffix is allowed]
  const keywords: ReadonlyArray<readonly [string, boolean]> = [
    ['typescript', false], ['javascript', false], ['python', false], ['rust', false], ['go', false],
    ['react', false], ['vue', false], ['angular', false], ['node', false], ['express', false],
    ['database', true], ['sql', false], ['api', true], ['auth', false], ['authentication', false],
    ['bug', true], ['fix', true], ['error', true], ['refactor', true], ['test', true], ['deploy', true],
    ['git', false], ['docker', false], ['kubernetes', false], ['aws', false], ['cloud', false],
  ];

  const lower = conversation.toLowerCase();
  const topics: string[] = [];

  for (const [keyword, inflects] of keywords) {
    if (topics.length >= MAX_TOPICS) break;
    // Keywords are plain letters, so no regex escaping is needed.
    const suffix = inflects ? '(?:s|es|ed|ing)?' : '';
    if (new RegExp(`\\b${keyword}${suffix}\\b`).test(lower)) {
      topics.push(keyword);
    }
  }

  return topics;
}
|
||||
|
||||
/**
 * Heuristic file-path detection used when the generation backend is unavailable.
 *
 * Two passes are made over the text:
 *  1. paths ending in a known source/config extension, and
 *  2. `src/...` paths, which may be directories without an extension.
 *
 * The extension must end at a word boundary, so `package.json` is not cut to
 * `package.js` and `App.tsx` is not cut to `App.ts`. Dots are allowed inside
 * the path, so `foo.test.ts` keeps its stem, and a leading `./` or `../` is
 * kept. A `src/...` match that is only the extensionless part of a file found
 * in the first pass is not added again.
 *
 * @param conversation Raw conversation text.
 * @returns Up to ten distinct paths, in order of discovery.
 */
function extractFilesBasic(conversation: string): string[] {
  const MAX_FILES = 10;
  const fileWithExtension =
    /(?:\.{1,2}\/)*[\w\-\/][\w\-\/.]*\.(?:tsx|ts|jsx|json|js|py|rs|go|md|yaml|yml|toml|sql)\b/gi;
  const srcPath = /src\/[\w\-\/]+/gi;

  const files: string[] = [];
  const add = (candidate: string): void => {
    if (files.length < MAX_FILES && !files.includes(candidate)) {
      files.push(candidate);
    }
  };

  for (const match of conversation.match(fileWithExtension) ?? []) {
    add(match);
  }

  for (const match of conversation.match(srcPath) ?? []) {
    // "src/core/db" is just the stem of an already captured "src/core/db.ts".
    if (files.some(file => file.includes(`${match}.`))) continue;
    add(match);
  }

  return files;
}
|
||||
|
||||
/**
 * Normalizes tags: lowercase, trimmed, whitespace runs turned into hyphens,
 * and every character other than `a-z`, `0-9` and `-` removed. Tags that end
 * up empty or 30 characters or longer are dropped.
 *
 * @param tags Raw tag strings.
 * @returns The normalized tags, in their original order.
 */
function sanitizeTags(tags: string[]): string[] {
  const cleaned: string[] = [];
  for (const raw of tags) {
    const slug = raw
      .toLowerCase()
      .trim()
      .replace(/\s+/g, '-')
      .replace(/[^a-z0-9\-]/g, '');
    if (slug.length > 0 && slug.length < 30) {
      cleaned.push(slug);
    }
  }
  return cleaned;
}
|
||||
|
||||
/**
 * Cheap pre-filter that decides whether a conversation is worth capturing.
 *
 * A conversation is rejected when it is shorter than `minLength` characters,
 * or when it has fewer than 20 whitespace-separated words and contains more
 * than two distinct greeting/pleasantry markers.
 *
 * @param conversation Raw conversation text.
 * @param minLength Minimum length in characters.
 * @returns `true` when the conversation should be captured.
 */
export function shouldCapture(conversation: string, minLength: number): boolean {
  if (conversation.length < minLength) {
    return false;
  }

  // Count how many distinct markers occur (substring match, case-insensitive).
  const lowered = conversation.toLowerCase();
  const greetingMarkers = ['hello', 'hi ', 'hey', 'thanks', 'thank you', 'goodbye', 'bye'];
  let markerHits = 0;
  for (const marker of greetingMarkers) {
    if (lowered.includes(marker)) {
      markerHits += 1;
    }
  }

  const wordCount = conversation.split(/\s+/).length;
  const mostlyPleasantries = wordCount < 20 && markerHits > 2;
  return !mostlyPleasantries;
}
|
||||
Reference in New Issue
Block a user