Add freshness scoring, auto-decay, and USAGE.md
Add lastAccessedAt timestamp to nodes with schema migration and backfill. Touch timestamp on read, apply exponential freshness decay (~69-day half-life) to search scoring alongside BM25 and vector weights. Add auto-decay that marks untouched nodes as stale after a configurable threshold, with CLI command and server-side daily interval. Include comprehensive USAGE.md documenting all CLI commands and REST API.
This commit is contained in:
@@ -61,6 +61,13 @@ export function getDb(): Database.Database {
|
||||
_db.pragma('foreign_keys = ON');
|
||||
_db.exec(SCHEMA);
|
||||
|
||||
// Migration: add last_accessed_at column
|
||||
const cols = _db.prepare("PRAGMA table_info(nodes)").all() as any[];
|
||||
if (!cols.some((c: any) => c.name === 'last_accessed_at')) {
|
||||
_db.exec('ALTER TABLE nodes ADD COLUMN last_accessed_at INTEGER');
|
||||
_db.exec('UPDATE nodes SET last_accessed_at = updated_at WHERE last_accessed_at IS NULL');
|
||||
}
|
||||
|
||||
return _db;
|
||||
}
|
||||
|
||||
|
||||
10
src/core/decay.ts
Normal file
10
src/core/decay.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { getDb } from './db';
|
||||
|
||||
/**
 * Marks nodes as stale when they have not been accessed recently.
 *
 * A node is flagged (`is_stale = 1`) when it is not already stale and its
 * `last_accessed_at` is either NULL or earlier than `maxAgeDays` days before
 * the current time (timestamps are compared against `Date.now()` milliseconds).
 *
 * @param maxAgeDays - Age threshold in days; must not be negative or NaN. Defaults to 180.
 * @returns The number of rows changed by the UPDATE.
 * @throws RangeError if `maxAgeDays` is negative or NaN.
 */
export function decayStaleNodes(maxAgeDays: number = 180): number {
  // Validate before touching the database: a negative age would move the
  // cutoff into the future and flag every node, and NaN gives a meaningless
  // cutoff. `!(x >= 0)` rejects both in one comparison.
  if (!(maxAgeDays >= 0)) {
    throw new RangeError(`maxAgeDays must be a non-negative number (got ${maxAgeDays})`);
  }

  const msPerDay = 24 * 60 * 60 * 1000;
  const threshold = Date.now() - maxAgeDays * msPerDay;

  const db = getDb();
  const result = db.prepare(
    'UPDATE nodes SET is_stale = 1 WHERE is_stale = 0 AND (last_accessed_at IS NULL OR last_accessed_at < ?)'
  ).run(threshold);
  return result.changes;
}
|
||||
@@ -46,6 +46,7 @@ export function getConnections(nodeId: string): { incoming: (Edge & { node: Node
|
||||
embedding: null,
|
||||
createdAt: row.n_created,
|
||||
updatedAt: row.n_updated,
|
||||
lastAccessedAt: row.last_accessed_at ?? row.n_updated,
|
||||
isStale: !!row.is_stale,
|
||||
},
|
||||
});
|
||||
|
||||
6
src/core/search/freshness.ts
Normal file
6
src/core/search/freshness.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
/** Exponential decay rate per day; ln(2) / 0.01 ≈ 69.3, i.e. a ~69 day half-life. */
const DECAY_RATE = 0.01;

/** Milliseconds in one day, used to convert timestamp differences to days. */
const MS_PER_DAY = 1000 * 60 * 60 * 24;

/**
 * Computes an exponential freshness factor from a node's last access time.
 *
 * The result is `exp(-DECAY_RATE * ageDays)`: 1 for a node accessed at `now`,
 * falling towards 0 as the node ages. Timestamps in the future are clamped to
 * an age of zero and therefore yield 1.
 *
 * @param lastAccessedAt - Last access time in epoch milliseconds.
 * @param now - Reference time in epoch milliseconds; defaults to `Date.now()`.
 * @returns A multiplier in the range [0, 1].
 */
export function freshnessMultiplier(lastAccessedAt: number, now: number = Date.now()): number {
  const ageDays = (now - lastAccessedAt) / MS_PER_DAY;

  // A missing or non-numeric timestamp (e.g. undefined from an untyped row)
  // makes the age NaN. Returning NaN would poison the caller's score and any
  // sort over it, so treat an unknown access time as fully decayed instead.
  if (Number.isNaN(ageDays)) return 0;

  return Math.exp(-DECAY_RATE * Math.max(0, ageDays));
}
|
||||
@@ -2,9 +2,11 @@ import { Node, SearchResult, QueryOptions } from '../../types';
|
||||
import { bm25Search } from './bm25';
|
||||
import { cosineSimilarity } from './vector';
|
||||
import { getEmbedding, isOllamaAvailable } from './ollama';
|
||||
import { freshnessMultiplier } from './freshness';
|
||||
|
||||
const VECTOR_WEIGHT = 0.7;
|
||||
const BM25_WEIGHT = 0.3;
|
||||
const VECTOR_WEIGHT = 0.6;
|
||||
const BM25_WEIGHT = 0.25;
|
||||
// Share of each result's score that is exposed to freshness decay. hybridSearch
// multiplies the score by (1 - FRESHNESS_WEIGHT + FRESHNESS_WEIGHT * freshness),
// so a node whose freshness has decayed to 0 still keeps 85% of its score.
const FRESHNESS_WEIGHT = 0.15;
|
||||
|
||||
function deserializeEmbedding(blob: Buffer | null): number[] | null {
|
||||
if (!blob || blob.length === 0) return null;
|
||||
@@ -66,6 +68,13 @@ export async function hybridSearch(
|
||||
if (node) results.push({ node, score });
|
||||
}
|
||||
|
||||
// Apply freshness multiplier
|
||||
const now = Date.now();
|
||||
for (const r of results) {
|
||||
const freshness = freshnessMultiplier(r.node.lastAccessedAt, now);
|
||||
r.score = r.score * (1 - FRESHNESS_WEIGHT + FRESHNESS_WEIGHT * freshness);
|
||||
}
|
||||
|
||||
results.sort((a, b) => b.score - a.score);
|
||||
return results.slice(0, limit);
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ function rowToNode(row: any): Node {
|
||||
embedding: row.embedding ? deserializeEmbedding(row.embedding) : null,
|
||||
createdAt: row.created_at,
|
||||
updatedAt: row.updated_at,
|
||||
lastAccessedAt: row.last_accessed_at ?? row.updated_at,
|
||||
isStale: !!row.is_stale,
|
||||
};
|
||||
}
|
||||
@@ -36,13 +37,13 @@ export async function addNode(input: AddNodeInput): Promise<Node> {
|
||||
const embedding = await getEmbedding(`${input.title} ${content}`);
|
||||
|
||||
db.prepare(`
|
||||
INSERT INTO nodes (id, kind, title, content, status, tags, metadata, embedding, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT INTO nodes (id, kind, title, content, status, tags, metadata, embedding, created_at, updated_at, last_accessed_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`).run(
|
||||
id, input.kind, input.title, content, input.status ?? null,
|
||||
JSON.stringify(tags), JSON.stringify(metadata),
|
||||
embedding ? serializeEmbedding(embedding) : null,
|
||||
now, now
|
||||
now, now, now
|
||||
);
|
||||
|
||||
// Insert tags
|
||||
@@ -53,14 +54,16 @@ export async function addNode(input: AddNodeInput): Promise<Node> {
|
||||
|
||||
return {
|
||||
id, kind: input.kind, title: input.title, content, status: input.status,
|
||||
tags, metadata, embedding, createdAt: now, updatedAt: now, isStale: false,
|
||||
tags, metadata, embedding, createdAt: now, updatedAt: now, lastAccessedAt: now, isStale: false,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Looks up a single node by its exact id and records the access.
 *
 * Returns null when no row matches. Otherwise the row's last_accessed_at is
 * set to the current time, and the node is built from the row as it was read
 * before that write, so the returned lastAccessedAt is the previous value.
 *
 * NOTE(review): this listing shows the old one-line return next to the lines
 * that replace it; presumably only the replacement exists in the committed
 * file — confirm against the repository.
 */
export function getNode(id: string): Node | null {
|
||||
const db = getDb();
|
||||
const row = db.prepare('SELECT * FROM nodes WHERE id = ?').get(id) as any;
|
||||
return row ? rowToNode(row) : null;
|
||||
if (!row) return null;
|
||||
// Touch the node: every successful read refreshes its access timestamp.
db.prepare('UPDATE nodes SET last_accessed_at = ? WHERE id = ?').run(Date.now(), id);
|
||||
return rowToNode(row);
|
||||
}
|
||||
|
||||
export function findNodeByPrefix(prefix: string): Node | null {
|
||||
|
||||
Reference in New Issue
Block a user