- Add URL fetching with HTML-to-text extraction
- Add basic PDF text extraction
- Add smart content chunking with overlap
- Add deduplication via content checksums
- Add auto-linking to semantically related nodes
- Add CLI commands: ingest, clip
- Add MCP tools: memory_ingest, memory_clip
96 lines
3.1 KiB
TypeScript
96 lines
3.1 KiB
TypeScript
import { Command } from 'commander';
|
|
import chalk from 'chalk';
|
|
import { ingest } from '../../core/ingest';
|
|
|
|
/**
 * `ingest` CLI command.
 *
 * Forwards a source (URL or file path) or stdin content to `ingest()` and
 * prints a short summary of the nodes it returned.
 *
 * Exit behaviour:
 * - exits with code 1 when neither a source nor `--stdin` is given,
 * - exits with code 1 when `--chunk-size` is not a positive integer,
 * - exits with code 1 when `ingest()` throws,
 * - returns normally (exit code 0) when `ingest()` reports `success: false`,
 *   which this command presents to the user as a duplicate.
 */
export const ingestCommand = new Command('ingest')
  .description('Ingest content from URLs, files, or stdin into the knowledge graph')
  .argument('[source]', 'URL or file path to ingest')
  .option('-t, --title <title>', 'Override title')
  .option('--tags <tags>', 'Tags to apply (comma-separated)')
  .option('--stdin', 'Read content from stdin')
  .option('--chunk-size <n>', 'Max tokens per chunk (default: 1000)')
  .option('--no-link', 'Skip auto-linking to related nodes')
  .action(
    async (
      source: string | undefined,
      opts: {
        title?: string;
        tags?: string;
        stdin?: boolean;
        chunkSize?: string;
        // Populated by the negatable `--no-link` flag.
        link?: boolean;
      },
    ) => {
      try {
        if (!source && !opts.stdin) {
          console.error(chalk.red('Error: Provide a source URL/file or use --stdin'));
          process.exit(1);
        }

        // Validate --chunk-size up front so a typo such as `--chunk-size abc`
        // is reported to the user instead of silently passing NaN downstream.
        let chunkStrategy: { maxTokens: number } | undefined;
        if (opts.chunkSize !== undefined) {
          const maxTokens = Number.parseInt(opts.chunkSize, 10);
          if (Number.isNaN(maxTokens) || maxTokens <= 0) {
            console.error(chalk.red('Error: --chunk-size must be a positive integer'));
            process.exit(1);
          }
          chunkStrategy = { maxTokens };
        }

        // Drop empty entries produced by inputs like "a,,b" or a trailing comma.
        const tags = opts.tags
          ?.split(',')
          .map((tag) => tag.trim())
          .filter((tag) => tag.length > 0);

        if (opts.stdin) {
          console.log(chalk.cyan('Reading from stdin... (Ctrl+D to end)'));
        } else {
          console.log(chalk.cyan(`Ingesting: ${source}`));
        }

        const result = await ingest(source || '', {
          title: opts.title,
          tags,
          stdin: opts.stdin,
          noLink: !opts.link,
          chunkStrategy,
        });

        // NOTE(review): every unsuccessful result is reported as a duplicate;
        // confirm that `ingest()` has no other non-throwing failure mode.
        if (!result.success) {
          console.log(chalk.yellow('Content already exists (duplicate checksum)'));
          return;
        }

        console.log();
        console.log(chalk.green(`✓ Ingested: ${result.title}`));
        console.log();
        console.log(`  Type: ${result.sourceType}`);
        console.log(`  Nodes: ${result.nodeCount}`);

        if (result.parentId) {
          console.log(`  Parent: ${result.parentId.slice(0, 8)}`);
        }

        // Show at most five nodes (abbreviated ids) to keep the output short.
        for (const node of result.nodes.slice(0, 5)) {
          console.log(chalk.dim(`    - ${node.id.slice(0, 8)} ${node.title}`));
        }

        if (result.nodes.length > 5) {
          console.log(chalk.dim(`    ... and ${result.nodes.length - 5} more`));
        }
      } catch (err: unknown) {
        // Anything can be thrown; only Error instances carry a `.message`.
        const message = err instanceof Error ? err.message : String(err);
        console.error(chalk.red(`Error: ${message}`));
        process.exit(1);
      }
    },
  );
|
|
|
|
/**
 * `clip` CLI command: a shortcut for ingesting a single URL.
 *
 * Only arguments starting with `http://` or `https://` are accepted. The URL
 * is passed to `ingest()` together with the optional title and tags; no other
 * ingest options are exposed here.
 *
 * Exit behaviour:
 * - exits with code 1 when the argument is not an http(s) URL,
 * - exits with code 1 when `ingest()` throws,
 * - returns normally (exit code 0) when `ingest()` reports `success: false`,
 *   which this command presents to the user as a duplicate.
 */
export const clipCommand = new Command('clip')
  .description('Quick clip a URL (alias for ingest)')
  .argument('<url>', 'URL to clip')
  .option('-t, --title <title>', 'Override title')
  .option('--tags <tags>', 'Tags to apply (comma-separated)')
  .action(async (url: string, opts: { title?: string; tags?: string }) => {
    try {
      if (!url.startsWith('http://') && !url.startsWith('https://')) {
        console.error(chalk.red('Error: clip expects a URL'));
        process.exit(1);
      }

      // Drop empty entries produced by inputs like "a,,b" or a trailing comma.
      const tags = opts.tags
        ?.split(',')
        .map((tag) => tag.trim())
        .filter((tag) => tag.length > 0);

      console.log(chalk.cyan(`Clipping: ${url}`));

      const result = await ingest(url, {
        title: opts.title,
        tags,
      });

      if (!result.success) {
        console.log(chalk.yellow('Already clipped (duplicate)'));
        return;
      }

      console.log(chalk.green(`✓ ${result.title}`));

      // Guard against an empty node list so a successful clip is never
      // reported as a failure just because there is no id to display.
      const [firstNode] = result.nodes;
      if (firstNode) {
        console.log(chalk.dim(`  ${firstNode.id.slice(0, 8)}`));
      }
    } catch (err: unknown) {
      // Anything can be thrown; only Error instances carry a `.message`.
      const message = err instanceof Error ? err.message : String(err);
      console.error(chalk.red(`Error: ${message}`));
      process.exit(1);
    }
  });
|