Documents Module
The Documents module provides file upload, classification, RAG (Retrieval-Augmented Generation) indexing, and document management capabilities. Documents can be used as context sources for AI chat and search.
Basic Usage
import { PlatformClient } from '@enterpriseaigroup/platform-sdk';
import fs from 'fs';
const client = new PlatformClient({ /* config */ });
// Upload a document
const file = fs.readFileSync('./contract.pdf');
const document = await client.documents.upload(file, {
filename: 'contract.pdf',
type: 'contract',
metadata: { client: 'Acme Corp' }
});
// Classify document
const classification = await client.documents.classify(document.id);
console.log(`Document type: ${classification.category}`);
// Index for RAG
await client.documents.index(document.id);
// List documents
const documents = await client.documents.list({ type: 'contract' });
Methods
upload()
Upload a file and create a document record.
upload(
file: Buffer | Blob | File,
options: UploadOptions
): Promise<Document>
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| file | Buffer \| Blob \| File | Yes | File content |
| filename | string | Yes | Original filename with extension |
| type | string | No | Document type/category |
| metadata | Record<string, any> | No | Custom metadata |
| tags | string[] | No | Tags for categorization |
| autoClassify | boolean | No | Auto-classify on upload (default: false) |
| autoIndex | boolean | No | Auto-index for RAG (default: false) |
Returns
interface Document {
id: string;
filename: string;
type?: string;
contentType: string;
size: number;
url: string;
status: 'uploaded' | 'processing' | 'ready' | 'failed';
metadata?: Record<string, any>;
tags?: string[];
classification?: Classification;
indexed: boolean;
createdAt: string;
updatedAt: string;
}
Example
// Upload from file system (Node.js)
import fs from 'fs';
const fileBuffer = fs.readFileSync('./report.pdf');
const document = await client.documents.upload(fileBuffer, {
filename: 'Q1-2026-Report.pdf',
type: 'report',
metadata: {
quarter: 'Q1',
year: 2026,
department: 'Sales'
},
tags: ['quarterly', 'sales'],
autoClassify: true,
autoIndex: true
});
console.log(`Document uploaded: ${document.id}`);
console.log(`URL: ${document.url}`);
// Upload from browser
/**
 * Uploads the first file chosen in a browser file input as a `contract`
 * document with automatic classification requested, then logs the new
 * document's id.
 *
 * Resolves without uploading anything when the input has no selected file.
 *
 * @param fileInput - The file input element whose first selected file is sent.
 */
async function uploadFile(fileInput: HTMLInputElement) {
  const selected = fileInput.files?.[0];
  if (selected) {
    // Keep the user's original filename on the document record.
    const uploadOptions = {
      filename: selected.name,
      type: 'contract',
      autoClassify: true
    };
    const uploaded = await client.documents.upload(selected, uploadOptions);
    console.log('Upload complete:', uploaded.id);
  }
}
list()
List documents with filtering and pagination.
list(options?: DocumentListOptions): Promise<DocumentList>
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| type | string | No | Filter by document type |
| tags | string[] | No | Filter by tags (AND logic) |
| status | DocumentStatus | No | Filter by status |
| indexed | boolean | No | Filter by indexing status |
| limit | number | No | Maximum results (default: 50) |
| offset | number | No | Pagination offset |
Returns
interface DocumentList {
data: Document[];
total: number;
hasMore: boolean;
}
Example
const documents = await client.documents.list({
type: 'contract',
status: 'ready',
indexed: true,
limit: 20
});
console.log(`Found ${documents.total} indexed contracts`);
documents.data.forEach(doc => {
console.log(`${doc.filename} (${doc.size} bytes)`);
});
get()
Get a document by ID.
get(documentId: string): Promise<Document>
Example
const document = await client.documents.get('doc_abc123');
console.log(document.filename);
console.log(`Status: ${document.status}`);
download()
Download document content.
download(documentId: string): Promise<Buffer>
Example
// Download and save to file
const content = await client.documents.download('doc_abc123');
fs.writeFileSync('./downloaded-file.pdf', content);
// Download and convert to base64
const base64 = content.toString('base64');
classify()
Classify a document's content and extract metadata.
classify(documentId: string): Promise<Classification>
Returns
interface Classification {
category: string;
subcategory?: string;
confidence: number;
extractedData?: Record<string, any>;
language?: string;
summary?: string;
}
Example
const classification = await client.documents.classify('doc_abc123');
console.log(`Category: ${classification.category}`);
console.log(`Confidence: ${(classification.confidence * 100).toFixed(0)}%`);
if (classification.extractedData) {
console.log('Extracted fields:');
Object.entries(classification.extractedData).forEach(([key, value]) => {
console.log(` ${key}: ${value}`);
});
}
index()
Index a document for RAG (Retrieval-Augmented Generation).
index(documentId: string, options?: IndexOptions): Promise<IndexResult>
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| chunkSize | number | No | Size of text chunks (default: 1000) |
| chunkOverlap | number | No | Overlap between chunks (default: 200) |
| metadata | Record<string, any> | No | Additional metadata to index |
Returns
interface IndexResult {
documentId: string;
chunks: number;
status: 'indexed' | 'failed';
error?: string;
}
Example
const result = await client.documents.index('doc_abc123', {
chunkSize: 1500,
chunkOverlap: 300,
metadata: {
department: 'Legal',
confidential: true
}
});
console.log(`Indexed ${result.chunks} chunks`);
search()
Search indexed documents semantically.
search(query: string, options?: SearchOptions): Promise<SearchResult[]>
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| query | string | Yes | Search query |
| limit | number | No | Maximum results (default: 10) |
| type | string | No | Filter by document type |
| tags | string[] | No | Filter by tags |
| minScore | number | No | Minimum similarity score (0-1) |
Returns
interface SearchResult {
documentId: string;
filename: string;
chunk: string;
score: number;
metadata?: Record<string, any>;
highlights?: string[];
}
Example
const results = await client.documents.search(
'termination clause',
{
type: 'contract',
minScore: 0.7,
limit: 5
}
);
results.forEach(result => {
console.log(`\n${result.filename} (score: ${result.score.toFixed(2)})`);
console.log(`"${result.chunk}"`);
});
update()
Update document metadata or properties.
update(
documentId: string,
updates: Partial<Document>
): Promise<Document>
Example
// Fetch the current document so its metadata keys can be spread into the
// update below. `metadata` is optional on Document; spreading `undefined`
// into an object literal is a no-op.
const existing = await client.documents.get('doc_abc123');
const updated = await client.documents.update('doc_abc123', {
  type: 'legal-contract',
  tags: ['reviewed', 'approved'],
  metadata: {
    ...existing.metadata,
    reviewedBy: 'user_789',
    reviewedAt: new Date().toISOString()
  }
});
delete()
Delete a document and remove it from the index.
delete(documentId: string): Promise<void>
Example
await client.documents.delete('doc_abc123');
console.log('Document deleted');
Supported File Types
The Documents module supports the following file types:
| Category | Extensions | MIME Types |
|---|---|---|
| Documents | .pdf, .doc, .docx, .txt, .rtf | application/pdf, application/msword, etc. |
| Spreadsheets | .xls, .xlsx, .csv | application/vnd.ms-excel, text/csv |
| Presentations | .ppt, .pptx | application/vnd.ms-powerpoint |
| Images | .jpg, .png, .gif, .webp | image/* |
| Archives | .zip, .tar, .gz | application/zip, etc. |
Size Limits
- Maximum file size: 100 MB per file
- Maximum batch size: 500 MB total
- Maximum concurrent uploads: 10 per tenant
Document Processing Pipeline
// 1. Upload
const document = await client.documents.upload(file, {
  filename: 'contract.pdf',
  autoClassify: true,
  autoIndex: true
});
// 2. Wait for processing (if needed), polling once per second.
// The wait is capped so a document that never leaves 'processing' cannot
// hang the caller forever.
const maxPolls = 60;
let doc = await client.documents.get(document.id);
for (let polls = 0; doc.status === 'processing'; polls++) {
  if (polls >= maxPolls) {
    throw new Error(`Timed out waiting for document ${document.id} to finish processing`);
  }
  await new Promise(resolve => setTimeout(resolve, 1000));
  doc = await client.documents.get(document.id);
}
// 'failed' also ends the wait; stop here instead of handing a failed
// document to chat as context.
if (doc.status === 'failed') {
  throw new Error(`Document ${doc.id} failed processing`);
}
// 3. Verify classification
if (doc.classification) {
  console.log(`Classified as: ${doc.classification.category}`);
}
// 4. Use in chat
const response = await client.chat.sendMessage(
  'What are the payment terms?',
  {
    context: {
      documents: [doc.id]
    }
  }
);
Batch Operations
Batch Upload
/**
 * Uploads a set of files with automatic classification while keeping the
 * number of in-flight uploads within the limit of 10 concurrent uploads per
 * tenant listed under Size Limits.
 *
 * Files are sent in consecutive groups of at most 10, and each group must
 * finish before the next one starts. The returned documents are in the same
 * order as `files`. If any upload rejects, the returned promise rejects and
 * later groups are not started.
 *
 * @param files - Files to upload; an empty array resolves to an empty array.
 * @returns The created documents, one per input file.
 */
async function uploadBatch(files: File[]) {
  const maxConcurrentUploads = 10;
  const uploadOne = (file: File) =>
    client.documents.upload(file, {
      filename: file.name,
      autoClassify: true
    });

  const documents: Awaited<ReturnType<typeof uploadOne>>[] = [];
  for (let start = 0; start < files.length; start += maxConcurrentUploads) {
    const group = files.slice(start, start + maxConcurrentUploads);
    // Promise.all preserves input order, so appending group by group keeps
    // `documents` aligned with `files`.
    documents.push(...(await Promise.all(group.map(uploadOne))));
  }

  console.log(`Uploaded ${documents.length} documents`);
  return documents;
}
Batch Indexing
/**
 * Indexes every given document for RAG in parallel and logs the combined
 * number of chunks reported by the individual index results.
 *
 * @param documentIds - Ids of the documents to index.
 */
async function indexDocuments(documentIds: string[]) {
  // Start all index requests up front, then wait for the whole set.
  const pending = documentIds.map(documentId => client.documents.index(documentId));

  let chunkCount = 0;
  for (const indexed of await Promise.all(pending)) {
    chunkCount += indexed.chunks;
  }

  console.log(`Indexed ${chunkCount} total chunks`);
}
Error Handling
try {
  const document = await client.documents.upload(file, {
    filename: 'large-file.pdf'
  });
} catch (error: unknown) {
  // Caught values are `unknown` under `strict`, and anything can be thrown,
  // so check for a `code` field before reading it.
  const code =
    typeof error === 'object' && error !== null && 'code' in error
      ? (error as { code?: unknown }).code
      : undefined;

  if (code === 'FILE_TOO_LARGE') {
    console.error('File exceeds 100 MB limit');
  } else if (code === 'UNSUPPORTED_FILE_TYPE') {
    console.error('File type not supported');
  } else if (code === 'VIRUS_DETECTED') {
    console.error('File failed security scan');
  } else {
    // Fall back to the raw thrown value when it is not an Error instance.
    console.error('Upload failed:', error instanceof Error ? error.message : error);
  }
}
Complete Example
/**
 * End-to-end contract workflow: uploads the files as `contract` documents,
 * classifies and indexes each one, then asks the chat module to summarize
 * them using the uploaded documents as context.
 *
 * Uploads run in consecutive groups of at most 10 so the number of in-flight
 * uploads stays within the limit of 10 concurrent uploads per tenant listed
 * under Size Limits. Classification and indexing each run in parallel across
 * all documents. Any rejected step rejects the returned promise.
 *
 * @param files - Contract files to process.
 * @returns The uploaded documents, their classifications (same order as the
 *   documents), and the chat response's `content` and `citations`.
 */
async function processContractDocuments(files: File[]) {
  console.log(`Processing ${files.length} contract files...`);

  // Upload all files, one bounded group at a time
  const maxConcurrentUploads = 10;
  const uploadContract = (file: File) =>
    client.documents.upload(file, {
      filename: file.name,
      type: 'contract',
      tags: ['legal', 'pending-review']
    });
  const documents: Awaited<ReturnType<typeof uploadContract>>[] = [];
  for (let start = 0; start < files.length; start += maxConcurrentUploads) {
    const group = files.slice(start, start + maxConcurrentUploads);
    // Promise.all preserves input order, so `documents` stays aligned with `files`.
    documents.push(...(await Promise.all(group.map(uploadContract))));
  }
  console.log('Uploads complete. Classifying...');

  // Classify each document
  const classifications = await Promise.all(
    documents.map(doc => client.documents.classify(doc.id))
  );

  // Index documents for RAG
  console.log('Indexing for search...');
  await Promise.all(
    documents.map(doc => client.documents.index(doc.id))
  );

  // Analyze with AI
  console.log('Analyzing contracts...');
  const analysis = await client.chat.sendMessage(
    'Summarize the key terms and identify any unusual clauses in these contracts.',
    {
      context: {
        documents: documents.map(d => d.id)
      },
      temperature: 0.3
    }
  );

  return {
    documents,
    classifications,
    analysis: analysis.content,
    citations: analysis.citations
  };
}
// Usage
const result = await processContractDocuments(contractFiles);
console.log('Analysis:', result.analysis);
Next Steps
- Learn about Chat module for RAG queries
- Explore useDocuments hook for React integration
- See Documents API endpoints
- Read Document management guide