import { LlamaCloud } from '@llamaindex/llama-cloud';
import { deploy } from '@okrapdf/edge-kit';
import type { PageInput } from '@okrapdf/edge-kit';
// 1. Parse PDF via LlamaParse (or any vendor)
// The client's API key is read from the LLAMAINDEX_API_KEY environment variable.
const client = new LlamaCloud({ apiKey: process.env.LLAMAINDEX_API_KEY });
// Parse the W-9 form straight from its public URL, asking for both the
// `items` and `markdown` expansions; the markdown pages are consumed below.
const parseResult = await client.parsing.parse(
  {
    source_url: 'https://www.irs.gov/pub/irs-pdf/fw9.pdf',
    tier: 'cost_effective',
    version: 'latest',
    expand: ['items', 'markdown'],
  },
  // Second argument: per-call options.
  { verbose: true },
);
// 2. Convert vendor output → vendor-agnostic PageInput
// Each parsed page that carries a `markdown` property becomes one PageInput;
// pages without one are dropped. A missing `markdown` result yields an empty list.
const pages: PageInput[] = (parseResult.markdown?.pages ?? []).flatMap(
  (page): PageInput[] =>
    'markdown' in page
      ? [{ pageNum: page.page_number, text: page.markdown }]
      : [],
);
// 3. Configure PII detection
// Preset name, the named pattern rules to apply, and flags that additionally
// turn on name and address detection. Passed to deploy() under `redact.pii`.
const pii = {
  preset: 'hipaa',
  patterns: [
    'SSN',
    'EMAIL',
    'PHONE_US',
    'TAX_ID_US',
  ],
  includeNames: true,
  includeAddresses: true,
};
// 4. Deploy with redaction config → get 3 URLs
// Fail fast with a clear message when the key is missing, instead of using a
// non-null assertion that would hand `undefined` to deploy() at runtime.
const okraApiKey = process.env.OKRA_API_KEY;
if (!okraApiKey) {
  throw new Error('OKRA_API_KEY environment variable is not set');
}
const result = await deploy({
  pages,
  meta: { title: 'IRS W-9 (Rev. 3-2024)', filename: 'fw9.pdf' },
  redact: {
    pii,
    // Strings allowed to remain visible in the public view.
    publicFieldAllowlist: ['Form W-9', 'Part I', 'Part II', 'General Instructions'],
  },
  apiKey: okraApiKey,
});
console.log(result.urls.admin); // full text
console.log(result.urls.viewer); // PII redacted
console.log(result.urls.public); // allowlist only
// Example output: { totalMatches: 5, pagesAffected: 2, byRule: { SSN: 1, EMAIL: 2, PHONE_US: 2 } }
console.log(result.stats);