diff --git a/.codex/.gitignore b/.codex/.gitignore new file mode 100644 index 00000000000..768f3da2a60 --- /dev/null +++ b/.codex/.gitignore @@ -0,0 +1 @@ +environments/ diff --git a/apps/hash-frontend/.gitignore b/apps/hash-frontend/.gitignore new file mode 100644 index 00000000000..7504bbb1f81 --- /dev/null +++ b/apps/hash-frontend/.gitignore @@ -0,0 +1,2 @@ +# AI-generated working documents (roadmaps, plans, research notes) +memory/ diff --git a/apps/hash-frontend/next.config.js b/apps/hash-frontend/next.config.js index 19ce65f5d5c..69c9a6a9a9c 100644 --- a/apps/hash-frontend/next.config.js +++ b/apps/hash-frontend/next.config.js @@ -64,6 +64,10 @@ const apiUrl = process.env.NEXT_PUBLIC_API_ORIGIN ?? "http://localhost:5001"; const apiDomain = new URL(apiUrl).hostname; +// Mastra API origin for ingest pipeline proxy (local dev: port 4111) +const mastraApiOrigin = + process.env.MASTRA_API_ORIGIN ?? "http://localhost:4111"; + /** * @todo: import the page `entityTypeId` from `@local/hash-isomorphic-utils/ontology-types` * when the `next.config.js` supports imports from modules @@ -81,6 +85,19 @@ export default withSentryConfig( { async rewrites() { return [ + // Ingest pipeline proxy → Mastra API + { + source: "/api/ingest", + destination: `${mastraApiOrigin}/discovery-runs`, + }, + { + source: "/api/ingest/:path*", + destination: `${mastraApiOrigin}/discovery-runs/:path*`, + }, + { + source: "/api/ingest-fixtures/:path*", + destination: `${mastraApiOrigin}/discovery-fixtures/:path*`, + }, { source: "/pages", destination: `/entities?entityTypeIdOrBaseUrl=${pageEntityTypeBaseUrl}`, diff --git a/apps/hash-frontend/package.json b/apps/hash-frontend/package.json index 9c6157f919b..7a997bb4f79 100644 --- a/apps/hash-frontend/package.json +++ b/apps/hash-frontend/package.json @@ -11,6 +11,7 @@ "codegen": "rimraf './src/**/*.gen.*'; graphql-codegen --config codegen.config.ts", "dev": "next dev", "fix:eslint": "eslint --fix .", + "fix:format": "biome format --write", 
"lint:eslint": "eslint --report-unused-disable-directives .", "lint:tsc": "tsc --noEmit", "start": "next start", @@ -20,6 +21,7 @@ }, "dependencies": { "@apollo/client": "3.10.5", + "@ark-ui/react": "5.26.2", "@blockprotocol/core": "0.1.4", "@blockprotocol/graph": "workspace:*", "@blockprotocol/hook": "0.1.8", diff --git a/apps/hash-frontend/src/pages/ingest.page.tsx b/apps/hash-frontend/src/pages/ingest.page.tsx new file mode 100644 index 00000000000..26a3f10782a --- /dev/null +++ b/apps/hash-frontend/src/pages/ingest.page.tsx @@ -0,0 +1,65 @@ +import { InfinityLightIcon } from "@hashintel/design-system"; +import { Box, Container } from "@mui/material"; +import { useRouter } from "next/router"; +import { useEffect } from "react"; + +import type { NextPageWithLayout } from "../shared/layout"; +import { getLayoutWithSidebar } from "../shared/layout"; +import { WorkersHeader } from "../shared/workers-header"; +import { getIngestResultsPath } from "./ingest.page/routing"; +import { UploadPanel } from "./ingest.page/upload-panel"; +import { shouldFetchResults, useIngestRun } from "./ingest.page/use-ingest-run"; + +const IngestPage: NextPageWithLayout = () => { + const router = useRouter(); + const { state, upload, reset } = useIngestRun(); + + useEffect(() => { + if (!shouldFetchResults(state)) { + return; + } + void router.push( + getIngestResultsPath({ + kind: "run", + runId: state.runStatus.runId, + }), + ); + }, [router, state]); + + return ( + <> + + + + + + + + ); +}; + +IngestPage.getLayout = (page) => + getLayoutWithSidebar(page, { fullWidth: true }); + +export default IngestPage; diff --git a/apps/hash-frontend/src/pages/ingest.page/bbox-transform.ts b/apps/hash-frontend/src/pages/ingest.page/bbox-transform.ts new file mode 100644 index 00000000000..e6cbf9094d0 --- /dev/null +++ b/apps/hash-frontend/src/pages/ingest.page/bbox-transform.ts @@ -0,0 +1,38 @@ +/** + * Coordinate transform: PDF-point bbox → CSS percentage positioning. 
+ * + * Overlays are absolutely-positioned
/**
 * Overlays are absolutely-positioned <div>s inside a container wrapping the
 * page <img>. Percentage-based positioning keeps them responsive.
 */

/** Axis-aligned box described by two opposite corners, in PDF points. */
export interface BboxInput {
  x1: number;
  y1: number;
  x2: number;
  y2: number;
}

/** CSS placement of a box, each field a percentage of the page size. */
export interface BboxPercentage {
  left: number;
  top: number;
  width: number;
  height: number;
}

/**
 * Convert a PDF-point bounding box into percentage offsets suitable for
 * absolutely positioning an overlay on top of the rendered page.
 *
 * @param bbox - Two opposite corners of the box; the corners may be given in
 *   either order.
 * @param pdfPageWidth - Page width in the same unit as `bbox`.
 * @param pdfPageHeight - Page height in the same unit as `bbox`.
 * @param origin - Where the bbox coordinate system has its (0, 0). For
 *   `"BOTTOMLEFT"` the y axis points up, so it is flipped to CSS's top-down
 *   axis; for `"TOPLEFT"` it is used as-is.
 * @returns Percentages for `left`, `top`, `width` and `height`. All zeros
 *   when either page dimension is not a positive number.
 */
export function bboxToPercentage(
  bbox: BboxInput,
  pdfPageWidth: number,
  pdfPageHeight: number,
  origin: "BOTTOMLEFT" | "TOPLEFT",
): BboxPercentage {
  // Dividing by a zero/negative/NaN page size would yield NaN or Infinity,
  // which are not valid CSS lengths — collapse the overlay instead.
  if (!(pdfPageWidth > 0) || !(pdfPageHeight > 0)) {
    return { left: 0, top: 0, width: 0, height: 0 };
  }

  // Normalise the corners so swapped inputs cannot produce a negative size.
  const minX = Math.min(bbox.x1, bbox.x2);
  const maxX = Math.max(bbox.x1, bbox.x2);
  const minY = Math.min(bbox.y1, bbox.y2);
  const maxY = Math.max(bbox.y1, bbox.y2);

  const left = (minX / pdfPageWidth) * 100;
  const width = ((maxX - minX) / pdfPageWidth) * 100;
  const height = ((maxY - minY) / pdfPageHeight) * 100;

  // With a bottom-left origin the box's upper edge is its largest y, and the
  // CSS `top` is the distance from that edge down from the top of the page.
  const top =
    origin === "BOTTOMLEFT"
      ? ((pdfPageHeight - maxY) / pdfPageHeight) * 100
      : (minY / pdfPageHeight) * 100;

  return { left, top, width, height };
}

// ---------------------------------------------------------------------------
// evidence-resolver.ts
// ---------------------------------------------------------------------------

/**
 * Evidence resolver: selection → highlighted block IDs + target page.
 *
 * Pure function. No I/O, no React.
 */
import type { Block, ExtractedClaim, RosterEntry } from "./types";

/** What the user currently has selected in the results panel, if anything. */
export type Selection =
  | { kind: "roster"; entry: RosterEntry }
  | { kind: "claim"; claim: ExtractedClaim }
  | null;

export interface EvidenceResult {
  /** De-duplicated IDs of the blocks backing the selection, in first-seen order. */
  blockIds: string[];
  /** Lowest page number carrying one of those blocks, or `null` if none does. */
  targetPage: number | null;
}

/**
 * Resolve a selection to the blocks that evidence it and the page to show.
 *
 * @param selection - A roster entry (evidence = its mentions' blocks), a claim
 *   (evidence = the blocks of all its evidence refs), or `null`.
 * @param blocks - Every block of the corpus; used to look up page anchors.
 * @returns The unique block IDs and the smallest anchored page among them.
 */
export function resolveEvidence(
  selection: Selection,
  blocks: Block[],
): EvidenceResult {
  if (!selection) {
    return { blockIds: [], targetPage: null };
  }

  // A Set both de-duplicates (preserving insertion order) and gives O(1)
  // membership tests for the scan over `blocks` below.
  const wantedIds = new Set(
    selection.kind === "roster"
      ? selection.entry.mentions.map((mention) => mention.blockId)
      : selection.claim.evidenceRefs.flatMap((ref) => ref.blockIds),
  );

  let targetPage: number | null = null;
  for (const block of blocks) {
    if (!wantedIds.has(block.blockId)) {
      continue;
    }
    for (const anchor of block.anchors) {
      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- Anchor union may expand
      if (anchor.kind === "file_page_bbox") {
        if (targetPage === null || anchor.page < targetPage) {
          targetPage = anchor.page;
        }
      }
    }
  }

  return { blockIds: [...wantedIds], targetPage };
}
blocks.filter( + (block) => + highlightedBlockIds.includes(block.blockId) && + block.anchors.some((anchor) => anchor.page === currentPage), + ) + : []; + + return ( + + {/* Page navigation */} + + + + Page {currentPage} / {totalPages} + + + {visibleBlocks.length > 0 && ( + + {visibleBlocks.length} highlighted + + )} + + + {/* Page image with bbox overlays */} + + {`Page + + {visibleBlocks.map((block) => { + const anchor = block.anchors.find((anc) => anc.page === currentPage); + if (!anchor) { + return null; + } + + const pct = bboxToPercentage( + anchor.bbox, + pageImage.pdfPageWidth, + pageImage.pdfPageHeight, + pageImage.bboxOrigin, + ); + + return ( + + ); + })} + + + ); +}; diff --git a/apps/hash-frontend/src/pages/ingest.page/results-panel.tsx b/apps/hash-frontend/src/pages/ingest.page/results-panel.tsx new file mode 100644 index 00000000000..44635d0e414 --- /dev/null +++ b/apps/hash-frontend/src/pages/ingest.page/results-panel.tsx @@ -0,0 +1,210 @@ +/** + * Results panel: roster entries + claims list with click-to-highlight. + * + * Left-side panel in the ingest results view. 
+ */ +import { + Box, + ButtonBase, + List, + ListSubheader, + Typography, +} from "@mui/material"; +import type { FunctionComponent } from "react"; + +import type { Selection } from "./evidence-resolver"; +import type { ExtractedClaim, RosterEntry } from "./types"; + +interface ResultsPanelProps { + rosterEntries: RosterEntry[]; + claims: ExtractedClaim[]; + selection: Selection; + onSelect: (selection: Selection) => void; +} + +const CATEGORY_ICONS: Record = { + person: "👤", + organization: "🏢", + place: "📍", + artifact: "📄", + event: "📅", + other: "◽", +}; + +export const ResultsPanel: FunctionComponent = ({ + rosterEntries, + claims, + selection, + onSelect, +}) => { + const isRosterSelected = (entry: RosterEntry) => + selection?.kind === "roster" && + selection.entry.rosterEntryId === entry.rosterEntryId; + + const isClaimSelected = (claim: ExtractedClaim) => + selection?.kind === "claim" && selection.claim.claimId === claim.claimId; + + return ( + `1px solid ${palette.gray[30]}`, + overflowY: "auto", + display: "flex", + flexDirection: "column", + }} + > + {/* Roster section */} + `1px solid ${palette.gray[30]}`, + fontSize: "0.75rem", + fontWeight: 600, + textTransform: "uppercase", + letterSpacing: "0.05em", + lineHeight: 1, + }} + > + Roster ({rosterEntries.length}) + + + {rosterEntries.map((entry) => { + const selected = isRosterSelected(entry); + return ( + + onSelect(selected ? null : { kind: "roster", entry }) + } + sx={{ + display: "block", + width: "100%", + px: 2, + py: 1, + textAlign: "left", + borderBottom: ({ palette }) => `1px solid ${palette.gray[20]}`, + bgcolor: selected ? "rgba(59, 130, 246, 0.08)" : "transparent", + "&:hover": { bgcolor: "rgba(59, 130, 246, 0.04)" }, + }} + > + + + {CATEGORY_ICONS[entry.category ?? "other"] ?? "◽"} + + + {entry.canonicalName} + + + + {entry.mentions.length} mention + {entry.mentions.length !== 1 ? 
"s" : ""} + + + ); + })} + + + {/* Claims section */} + `1px solid ${palette.gray[30]}`, + borderBottom: ({ palette }) => `1px solid ${palette.gray[30]}`, + mt: 1, + fontSize: "0.75rem", + fontWeight: 600, + textTransform: "uppercase", + letterSpacing: "0.05em", + lineHeight: 1, + }} + > + Claims ({claims.length}) + + + {rosterEntries.map((entry) => { + const entryClaims = claims.filter( + (claim) => claim.rosterEntryId === entry.rosterEntryId, + ); + if (entryClaims.length === 0) { + return null; + } + return ( + + + `1px solid ${palette.gray[20]}`, + bgcolor: "rgba(0, 0, 0, 0.02)", + }} + > + {CATEGORY_ICONS[entry.category ?? "other"] ?? "◽"}{" "} + {entry.canonicalName} ({entryClaims.length}) + + {entryClaims.map((claim) => { + const selected = isClaimSelected(claim); + const firstEvidenceRef = claim.evidenceRefs.at(0); + const quote = firstEvidenceRef + ? firstEvidenceRef.quote.substring(0, 60) + : undefined; + return ( + + onSelect(selected ? null : { kind: "claim", claim }) + } + sx={{ + display: "block", + width: "100%", + px: 2, + py: 1, + pl: 3, + textAlign: "left", + borderBottom: ({ palette }) => + `1px solid ${palette.gray[20]}`, + bgcolor: selected + ? 
/** Bundled example documents that can be viewed without running the pipeline. */
export const INGEST_FIXTURES = [
  { id: "uk-practice-direction-51zh", label: "UK Practice Direction" },
  { id: "gao-25-107546", label: "GAO Report" },
] as const;

/** Where a results view gets its data: a bundled fixture or a pipeline run. */
export type IngestResultsSource =
  | { kind: "fixture"; fixtureId: string }
  | { kind: "run"; runId: string };

// Used whenever the query names no run and no recognised fixture.
const DEFAULT_FIXTURE_ID = INGEST_FIXTURES[0].id;

/** True when `candidate` is the id of one of the bundled fixtures. */
const isKnownFixtureId = (candidate: string): boolean =>
  INGEST_FIXTURES.some(({ id }) => id === candidate);

/**
 * Derive the results source from Next.js query params.
 *
 * Precedence: a non-blank `runId` wins; otherwise a `fixture` that names a
 * known fixture; otherwise the default fixture. Values are trimmed first.
 */
export function getIngestResultsSource(query: {
  runId?: string;
  fixture?: string;
}): IngestResultsSource {
  const requestedRun = query.runId?.trim();
  if (requestedRun) {
    return { kind: "run", runId: requestedRun };
  }

  const requestedFixture = query.fixture?.trim();
  const fixtureId =
    requestedFixture && isKnownFixtureId(requestedFixture)
      ? requestedFixture
      : DEFAULT_FIXTURE_ID;

  return { kind: "fixture", fixtureId };
}

/**
 * Build the results-page URL for a source, with the identifier carried in
 * the query string (`fixture=…` or `runId=…`) and URL-encoded.
 */
export function getIngestResultsPath(source: IngestResultsSource): string {
  const search =
    source.kind === "fixture"
      ? new URLSearchParams({ fixture: source.fixtureId })
      : new URLSearchParams({ runId: source.runId });

  return `/ingest/results?${search.toString()}`;
}
+ * + * These mirror the Zod schemas in the internal repo's pipeline contracts + * but as plain TypeScript types — the Mastra API validates; the frontend + * just consumes the JSON. + */ + +// --------------------------------------------------------------------------- +// Anchors & blocks +// --------------------------------------------------------------------------- + +export interface PdfBbox { + x1: number; + y1: number; + x2: number; + y2: number; + unit: "pt"; +} + +export interface FilePageBboxAnchor { + kind: "file_page_bbox"; + page: number; + bbox: PdfBbox; +} + +export type Anchor = FilePageBboxAnchor; + +export interface Block { + blockId: string; + sourceId: string; + kind: string; + text: string; + anchors: Anchor[]; + confidence?: number; + attributes?: Record; +} + +// --------------------------------------------------------------------------- +// Evidence refs +// --------------------------------------------------------------------------- + +export interface BlockSpan { + blockId: string; + start: number; + end: number; +} + +export interface EvidenceRef { + sourceId: string; + blockIds: string[]; + blockSpans: BlockSpan[]; + quote: string; +} + +// --------------------------------------------------------------------------- +// Corpus +// --------------------------------------------------------------------------- + +export interface Source { + sourceId: string; + kind: "file" | "web" | "audio" | "video"; + mimeType: string; + stableRef: { + contentHash: string; + fileEntityId?: string; + snapshotId?: string; + }; +} + +export interface ExtractedCorpus { + version: "v0"; + parser: string; + sources: Source[]; + blocks: Block[]; + metadata: { + language?: string; + createdAt?: string; + }; +} + +// --------------------------------------------------------------------------- +// Discovery domain +// --------------------------------------------------------------------------- + +export type MentionCategory = + | "person" + | "organization" + | "place" + | 
"artifact" + | "event" + | "other"; + +export interface EntityMention { + chunkId: string; + blockId: string; + start: number; + end: number; + surface: string; +} + +export interface RosterEntry { + rosterEntryId: string; + canonicalName: string; + category?: MentionCategory; + discoveredTypeId: string; + resolvedTypeId: string; + summary: string; + mentions: EntityMention[]; + chunkIds: string[]; + mergedLocalIds: string[]; +} + +export interface ExtractedClaim { + claimId: string; + rosterEntryId: string; + claimText: string; + subject: string; + predicate: string; + object: string; + evidenceRefs: EvidenceRef[]; +} + +// --------------------------------------------------------------------------- +// Page images +// --------------------------------------------------------------------------- + +export interface PageImageManifest { + contentHash: string; + pageNumber: number; + imageUrl: string; + pdfPageWidth: number; + pdfPageHeight: number; + bboxOrigin: "BOTTOMLEFT" | "TOPLEFT"; +} + +// --------------------------------------------------------------------------- +// Run status & view +// --------------------------------------------------------------------------- + +export interface RunStatus { + runId: string; + status: "queued" | "running" | "succeeded" | "failed"; + phase?: string; + step?: string; + contentHash?: string; + counts?: { + pages?: number; + chunks?: number; + mentions?: number; + claims?: number; + }; + startedAt?: string; + updatedAt?: string; + error?: string; +} + +export interface IngestRunView { + runId: string; + sourceMetadata: { + filename: string; + contentHash: string; + mimeType: string; + }; + pageImages: PageImageManifest[]; + roster: { entries: RosterEntry[] }; + claims: ExtractedClaim[]; + corpus: ExtractedCorpus; +} diff --git a/apps/hash-frontend/src/pages/ingest.page/upload-panel.tsx b/apps/hash-frontend/src/pages/ingest.page/upload-panel.tsx new file mode 100644 index 00000000000..aaa0639f9c4 --- /dev/null +++ 
b/apps/hash-frontend/src/pages/ingest.page/upload-panel.tsx @@ -0,0 +1,211 @@ +/** + * Upload panel: drag-and-drop PDF upload with progress display. + * + * Uses Ark UI FileUpload for accessible drag-and-drop, and MUI for layout. + */ +import { FileUpload } from "@ark-ui/react"; +import { Box, CircularProgress, Typography } from "@mui/material"; +import type { FunctionComponent } from "react"; + +import { Button } from "../../shared/ui/button"; +import type { RunStatus } from "./types"; +import type { IngestRunState } from "./use-ingest-run"; + +// --------------------------------------------------------------------------- +// Sub-components (defined first to satisfy no-use-before-define) +// --------------------------------------------------------------------------- + +const StatusCard: FunctionComponent<{ children: React.ReactNode }> = ({ + children, +}) => ( + `1px solid ${palette.gray[30]}`, + borderRadius: 2, + p: 4, + textAlign: "center", + maxWidth: 400, + mx: "auto", + }} + > + {children} + +); + +const RunCounts: FunctionComponent<{ status: RunStatus }> = ({ status }) => { + if (!status.counts) { + return null; + } + const { pages, chunks, mentions, claims } = status.counts; + const items = [ + pages && `${pages} pages`, + chunks && `${chunks} chunks`, + mentions && `${mentions} mentions`, + claims && `${claims} claims`, + ].filter(Boolean); + if (items.length === 0) { + return null; + } + return ( + + {items.join(" · ")} + + ); +}; + +const RunProgress: FunctionComponent<{ status: RunStatus }> = ({ status }) => ( + + + {status.phase && ( + {status.phase} + )} + {status.step && ( + + → {status.step} + + )} + + + + Run: {status.runId.slice(0, 8)}… + + +); + +const DropZone: FunctionComponent<{ onUpload: (file: File) => void }> = ({ + onUpload, +}) => ( + { + const file = details.files[0]; + if (file) { + onUpload(file); + } + }} + > + + + Drop a PDF here + + + or click to browse + + + + + + + +); + +// 
--------------------------------------------------------------------------- +// Main component +// --------------------------------------------------------------------------- + +interface UploadPanelProps { + state: IngestRunState; + onUpload: (file: File) => void; + onReset: () => void; +} + +export const UploadPanel: FunctionComponent = ({ + state, + onUpload, + onReset, +}) => { + if (state.phase === "idle") { + return ; + } + + if (state.phase === "uploading") { + return ( + + + Uploading PDF… + + ); + } + + if (state.phase === "streaming") { + return ( + + + + + ); + } + + if (state.phase === "done") { + const succeeded = state.runStatus.status === "succeeded"; + return ( + + {succeeded ? ( + <> + + Pipeline complete! + + + + Opening results… + + + ) : ( + <> + + Pipeline failed + + {state.runStatus.error && ( + + {state.runStatus.error} + + )} + + + )} + + ); + } + + // error phase + return ( + + + Error + + + {state.message} + + + + ); +}; diff --git a/apps/hash-frontend/src/pages/ingest.page/use-ingest-run.ts b/apps/hash-frontend/src/pages/ingest.page/use-ingest-run.ts new file mode 100644 index 00000000000..514bbcdbc4f --- /dev/null +++ b/apps/hash-frontend/src/pages/ingest.page/use-ingest-run.ts @@ -0,0 +1,187 @@ +/** + * Ingest run hook: upload PDF → stream SSE progress → terminal state. + * + * Pure functions (isPdfFile, isTerminalStatus) are exported for testing. + * The hook (useIngestRun) wires them to React state + SSE side effects. 
import type { RunStatus } from "./types";

// ---------------------------------------------------------------------------
// Pure functions (functional core)
// ---------------------------------------------------------------------------

/** Every value `RunStatus["status"]` may take; used to validate wire data. */
const RUN_STATUS_VALUES: readonly string[] = [
  "queued",
  "running",
  "succeeded",
  "failed",
];

/** Keys of `RunStatus["counts"]` that are copied from an event payload. */
const COUNT_KEYS = ["pages", "chunks", "mentions", "claims"] as const;

/** Narrow an untrusted value to a known run status. */
function isRunStatusValue(value: unknown): value is RunStatus["status"] {
  return typeof value === "string" && RUN_STATUS_VALUES.includes(value);
}

/** Pass strings through; anything else becomes `undefined`. */
function optionalString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/** Keep only the finite numeric counters of an untrusted `counts` value. */
function countsFrom(value: unknown): RunStatus["counts"] {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  const raw = value as Record<string, unknown>;
  const counts: NonNullable<RunStatus["counts"]> = {};
  for (const key of COUNT_KEYS) {
    const candidate = raw[key];
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      counts[key] = candidate;
    }
  }
  return counts;
}

/** Turn an untrusted `error` value into displayable text (falsy → none). */
function errorTextFrom(value: unknown): string | undefined {
  if (!value) {
    return undefined;
  }
  if (typeof value === "string") {
    return value;
  }
  if (typeof value === "object") {
    // Error-like objects carry their text in `message`; otherwise show the
    // JSON so the information is not lost behind "[object Object]".
    const { message } = value as { message?: unknown };
    return typeof message === "string" ? message : JSON.stringify(value);
  }
  return String(value);
}

/** Whether a file looks like a PDF, by MIME type or by `.pdf` extension. */
export function isPdfFile(file: File): boolean {
  return (
    file.type === "application/pdf" || file.name.toLowerCase().endsWith(".pdf")
  );
}

/** Whether a run status is final (no further progress events expected). */
export function isTerminalStatus(
  status: RunStatus["status"],
): status is "succeeded" | "failed" {
  return status === "succeeded" || status === "failed";
}

/** Whether the run finished successfully, i.e. results can be opened. */
export function shouldFetchResults(
  state: IngestRunState,
): state is Extract<IngestRunState, { phase: "done" }> {
  return state.phase === "done" && state.runStatus.status === "succeeded";
}

/**
 * Map an SSE event payload to a RunStatus shape for the UI.
 *
 * The payload is parsed JSON from the network, so every field is validated
 * rather than cast: wrongly-typed `phase`/`step` are dropped, `counts` keeps
 * only its finite numeric counters, `error` is reduced to text, and an
 * unrecognised `status` falls back to `"running"`.
 *
 * @param runId - Run the event belongs to; copied into the result.
 * @param eventKind - SSE event name. `"run-succeeded"` and `"run-failed"`
 *   decide the status on their own, regardless of `payload.status`.
 * @param payload - Parsed event data.
 * @returns A RunStatus whose `updatedAt` is the current time (ISO 8601).
 */
export function statusFromEvent(
  runId: string,
  eventKind: string,
  payload: Record<string, unknown>,
): RunStatus {
  let status: RunStatus["status"];
  if (eventKind === "run-succeeded") {
    status = "succeeded";
  } else if (eventKind === "run-failed") {
    status = "failed";
  } else {
    status = isRunStatusValue(payload.status) ? payload.status : "running";
  }

  return {
    runId,
    status,
    phase: optionalString(payload.phase),
    step: optionalString(payload.step),
    counts: countsFrom(payload.counts),
    error: errorTextFrom(payload.error),
    updatedAt: new Date().toISOString(),
  };
}

// ---------------------------------------------------------------------------
// State machine
// ---------------------------------------------------------------------------

/** UI state of one upload-and-run cycle, discriminated by `phase`. */
export type IngestRunState =
  | { phase: "idle" }
  | { phase: "uploading" }
  | { phase: "streaming"; runStatus: RunStatus }
  | { phase: "done"; runStatus: RunStatus }
  | { phase: "error"; message: string };
(!isPdfFile(file)) { + setState({ phase: "error", message: "Only PDF files are accepted" }); + return; + } + + setState({ phase: "uploading" }); + + try { + const formData = new FormData(); + formData.append("file", file); + + const res = await fetch("/api/ingest", { + method: "POST", + body: formData, + }); + + if (!res.ok) { + const body = await res.json().catch(() => ({})); + throw new Error( + (body as { error?: string }).error ?? + `Upload failed with status ${res.status}`, + ); + } + + const status: RunStatus = (await res.json()) as RunStatus; + + if (isTerminalStatus(status.status)) { + setState({ phase: "done", runStatus: status }); + } else { + setState({ phase: "streaming", runStatus: status }); + startStream(status.runId); + } + } catch (err) { + setState({ + phase: "error", + message: err instanceof Error ? err.message : String(err), + }); + } + }, + [startStream], + ); + + const reset = useCallback(() => { + stopStream(); + setState({ phase: "idle" }); + }, [stopStream]); + + return { state, upload, reset }; +} diff --git a/apps/hash-frontend/src/pages/ingest/results.page.tsx b/apps/hash-frontend/src/pages/ingest/results.page.tsx new file mode 100644 index 00000000000..8266585cd46 --- /dev/null +++ b/apps/hash-frontend/src/pages/ingest/results.page.tsx @@ -0,0 +1,215 @@ +import { InfinityLightIcon } from "@hashintel/design-system"; +import { Box, Container, Typography } from "@mui/material"; +import { useRouter } from "next/router"; +import { useEffect, useMemo, useState } from "react"; + +import type { NextPageWithLayout } from "../../shared/layout"; +import { getLayoutWithSidebar } from "../../shared/layout"; +import { Button } from "../../shared/ui/button"; +import { WorkersHeader } from "../../shared/workers-header"; +import type { Selection } from "../ingest.page/evidence-resolver"; +import { resolveEvidence } from "../ingest.page/evidence-resolver"; +import { PageViewer } from "../ingest.page/page-viewer"; +import { ResultsPanel } from 
"../ingest.page/results-panel"; +import { + getIngestResultsPath, + getIngestResultsSource, + INGEST_FIXTURES, +} from "../ingest.page/routing"; +import type { IngestRunView } from "../ingest.page/types"; + +const normalizeQueryParam = ( + value: string | string[] | undefined, +): string | undefined => (typeof value === "string" ? value : value?.[0]); + +const IngestResultsPage: NextPageWithLayout = () => { + const router = useRouter(); + const source = useMemo( + () => + getIngestResultsSource({ + runId: normalizeQueryParam(router.query.runId), + fixture: normalizeQueryParam(router.query.fixture), + }), + [router.query.runId, router.query.fixture], + ); + + const [view, setView] = useState(null); + const [error, setError] = useState(null); + const [selection, setSelection] = useState(null); + const [currentPage, setCurrentPage] = useState(1); + const [loading, setLoading] = useState(false); + + useEffect(() => { + const abortController = new AbortController(); + + setView(null); + setError(null); + setSelection(null); + setCurrentPage(1); + setLoading(true); + + const endpoint = + source.kind === "fixture" + ? `/api/ingest-fixtures/${encodeURIComponent(source.fixtureId)}/view` + : `/api/ingest/${encodeURIComponent(source.runId)}/view`; + + void (async () => { + try { + const response = await fetch(endpoint, { + signal: abortController.signal, + }); + if (!response.ok) { + throw new Error(`Failed to load results: ${response.status}`); + } + const data = (await response.json()) as IngestRunView; + if (!abortController.signal.aborted) { + setView(data); + } + } catch (err) { + if ( + abortController.signal.aborted || + (err instanceof Error && err.name === "AbortError") + ) { + return; + } + + setError(err instanceof Error ? 
err.message : String(err)); + } finally { + if (!abortController.signal.aborted) { + setLoading(false); + } + } + })(); + + return () => { + abortController.abort(); + }; + }, [source]); + + const evidence = useMemo( + () => + view && selection + ? resolveEvidence(selection, view.corpus.blocks) + : { blockIds: [], targetPage: null }, + [selection, view], + ); + + useEffect(() => { + if (evidence.targetPage !== null) { + setCurrentPage(evidence.targetPage); + } + }, [evidence]); + + const handleFixtureChange = (fixtureId: string) => { + void router.push(getIngestResultsPath({ kind: "fixture", fixtureId })); + }; + + const handleNewUpload = () => { + void router.push("/ingest"); + }; + + return ( + <> + + {source.kind === "fixture" && ( + + )} + {view && ( + + {view.sourceMetadata.filename} · {view.pageImages.length} pages + · {view.roster.entries.length} entities · {view.claims.length}{" "} + claims + + )} + + } + /> + + {error && ( + + + Error loading results + + + {error} + + {source.kind === "run" && ( + + )} + + )} + + {!error && !view && ( + + + {loading ? 
"Loading results…" : "Preparing view…"} + + + )} + + {view && ( + + + + + {source.kind === "run" && ( + + )} + + + )} + + ); +}; + +IngestResultsPage.getLayout = (page) => + getLayoutWithSidebar(page, { fullWidth: true }); + +export default IngestResultsPage; diff --git a/apps/hash-frontend/src/shared/layout/layout-with-sidebar/sidebar.tsx b/apps/hash-frontend/src/shared/layout/layout-with-sidebar/sidebar.tsx index b25e56aeb18..ac5d6e154d3 100644 --- a/apps/hash-frontend/src/shared/layout/layout-with-sidebar/sidebar.tsx +++ b/apps/hash-frontend/src/shared/layout/layout-with-sidebar/sidebar.tsx @@ -108,6 +108,11 @@ export const PageSidebar: FunctionComponent = () => { path: "/workers", activeIfPathMatches: /^\/@([^/]+)\/workers\//, }, + { + title: "Ingest", + path: "/ingest", + activeIfPathMatches: /^\/ingest/, + }, ], }, ] diff --git a/yarn.lock b/yarn.lock index e83f2204a9e..7d8d75889ba 100644 --- a/yarn.lock +++ b/yarn.lock @@ -527,6 +527,7 @@ __metadata: resolution: "@apps/hash-frontend@workspace:apps/hash-frontend" dependencies: "@apollo/client": "npm:3.10.5" + "@ark-ui/react": "npm:5.26.2" "@blockprotocol/core": "npm:0.1.4" "@blockprotocol/graph": "workspace:*" "@blockprotocol/hook": "npm:0.1.8"