diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
index 7e012f0c..2db15fd2 100644
--- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
+++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
@@ -1,4 +1,31 @@
+<#if analysisId??>
+    <#-- [The added markup inside this condition (roughly 25 lines) was lost in extraction; only the surrounding analysisId guard is recoverable.] -->
+</#if>
 <#if DUUI??>
     <#if DUUI.modelGroups?has_content>
         <#if DUUI.isTopic>
diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
index 02687feb..5174f85d 100644
--- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
+++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
@@ -1,6 +1,8 @@
 package org.texttechnologylab.uce.analysis;
 
 import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
@@ -8,12 +10,34 @@
 import org.texttechnologylab.uce.analysis.modules.*;
 import org.texttechnologylab.uce.analysis.typeClasses.TextClass;
 
+import java.time.Instant;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.io.InputStream;
+import java.io.DataOutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+
 import java.util.*;
 
 public class RunDUUIPipeline {
+    private static final AnalysisCache analysisCache = new AnalysisCache();
+    private static final ThreadLocal<String> lastAnalysisIdTL = new ThreadLocal<>();
+    private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class);
+
+    public static AnalysisSession getCachedSession(String analysisId) { return analysisCache.get(analysisId); }
+
+    private static String getCurrentUserId() {
+        // TODO: replace with your auth/session identity
+        return "user-unknown";
+    }
+
     public DUUIInformation getModelResources(List<String> modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception {
         ModelResources modelResources = new ModelResources();
         List modelGroupsList = modelResources.getGroupedModelObjects();
@@ -189,10 +213,13 @@ public DUUIInformation getModelResources(List<String> modelGroups, String inputT
             newCas.setDocumentText(text);
             cas = newCas;
+            logger.info("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)");
+
         }
         // run pipeline
         DUUIComposer composer = pipeline.setComposer(modelInfosMap);
         JCas result = pipeline.runPipeline(cas, composer);
+        logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)");
         // get results
         Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups);
         // print results
@@ -232,9 +259,28 @@ public DUUIInformation getModelResources(List<String> modelGroups, String inputT
         if (isCohmetrix) {
             duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups);
         }
+        String analysisId = UUID.randomUUID().toString();
+        String userId = getCurrentUserId();
+        String title = "Analysis " + Instant.now();
+
+        byte[] xmiBytes = toXmiBytes(result);
+        AnalysisSession session = new AnalysisSession(
+                analysisId, userId, title, /*externalId*/ null,
+                result, /*xmiBytes*/ xmiBytes
+        );
+        analysisCache.put(session);
+        lastAnalysisIdTL.set(analysisId);
+        logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)");
         return duuiInformation;
     }
 
+    public AnalysisResponse getModelResourcesWithHandle(List<String> modelGroups, String inputText, String claim,
+                                                        String coherenceText, String stanceText, String systemPrompt) throws Exception {
+        DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt);
+        String id = lastAnalysisIdTL.get();
+        return new AnalysisResponse(id, info);
+    }
+
     public static void main(String[] args) throws Exception {
         ModelResources modelResources = new ModelResources();
         List modelGroups = modelResources.getGroupedModelObjects();
@@ -256,5 +302,195 @@ public static void main(String[] args) throws Exception {
 
         DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt);
     }
+
+    // Result wrapper: the analysis payload plus the cache handle for later import
+    public static final class AnalysisResponse {
+        public final String analysisId;
+        public final DUUIInformation duuiInformation;
+
+        public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) {
+            this.analysisId = analysisId;
+            this.duuiInformation = duuiInformation;
+        }
+    }
+
+    // AnalysisSession: everything needed to re-serve or import a finished analysis
+    public static final class AnalysisSession {
+        public final String analysisId;
+        public final String userId;
+        public final long createdAtMillis;
+        public final String title;
+        public final String externalId;
+        public final JCas jcas;
+        public final byte[] xmiBytes;
+
+        public AnalysisSession(String analysisId, String userId, String title, String externalId,
+                               JCas jcas, byte[] xmiBytes) {
+            this.analysisId = analysisId;
+            this.userId = userId;
+            this.title = title;
+            this.externalId = externalId;
+            this.createdAtMillis = System.currentTimeMillis();
+            this.jcas = jcas;
+            this.xmiBytes = xmiBytes;
+        }
+    }
+
+    // AnalysisCache: in-memory store of finished analyses, keyed by analysisId
+    public static final class AnalysisCache {
+        private final Map<String, AnalysisSession> map = new ConcurrentHashMap<>();
+        private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes
+
+        public void put(AnalysisSession s) { map.put(s.analysisId, s); }
+
+        // Retrieve a session from the cache, expiring it lazily if older than the TTL
+        public AnalysisSession get(String id) {
+            AnalysisSession s = map.get(id);
+            if (s == null) return null;
+
+            if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) {
+                map.remove(id);
+                return null;
+            }
+            return s;
+        }
+
+        // Manually remove a session by ID
+        public void remove(String id) { map.remove(id); }
+
+        // Remove all expired sessions
+        public void cleanupExpired() {
+            long now = System.currentTimeMillis();
+            for (var entry : map.entrySet()) {
+                AnalysisSession s = entry.getValue();
+                if (now - s.createdAtMillis > ttlMillis) {
+                    map.remove(entry.getKey());
+                    logger.info("[CRON] Removed expired session: " + s.analysisId);
+                }
+            }
+        }
+    }
+
+    // Scheduled job for automatic cleanup every 5 minutes
+    private static final java.util.concurrent.ScheduledExecutorService scheduler =
+            java.util.concurrent.Executors.newScheduledThreadPool(1);
+
+    static {
+        scheduler.scheduleAtFixedRate(() -> {
+            try {
+                logger.info("[CACHE] Running cache cleanup task...");
+                analysisCache.cleanupExpired();
+            } catch (Exception e) {
+                logger.error("[CACHE] Cache cleanup failed: " + e.getMessage());
+            }
+        }, 5, 5, TimeUnit.MINUTES);
+    }
+    // Serialize a JCas to XMI bytes (XML 1.1 so control characters survive)
+    private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception {
+        java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
+        org.apache.uima.cas.impl.XmiCasSerializer ser =
+                new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem());
+        org.apache.uima.util.XMLSerializer xmlSer =
+                new org.apache.uima.util.XMLSerializer(bos, true);
+        xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1");
+        ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
+        return bos.toByteArray();
+    }
+
+    // When we send a CAS to the importer via HTTP, we want to capture the response.
+    // This small class acts as a container for the HTTP response details.
+    private static class HttpResult {
+        final int status;
+        final String body;
+        final String locationHeader;
+
+        HttpResult(int status, String body, String locationHeader) {
+            this.status = status; this.body = body; this.locationHeader = locationHeader;
+        }
+    }
+
+    // Send a CAS via HTTP as a multipart/form-data POST
+    private static HttpResult postMultipart(String urlStr,
+                                            Map<String, String> fields,
+                                            String fileField, String filename,
+                                            String fileContentType, byte[] fileBytes) throws Exception {
+        // Generate a boundary string to separate the parts of the multipart body
+        String boundary = "----JAVA-" + UUID.randomUUID();
+        // Open an HTTP connection to the importer endpoint
+        URL url = new URL(urlStr);
+        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+        conn.setDoOutput(true);
+        conn.setRequestMethod("POST");
+        conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
+
+        // Write the request body
+        try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) {
+            // text fields
+            for (var e : fields.entrySet()) {
+                out.writeBytes("--" + boundary + "\r\n");
+                out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n");
+                out.write(e.getValue().getBytes(StandardCharsets.UTF_8));
+                out.writeBytes("\r\n");
+            }
+            // file field
+            out.writeBytes("--" + boundary + "\r\n");
+            out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n");
+            out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n");
+            out.write(fileBytes);
+            out.writeBytes("\r\n");
+            out.writeBytes("--" + boundary + "--\r\n");
+            out.flush();
+        }
+
+        // Read the HTTP response from the importer
+        int status = conn.getResponseCode();
+        String location = conn.getHeaderField("Location");
+        String body;
+
+        try (InputStream in = (status >= 200 && status < 400) ? conn.getInputStream() : conn.getErrorStream()) {
+            body = (in != null) ?
+                    new String(in.readAllBytes(), StandardCharsets.UTF_8) : "";
+        }
+        conn.disconnect();
+        return new HttpResult(status, body, location);
+    }
+
+    // Send a cached CAS to the importer
+    public static HttpResult sendToImporterViaHttp(String importUrl,
+                                                   String analysisId,
+                                                   long corpusId,
+                                                   String documentId,
+                                                   String casView) throws Exception {
+        AnalysisSession s = getCachedSession(analysisId);
+        if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId);
+
+        byte[] casBytes = toXmiBytes(s.jcas);
+
+        // Form-data fields
+        Map<String, String> fields = new LinkedHashMap<>();
+        fields.put("analysisId", analysisId);
+        fields.put("corpusId", Long.toString(corpusId));
+        if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId);
+        if (casView != null && !casView.isBlank()) fields.put("casView", casView);
+
+        // Send the CAS as an XMI file part
+        String filename = "cas_" + analysisId + ".xmi";
+        logger.info("[IMPORT][HTTP] POST " + importUrl
+                + " corpusId=" + corpusId + " analysisId=" + analysisId
+                + " documentId=" + documentId + " casView=" + casView
+                + " file=" + filename + " (" + casBytes.length + " bytes)");
+
+        HttpResult res = postMultipart(
+                importUrl,
+                fields,
+                "file",
+                filename,
+                "application/xml",
+                casBytes
+        );
+        logger.info("[IMPORT][HTTP] status=" + res.status
+                + (res.locationHeader != null ? " Location=" + res.locationHeader : "")
+                + (res.body != null && !res.body.isBlank() ? " body=" + res.body : ""));
+        return res;
+    }
 }
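
Note: taken together, the additions above form a two-step flow: run a pipeline once, keep the returned analysisId, then (within the 45-minute TTL) forward the cached CAS to the importer. A minimal usage sketch; the model-group name, host, port, and corpusId are placeholders, not values from this patch:

import java.util.List;
import org.texttechnologylab.uce.analysis.RunDUUIPipeline;

public class ImportFlowSketch {
    public static void main(String[] args) throws Exception {
        RunDUUIPipeline pipeline = new RunDUUIPipeline();

        // Run the pipeline and keep the cache handle alongside the results.
        RunDUUIPipeline.AnalysisResponse resp = pipeline.getModelResourcesWithHandle(
                List.of("someModelGroup"),  // hypothetical model group name
                "Input text to analyse.",   // inputText
                null, null, null, null);    // claim/coherence/stance/LLM inputs, assumed optional here

        // The session stays in the in-memory cache for 45 minutes.
        RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(resp.analysisId);
        System.out.println("cached=" + (session != null));

        // Later (within the TTL), forward the cached CAS to the importer endpoint.
        RunDUUIPipeline.sendToImporterViaHttp(
                "http://localhost:4567/api/ie/upload/uima", // hypothetical host/port
                resp.analysisId, 1L, resp.analysisId, null);
    }
}

Because getModelResourcesWithHandle hands the id back explicitly, callers never need to touch the thread-local that getModelResources sets internally.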
diff --git a/uce.portal/uce.common/src/main/resources/corpusConfig2.json b/uce.portal/uce.common/src/main/resources/corpusConfig2.json
new file mode 100644
index 00000000..3a18c818
--- /dev/null
+++ b/uce.portal/uce.common/src/main/resources/corpusConfig2.json
@@ -0,0 +1,45 @@
+{
+  "name": "[corpus_2]",
+  "author": "[author/owner of the corpus]",
+  "language": "[de-DE, en-EN, ...]",
+  "description": "",
+  "addToExistingCorpus": false,
+
+  "annotations": {
+    "annotatorMetadata": false,
+    "uceMetadata": false,
+    "logicalLinks": false,
+
+    "OCRPage": false,
+    "OCRParagraph": false,
+    "OCRBlock": false,
+    "OCRLine": false,
+
+    "srLink": false,
+    "namedEntity": false,
+    "sentiment": false,
+    "emotion": false,
+    "geoNames": false,
+    "lemma": false,
+    "sentence": false,
+    "taxon": {
+      "annotated": false,
+      "//comment": "[Are the taxons annotated with biofid ontologies through the 'identifier' property?]",
+      "biofidOnthologyAnnotated": false
+    },
+    "time": false,
+    "wikipediaLink": false,
+    "completeNegation": false,
+    "unifiedTopic": false
+  },
+  "other": {
+    "//comment": "[Is this corpus also available on https://sammlungen.ub.uni-frankfurt.de/? Either true or false]",
+    "availableOnFrankfurtUniversityCollection": false,
+
+    "includeKeywordDistribution": false,
+    "enableEmbeddings": false,
+    "enableRAGBot": false,
+    "enableS3Storage": false
+  }
+}
\ No newline at end of file
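
Note: nothing in this patch wires corpusConfig2.json up yet; elsewhere the portal binds configs of this shape to its CorpusConfig class via Gson (see the code removed from ImportExportApi below). A sketch of loading the new resource, parsed into a generic map here since CorpusConfig's field names are not shown in this diff:

import com.google.gson.Gson;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Map;

public class CorpusConfigSketch {
    public static void main(String[] args) throws Exception {
        // Load corpusConfig2.json from the classpath (it lives under src/main/resources).
        try (var in = CorpusConfigSketch.class.getResourceAsStream("/corpusConfig2.json")) {
            if (in == null) throw new IllegalStateException("corpusConfig2.json not on classpath");
            Map<?, ?> config = new Gson().fromJson(
                    new InputStreamReader(in, StandardCharsets.UTF_8), Map.class);
            System.out.println("corpus name: " + config.get("name"));
            System.out.println("annotations: " + config.get("annotations"));
        }
    }
}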
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
index 97534c64..5a1ac2f2 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
@@ -480,6 +480,7 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi
             get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx));
             post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx));
             post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx));
+            post("/importCas", (registry.get(AnalysisApi.class)).importCas); // route for importing a cached CAS
         });
 
         path("/corpusUniverse", () -> {
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
index 866242bc..26a0ba7b 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
@@ -3,6 +3,7 @@
 import com.google.gson.Gson;
 import freemarker.template.Configuration;
 import io.javalin.http.Context;
+import io.javalin.http.Handler;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.springframework.context.ApplicationContext;
@@ -56,10 +57,14 @@ public void runPipeline(Context ctx) {
             model.put("inputLLM", inputLLM);
 
             RunDUUIPipeline pipeline = new RunDUUIPipeline();
-            DUUIInformation DataRequest = pipeline.getModelResources(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM);
+            RunDUUIPipeline.AnalysisResponse resp =
+                    pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim,
+                            inputCoherence, inputStance, inputLLM);
+            DUUIInformation DataRequest = resp.duuiInformation;
             model.put("DUUI", DataRequest);
             model.put("SuccessRequest", true);
             model.put("modelGroups", DataRequest.getModelGroups());
+            model.put("analysisId", resp.analysisId);
 
             // set history
             history.addDuuiInformation(String.valueOf(counter), DataRequest);
@@ -180,5 +185,38 @@ public void callHistoryText(Context ctx) {
             ctx.render("defaultError.ftl");
         }
     }
-
+
+    // IMPORT ROUTE
+    @Authentication(required = Authentication.Requirement.LOGGED_IN,
+            route = Authentication.RouteTypes.POST,
+            path = "/api/analysis/importCas"
+    )
+    public Handler importCas = ctx -> {
+        try {
+            String analysisId = ctx.queryParam("analysisId");
+            if (analysisId == null || analysisId.isBlank()) {
+                ctx.status(400).result("Missing analysisId");
+                return;
+            }
+
+            // Look up the cached session
+            RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(analysisId);
+            if (session == null) {
+                ctx.status(404).result("No cached CAS found for analysisId=" + analysisId);
+                return;
+            }
+
+            // Send it to the importer
+            long corpusId = Long.parseLong(ctx.queryParam("corpusId")); // from ?corpusId=...
+            String importPath = "/api/ie/upload/uima";
+            String importUrl = ctx.scheme() + "://" + ctx.host() + importPath;
+
+            RunDUUIPipeline.sendToImporterViaHttp(importUrl, analysisId, corpusId, analysisId, null);
+            ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId);
+        } catch (NumberFormatException nfe) {
+            ctx.status(400).result("corpusId is required and must be a number");
+        } catch (Exception e) {
+            logger.error("Error importing CAS", e);
+            ctx.status(500).result("Error importing CAS: " + e.getMessage());
+        }
+    };
 }
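
Note: with the route registered in App.java, the import is triggered by an empty-bodied POST carrying two query parameters. A client sketch; host and port are placeholders, and since the route requires Authentication.Requirement.LOGGED_IN, a real call also needs a session, which is omitted here:

import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class ImportCasClientSketch {
    public static void main(String[] args) throws Exception {
        // analysisId comes from the rendered analysis view (model attribute "analysisId").
        String analysisId = "00000000-0000-0000-0000-000000000000"; // placeholder
        URL url = new URL("http://localhost:4567/api/analysis/importCas" // hypothetical host/port
                + "?analysisId=" + analysisId + "&corpusId=1");

        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("POST");

        // 200 = imported, 400 = missing/bad params, 404 = cache entry missing or expired (45 min TTL).
        int status = conn.getResponseCode();
        try (var in = status < 400 ? conn.getInputStream() : conn.getErrorStream()) {
            String body = (in != null) ? new String(in.readAllBytes(), StandardCharsets.UTF_8) : "";
            System.out.println(status + ": " + body);
        }
        conn.disconnect();
    }
}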
Aborting.", ex)); - if (corpusConfigRaw == null) { - ctx.result("Corpus with id " + corpusId + " wasn't found in the database; no config was provided; can't upload document."); - return; - } - logger.info("Corpus with id " + corpusId + " wasn't found in the database; creating a new corpus with the provided config."); - try { - var corpusConfig = gson.fromJson(corpusConfigRaw, CorpusConfig.class); - corpus = new Corpus(); - var corpusReturn = Importer.CreateDBCorpus(corpus, corpusConfig, this.db); - if (corpusReturn != null) { - corpus = corpusReturn; - } - } catch (JsonIOException | JsonSyntaxException e) { - ctx.result("The corpusConfig provided is not properly formatted."); - } catch (DatabaseOperationException e) { - ctx.result("Error creating a new corpus in the database: " + e.getMessage()); - return; - } + ctx.status(404); + ctx.result("Corpus with id " + corpusId + " wasn't found in the database."); + return; } // TODO just use 1 as default? will throw an error if this is null otherwise... @@ -122,10 +106,9 @@ public void uploadUIMA(Context ctx) { try (var input = ctx.req().getPart("file").getInputStream()) { var fileName = ctx.req().getPart("file").getSubmittedFileName(); // Import the doc in the background - final Corpus corpus1 = corpus; var importFuture = CompletableFuture.supplyAsync(() -> { try { - return importer.storeUploadedXMIToCorpusAsync(input, corpus1, fileName, documentId); + return importer.storeUploadedXMIToCorpusAsync(input, corpus, fileName, documentId); } catch (DatabaseOperationException e) { throw new RuntimeException(e); } @@ -139,6 +122,7 @@ public void uploadUIMA(Context ctx) { if (acceptedContentType != null && acceptedContentType.equals("application/json")) { Map apiResult = new HashMap<>(); apiResult.put("document_id", newDocumentId); + ctx.contentType("application/json"); ctx.json(apiResult); return; }