diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
index 7e012f0c..2db15fd2 100644
--- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
+++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
@@ -1,4 +1,31 @@
+<#if analysisId??>
+    <#-- Make the analysis id available to the page so the import of the cached
+         CAS can be triggered via POST /api/analysis/importCas. -->
+    <input type="hidden" id="analysisId" value="${analysisId}"/>
+</#if>
<#if DUUI??>
<#if DUUI.modelGroups?has_content>
<#if DUUI.isTopic>
diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
index 02687feb..5174f85d 100644
--- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
+++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
@@ -1,6 +1,8 @@
package org.texttechnologylab.uce.analysis;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
@@ -8,12 +10,34 @@
import org.texttechnologylab.uce.analysis.modules.*;
import org.texttechnologylab.uce.analysis.typeClasses.TextClass;
+
+import java.time.Instant;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.io.InputStream;
+import java.io.DataOutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+
import java.util.*;
public class RunDUUIPipeline {
+ private static final AnalysisCache analysisCache = new AnalysisCache();
+ private static final ThreadLocal<String> lastAnalysisIdTL = new ThreadLocal<>();
+ private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class);
+ public static AnalysisSession getCachedSession(String analysisId) { return analysisCache.get(analysisId); }
+
+ private static String getCurrentUserId() {
+ // TODO: replace with your auth/session identity
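+ // (sketch: with Javalin this could return ctx.sessionAttribute("userId")
+ //  once a Context is threaded through to this method)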
+ return "user-unknown";
+ }
+
public DUUIInformation getModelResources(List<String> modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception {
ModelResources modelResources = new ModelResources();
List modelGroupsList = modelResources.getGroupedModelObjects();
@@ -189,10 +213,13 @@ public DUUIInformation getModelResources(List modelGroups, String inputT
newCas.setDocumentText(text);
cas = newCas;
+ logger.info("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)");
+
}
// run pipeline
DUUIComposer composer = pipeline.setComposer(modelInfosMap);
JCas result = pipeline.runPipeline(cas, composer);
+ logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)");
// get results
Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups);
// print results
@@ -232,9 +259,28 @@ public DUUIInformation getModelResources(List modelGroups, String inputT
if (isCohmetrix) {
duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups);
}
+ String analysisId = UUID.randomUUID().toString();
+ String userId = getCurrentUserId();
+ String title = "Analysis " + Instant.now();
+
+ byte[] xmiBytes = toXmiBytes(result);
+ AnalysisSession session = new AnalysisSession(
+ analysisId, userId, title, /*externalId*/ null,
+ result, /*xmiBytes*/ xmiBytes
+ );
+ analysisCache.put(session);
+ lastAnalysisIdTL.set(analysisId);
+ logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)");
return duuiInformation;
}
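+
+ // Convenience wrapper around getModelResources that also returns the cache
+ // handle generated above, so callers (e.g. AnalysisApi.runPipeline) can
+ // reference the cached CAS later via /importCas.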
+ public AnalysisResponse getModelResourcesWithHandle(List<String> modelGroups, String inputText, String claim,
+ String coherenceText, String stanceText, String systemPrompt) throws Exception {
+ DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt);
+ String id = lastAnalysisIdTL.get();
+ return new AnalysisResponse(id, info);
+ }
+
public static void main(String[] args) throws Exception {
ModelResources modelResources = new ModelResources();
List modelGroups = modelResources.getGroupedModelObjects();
@@ -256,5 +302,195 @@ public static void main(String[] args) throws Exception {
DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt);
}
+ public static final class AnalysisResponse {
+ public final String analysisId;
+ public final DUUIInformation duuiInformation;
+
+ public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) {
+ this.analysisId = analysisId;
+ this.duuiInformation = duuiInformation;
+ }
+ }
+
+ // AnalysisSession: one cached analysis result, keeping both the live JCas
+ // and its XMI serialization so the CAS can be re-served or imported later
+ public static final class AnalysisSession {
+ public final String analysisId;
+ public final String userId;
+ public final long createdAtMillis;
+ public final String title;
+ public final String externalId;
+ public final JCas jcas;
+ public final byte[] xmiBytes;
+
+ public AnalysisSession(String analysisId, String userId, String title, String externalId,
+ JCas jcas, byte[] xmiBytes) {
+ this.analysisId = analysisId;
+ this.userId = userId;
+ this.title = title;
+ this.externalId = externalId;
+ this.createdAtMillis = System.currentTimeMillis();
+ this.jcas = jcas;
+ this.xmiBytes = xmiBytes;
+ }
+ }
+
+ // AnalysisCache: in-memory session store keyed by analysisId; entries
+ // expire 45 minutes after creation
+ public static final class AnalysisCache {
+ private final Map<String, AnalysisSession> map = new ConcurrentHashMap<>();
+ private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes
+
+ public void put(AnalysisSession s) { map.put(s.analysisId, s); }
+
+ public AnalysisSession get(String id) { // Retrieve a session from the cache
+ AnalysisSession s = map.get(id);
+ if (s == null) return null;
+
+ if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) { // If this session is older than 45 minutes -> expire it
+ map.remove(id);
+ return null;
+ }
+ return s;
+ }
+
+ public void remove(String id) { map.remove(id); } // manually remove a session by ID
+
+ public void cleanupExpired() { // remove all expired sessions
+ long now = System.currentTimeMillis();
+ for (var entry : map.entrySet()) {
+ AnalysisSession s = entry.getValue();
+ if (now - s.createdAtMillis > ttlMillis) {
+ map.remove(entry.getKey());
+ logger.info("[CRON] Removed expired session: " + s.analysisId);
+ }
+ }
+ }
+ }
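+
+ // TTL illustration: an entry put() at t=0 is returned by get() until
+ // t=45min; afterwards get() returns null and the entry is dropped, either
+ // lazily on access or by the scheduled cleanup below.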
+ private static final java.util.concurrent.ScheduledExecutorService scheduler = // cron-style job: cleanup every 5 minutes
+ java.util.concurrent.Executors.newScheduledThreadPool(1, r -> {
+ Thread t = new Thread(r, "analysis-cache-cleanup");
+ t.setDaemon(true); // daemon thread so the scheduler never blocks JVM shutdown
+ return t;
+ });
+
+ static {
+ scheduler.scheduleAtFixedRate(() -> {
+ try {
+ logger.info("[CACHE] Running cache cleanup task...");
+ analysisCache.cleanupExpired();
+ } catch (Exception e) {
+ logger.error("[CACHE] Cache cleanup failed", e);
+ }
+ }, 5, 5, TimeUnit.MINUTES);
+ }
+
+ private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception {
+ java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
+ org.apache.uima.cas.impl.XmiCasSerializer ser =
+ new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem());
+ org.apache.uima.util.XMLSerializer xmlSer =
+ new org.apache.uima.util.XMLSerializer(bos, true);
+ xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1");
+ ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
+ return bos.toByteArray();
+ }
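+
+ // For reference, the inverse operation as a sketch (not called in this class):
+ // the importer side can rebuild a CAS from the cached bytes with the standard
+ // UIMA XmiCasDeserializer.
+ private static void fromXmiBytes(org.apache.uima.jcas.JCas target, byte[] xmi) throws Exception {
+ try (java.io.ByteArrayInputStream in = new java.io.ByteArrayInputStream(xmi)) {
+ org.apache.uima.cas.impl.XmiCasDeserializer.deserialize(in, target.getCas());
+ }
+ }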
+
+ // When the CAS is sent to the importer via HTTP, we capture the response.
+ // This small class is a container for the response details.
+ private static class HttpResult {
+ final int status;
+ final String body;
+ final String locationHeader;
+ HttpResult(int status, String body, String locationHeader) {
+ this.status = status; this.body = body; this.locationHeader = locationHeader;
+ }
+ }
+
+ // Send the CAS to the importer as a multipart/form-data POST
+ private static HttpResult postMultipart(String urlStr,
+ Map<String, String> fields,
+ String fileField, String filename,
+ String fileContentType, byte[] fileBytes) throws Exception {
+ String boundary = "----JAVA-" + UUID.randomUUID(); // boundary string separating the parts of the multipart body
+ URL url = new URL(urlStr);
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection(); // open HTTP connection to the importer endpoint
+ conn.setDoOutput(true);
+ conn.setRequestMethod("POST");
+ conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
+
+ try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) { //Write request body
+ // text fields
+ for (var e : fields.entrySet()) {
+ out.writeBytes("--" + boundary + "\r\n");
+ out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n");
+ out.write(e.getValue().getBytes(StandardCharsets.UTF_8));
+ out.writeBytes("\r\n");
+ }
+ // file field
+ out.writeBytes("--" + boundary + "\r\n");
+ out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n");
+ out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n");
+ out.write(fileBytes);
+ out.writeBytes("\r\n");
+ out.writeBytes("--" + boundary + "--\r\n");
+ out.flush();
+ }
+
+ int status = conn.getResponseCode(); //Read the HTTP response from the importer
+ String location = conn.getHeaderField("Location");
+ String body;
+
+ try (InputStream in = (status >= 200 && status < 400) ? conn.getInputStream() : conn.getErrorStream()) {
+ body = (in != null) ? new String(in.readAllBytes(), StandardCharsets.UTF_8) : "";
+ }
+ conn.disconnect();
+ return new HttpResult(status, body, location);
+ }
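+
+ // For illustration, the body written above has this layout (boundary and
+ // values abbreviated; every line break is CRLF):
+ //   ------JAVA-<uuid>
+ //   Content-Disposition: form-data; name="corpusId"
+ //
+ //   42
+ //   ------JAVA-<uuid>
+ //   Content-Disposition: form-data; name="file"; filename="cas_<analysisId>.xmi"
+ //   Content-Type: application/xml
+ //
+ //   <XMI bytes>
+ //   ------JAVA-<uuid>--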
+
+ public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached CAS to importer
+ String analysisId,
+ long corpusId,
+ String documentId,
+ String casView) throws Exception {
+ AnalysisSession s = getCachedSession(analysisId);
+ if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId);
+
+ // Prefer the XMI serialized at analysis time; re-serialize only if missing
+ byte[] casBytes = (s.xmiBytes != null) ? s.xmiBytes : toXmiBytes(s.jcas);
+
+ Map<String, String> fields = new LinkedHashMap<>(); // form-data fields
+ fields.put("analysisId", analysisId);
+ fields.put("corpusId", Long.toString(corpusId));
+ if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId);
+ if (casView != null && !casView.isBlank()) fields.put("casView", casView);
+
+ // Send the serialized CAS as an XMI file part
+ String filename = "cas_" + analysisId + ".xmi";
+ logger.info("[IMPORT][HTTP] POST " + importUrl
+ + " corpusId=" + corpusId + " analysisId=" + analysisId
+ + " documentId=" + documentId + " casView=" + casView
+ + " file=" + filename + " (" + casBytes.length + " bytes)");
+
+ HttpResult res = postMultipart(
+ importUrl,
+ fields,
+ "file",
+ filename,
+ "application/xml",
+ casBytes
+ );
+ logger.info("[IMPORT][HTTP] status=" + res.status
+ + (res.locationHeader != null ? " Location=" + res.locationHeader : "")
+ + (res.body != null && !res.body.isBlank() ? " body=" + res.body : ""));
+ return res;
+ }
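+
+ // Example invocation (sketch; URL and corpus id are hypothetical):
+ //   HttpResult r = sendToImporterViaHttp(
+ //       "http://localhost:4567/api/ie/upload/uima", analysisId, 1L, analysisId, null);
+ //   if (r.status != 200) logger.warn("[IMPORT][HTTP] import failed: " + r.body);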
+
}
diff --git a/uce.portal/uce.common/src/main/resources/corpusConfig2.json b/uce.portal/uce.common/src/main/resources/corpusConfig2.json
new file mode 100644
index 00000000..3a18c818
--- /dev/null
+++ b/uce.portal/uce.common/src/main/resources/corpusConfig2.json
@@ -0,0 +1,45 @@
+{
+ "name": "[corpus_2]",
+ "author": "[author/owner of the corpus]",
+ "language": "[de-DE, en-EN, ...]",
+ "description": "",
+ "addToExistingCorpus": false,
+
+ "annotations": {
+ "annotatorMetadata": false,
+ "uceMetadata": false,
+ "logicalLinks": false,
+
+ "OCRPage": false,
+ "OCRParagraph": false,
+ "OCRBlock": false,
+ "OCRLine": false,
+
+ "srLink": false,
+ "namedEntity": false,
+ "sentiment": false,
+ "emotion": false,
+ "geoNames": false,
+ "lemma": false,
+ "sentence": false,
+ "taxon": {
+ "annotated": false,
+ "//comment": "[Are the taxons annotated with biofid onthologies through the 'identifier' property?]",
+ "biofidOnthologyAnnotated": false
+ },
+ "time": false,
+ "wikipediaLink": false,
+ "completeNegation": false,
+ "unifiedTopic": false
+
+ },
+ "other": {
+ "//comment": "[Is this corpus also available on https://sammlungen.ub.uni-frankfurt.de/? Either true or false]",
+ "availableOnFrankfurtUniversityCollection": false,
+
+ "includeKeywordDistribution": false,
+ "enableEmbeddings": false,
+ "enableRAGBot": false,
+ "enableS3Storage": false
+ }
+}
\ No newline at end of file
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
index 97534c64..5a1ac2f2 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
@@ -480,6 +480,7 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi
get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx));
post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx));
post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx));
+ post("/importCas", (registry.get(AnalysisApi.class)).importCas); //added the importCas path
});
path("/corpusUniverse", () -> {
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
index 866242bc..26a0ba7b 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
@@ -3,6 +3,7 @@
import com.google.gson.Gson;
import freemarker.template.Configuration;
import io.javalin.http.Context;
+import io.javalin.http.Handler;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.context.ApplicationContext;
@@ -56,10 +57,14 @@ public void runPipeline(Context ctx) {
model.put("inputLLM", inputLLM);
RunDUUIPipeline pipeline = new RunDUUIPipeline();
- DUUIInformation DataRequest = pipeline.getModelResources(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM);
+ RunDUUIPipeline.AnalysisResponse resp =
+ pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim,
+ inputCoherence, inputStance, inputLLM);
+ DUUIInformation DataRequest = resp.duuiInformation;
model.put("DUUI", DataRequest);
model.put("SuccessRequest", true);
model.put("modelGroups", DataRequest.getModelGroups());
+ model.put("analysisId", resp.analysisId);
// set history
history.addDuuiInformation(String.valueOf(counter), DataRequest);
@@ -180,5 +185,38 @@ public void callHistoryText(Context ctx) {
ctx.render("defaultError.ftl");
}
}
-
+ // Import route: pushes a cached analysis CAS into an existing corpus
+ @Authentication(required = Authentication.Requirement.LOGGED_IN,
+ route = Authentication.RouteTypes.POST,
+ path = "/api/analysis/importCas"
+ )
+ public Handler importCas = ctx -> {
+ try {
+ String analysisId = ctx.queryParam("analysisId");
+ if (analysisId == null || analysisId.isBlank()) {
+ ctx.status(400).result("Missing analysisId");
+ return;
+ }
+
+ // Lookup cached session
+ RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(analysisId);
+ if (session == null) {
+ ctx.status(404).result("No cached CAS found for analysisId=" + analysisId);
+ return;
+ }
+
+ // send to importer
+ long corpusId = Long.parseLong(ctx.queryParam("corpusId")); // from ?corpusId=...
+ String importPath = "/api/ie/upload/uima";
+ String importUrl = ctx.scheme() + "://" + ctx.host() + importPath;
+
+ RunDUUIPipeline.sendToImporterViaHttp(importUrl, analysisId, corpusId, analysisId, null); // analysisId doubles as documentId
+ ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId);
+ } catch (NumberFormatException nfe) {
+ ctx.status(400).result("corpusId is required and must be a number");
+ } catch (Exception e) {
+ logger.error("Error importing CAS", e);
+ ctx.status(500).result("Error importing CAS: " + e.getMessage());
+ }
+ };
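+
+ // Example call (host and ids are hypothetical):
+ //   curl -X POST "http://localhost:4567/api/analysis/importCas?analysisId=<uuid>&corpusId=1"
+ // Responds 400 on missing/invalid parameters, 404 if the cached session has
+ // expired, 200 once the CAS has been handed to the importer.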
}
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java
index a47013a9..41b262cb 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java
@@ -28,7 +28,6 @@ public class ImportExportApi implements UceApi {
private ApplicationContext serviceContext;
private static final Logger logger = LogManager.getLogger(PostgresqlDataInterface_Impl.class);
- private static Gson gson = new Gson();
public ImportExportApi(ApplicationContext serviceContext) {
this.serviceContext = serviceContext;
@@ -74,9 +73,11 @@ public void uploadUIMA(Context ctx) {
// First, we need to know which corpus this document should be added to.
var corpusId = ExceptionUtils.tryCatchLog(
() -> Long.parseLong(new String(ctx.req().getPart("corpusId").getInputStream().readAllBytes(), StandardCharsets.UTF_8)),
- (ex) -> logger.error("Error getting the corpusId this document should be added to. Aborting.", ex));
+ (ex) -> logger.error("Error getting corpusId from request.", ex));
+
if (corpusId == null) {
- ctx.result("Parameter corpusId didn't exist. Without it, the document cannot be uploaded.");
+ ctx.status(400);
+ ctx.result("Parameter corpusId didn't exist; cannot upload document.");
return;
}
@@ -91,29 +92,12 @@ public void uploadUIMA(Context ctx) {
var corpus = ExceptionUtils.tryCatchLog(
() -> db.getCorpusById(corpusId),
- (ex) -> logger.error("Couldn't fetch corpus when uploading new document to corpusId " + corpusId, ex));
+ (ex) -> logger.error("Couldn't fetch corpus with id " + corpusId, ex));
+
if (corpus == null) {
- var corpusConfigRaw = ExceptionUtils.tryCatchLog(
- () -> new String(ctx.req().getPart("corpusConfig").getInputStream().readAllBytes(), StandardCharsets.UTF_8),
- (ex) -> logger.error("Error getting the corpusConfig that should be used for this document. Aborting.", ex));
- if (corpusConfigRaw == null) {
- ctx.result("Corpus with id " + corpusId + " wasn't found in the database; no config was provided; can't upload document.");
- return;
- }
- logger.info("Corpus with id " + corpusId + " wasn't found in the database; creating a new corpus with the provided config.");
- try {
- var corpusConfig = gson.fromJson(corpusConfigRaw, CorpusConfig.class);
- corpus = new Corpus();
- var corpusReturn = Importer.CreateDBCorpus(corpus, corpusConfig, this.db);
- if (corpusReturn != null) {
- corpus = corpusReturn;
- }
- } catch (JsonIOException | JsonSyntaxException e) {
- ctx.result("The corpusConfig provided is not properly formatted.");
- } catch (DatabaseOperationException e) {
- ctx.result("Error creating a new corpus in the database: " + e.getMessage());
- return;
- }
+ ctx.status(404);
+ ctx.result("Corpus with id " + corpusId + " wasn't found in the database.");
+ return;
}
// TODO just use 1 as default? will throw an error if this is null otherwise...
@@ -122,10 +106,9 @@ public void uploadUIMA(Context ctx) {
try (var input = ctx.req().getPart("file").getInputStream()) {
var fileName = ctx.req().getPart("file").getSubmittedFileName();
// Import the doc in the background
- final Corpus corpus1 = corpus;
var importFuture = CompletableFuture.supplyAsync(() -> {
try {
- return importer.storeUploadedXMIToCorpusAsync(input, corpus1, fileName, documentId);
+ return importer.storeUploadedXMIToCorpusAsync(input, corpus, fileName, documentId);
} catch (DatabaseOperationException e) {
throw new RuntimeException(e);
}
@@ -139,6 +122,7 @@ public void uploadUIMA(Context ctx) {
if (acceptedContentType != null && acceptedContentType.equals("application/json")) {
Map apiResult = new HashMap<>();
apiResult.put("document_id", newDocumentId);
+ ctx.contentType("application/json");
ctx.json(apiResult);
return;
}