27 changes: 27 additions & 0 deletions uce.portal/resources/templates/wiki/analysisResultFragment.ftl
@@ -1,4 +1,31 @@
<#if analysisId??>
<div class="mb-3" style="display:flex; align-items:center; gap:8px;">
<button id="saveCasBtn" data-analysis-id="${analysisId}" class="btn btn-primary">Save CAS</button>
<label for="corpus-select" class="mb-0">Corpus</label>
<select id="corpus-select" class="form-control form-control-sm" style="max-width:200px;">
<option value="1">Corpus 1</option>
<option value="2">Corpus 2</option>
<option value="3">Corpus 3</option>
</select>
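<#-- NOTE: corpus options are hardcoded for now; the selected value is passed through as the corpusId query parameter. -->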

</div>

<script>
document.getElementById("saveCasBtn").addEventListener("click", function () {
const analysisId = this.dataset.analysisId;
const corpusId = document.getElementById("corpus-select").value;

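// POST both ids as query parameters; the endpoint replies with plain text that is shown to the user.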
fetch(
"/api/analysis/importCas?analysisId=" + encodeURIComponent(analysisId) +
"&corpusId=" + encodeURIComponent(corpusId),
{ method: "POST" }
)
.then(r => r.text())
.then(msg => alert("Server response: " + msg))
.catch(err => alert("Error: " + err));
});
</script>
</#if>
<#if DUUI??>
<#if DUUI.modelGroups?has_content>
<#if DUUI.isTopic>
RunDUUIPipeline.java
@@ -1,19 +1,43 @@
package org.texttechnologylab.uce.analysis;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer;
import org.texttechnologylab.uce.analysis.modules.*;
import org.texttechnologylab.uce.analysis.typeClasses.TextClass;

import java.io.DataOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class RunDUUIPipeline {
private static final AnalysisCache analysisCache = new AnalysisCache();
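// Hands the analysisId generated in getModelResources to getModelResourcesWithHandle on the same thread.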
private static final ThreadLocal<String> lastAnalysisIdTL = new ThreadLocal<>();
private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class);


public static AnalysisSession getCachedSession(String analysisId) { return analysisCache.get(analysisId); }

private static String getCurrentUserId() {
// TODO: replace with your auth/session identity
return "user-unknown";
}

public DUUIInformation getModelResources(List<String> modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception {
ModelResources modelResources = new ModelResources();
List<ModelGroup> modelGroupsList = modelResources.getGroupedModelObjects();
@@ -189,10 +213,13 @@ public DUUIInformation getModelResources(List<String> modelGroups, String inputT
newCas.setDocumentText(text);
cas = newCas;

logger.info("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)");

}
// run pipeline
DUUIComposer composer = pipeline.setComposer(modelInfosMap);
JCas result = pipeline.runPipeline(cas, composer);
logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)");
// get results
Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups);
// print results
Expand Down Expand Up @@ -232,9 +259,28 @@ public DUUIInformation getModelResources(List<String> modelGroups, String inputT
if (isCohmetrix) {
duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups);
}
String analysisId = UUID.randomUUID().toString();
String userId = getCurrentUserId();
String title = "Analysis " + Instant.now();

byte[] xmiBytes = toXmiBytes(result);
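// Serialized once here so later exports (e.g. sendToImporterViaHttp) can reuse the bytes.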
AnalysisSession session = new AnalysisSession(
analysisId, userId, title, /*externalId*/ null,
result, /*xmiBytes*/ xmiBytes
);
analysisCache.put(session);
lastAnalysisIdTL.set(analysisId);
logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)");
return duuiInformation;
}

public AnalysisResponse getModelResourcesWithHandle(List<String> modelGroups, String inputText, String claim,
String coherenceText, String stanceText, String systemPrompt) throws Exception {
DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt);
String id = lastAnalysisIdTL.get();
return new AnalysisResponse(id, info);
}
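
// Hypothetical caller-side sketch (names are illustrative, not part of this API):
//   AnalysisResponse resp = new RunDUUIPipeline()
//       .getModelResourcesWithHandle(groups, text, claim, coherence, stance, prompt);
//   String analysisId = resp.analysisId; // keep for a later /api/analysis/importCas call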

public static void main(String[] args) throws Exception {
ModelResources modelResources = new ModelResources();
List<ModelGroup> modelGroups = modelResources.getGroupedModelObjects();
@@ -256,5 +302,195 @@ public static void main(String[] args) throws Exception {
DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt);

}
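
// Pairs the cache handle (analysisId) with the computed results, so the UI can render now and import the CAS later.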
public static final class AnalysisResponse {
public final String analysisId;
public final DUUIInformation duuiInformation;

public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) {
this.analysisId = analysisId;
this.duuiInformation = duuiInformation;
}
}


// Immutable snapshot of one analysis run: the result JCas plus its serialized XMI bytes.
public static final class AnalysisSession {
public final String analysisId;
public final String userId;
public final long createdAtMillis;
public final String title;
public final String externalId;
public final JCas jcas;
public final byte[] xmiBytes;

public AnalysisSession(String analysisId, String userId, String title, String externalId,
JCas jcas, byte[] xmiBytes) {
this.analysisId = analysisId;
this.userId = userId;
this.title = title;
this.externalId = externalId;
this.createdAtMillis = System.currentTimeMillis();
this.jcas = jcas;
this.xmiBytes = xmiBytes;
}
}


// In-memory store of analysis sessions keyed by analysisId, expired after a fixed TTL.
public static final class AnalysisCache {
private final Map<String, AnalysisSession> map = new ConcurrentHashMap<>();
private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes

public void put(AnalysisSession s) { map.put(s.analysisId, s); }

public AnalysisSession get(String id) { // Retrieve a session from the cache
AnalysisSession s = map.get(id);
if (s == null) return null;

if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) { // If this session is older than 45 minutes -> expire it
map.remove(id);
return null;
}
return s;
}

public void remove(String id) { map.remove(id); } // Manually remove a session by ID


public void cleanupExpired() { // cleanup all expired sessions
long now = System.currentTimeMillis();
for (var entry : map.entrySet()) {
AnalysisSession s = entry.getValue();
if (now - s.createdAtMillis > ttlMillis) {
map.remove(entry.getKey());
logger.info("[CRON] Removed expired session: " + s.analysisId);
}
}
}
}
// Background job: sweep expired sessions from the cache every 5 minutes.
private static final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);

static {
scheduler.scheduleAtFixedRate(() -> {
try {
logger.info("[CACHE] Running cache cleanup task...");
analysisCache.cleanupExpired();
} catch (Exception e) {
logger.error("[CACHE] Cache cleanup failed: " + e.getMessage());
}
}, 5, 5, TimeUnit.MINUTES);
}



private static byte[] toXmiBytes(JCas jcas) throws Exception {
java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
org.apache.uima.cas.impl.XmiCasSerializer ser =
new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem());
org.apache.uima.util.XMLSerializer xmlSer =
new org.apache.uima.util.XMLSerializer(bos, true);
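// XML 1.1 permits (escaped) control characters that may occur in document text but are illegal in XML 1.0.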
xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1");
ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
return bos.toByteArray();
}


// Container for the importer's HTTP response: status code, body, and Location header (if any).
private static class HttpResult {
final int status;
final String body;
final String locationHeader;
HttpResult(int status, String body, String locationHeader) {
this.status = status; this.body = body; this.locationHeader = locationHeader;
}
}


// Send CAS via HTTP
private static HttpResult postMultipart(String urlStr,
Map<String, String> fields,
String fileField, String filename,
String fileContentType, byte[] fileBytes) throws Exception {
String boundary = "----JAVA-" + UUID.randomUUID(); // unique delimiter between the multipart parts
URL url = new URL(urlStr);
HttpURLConnection conn = (HttpURLConnection) url.openConnection(); // open the HTTP connection to the importer endpoint
conn.setDoOutput(true);
conn.setRequestMethod("POST");
conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);

try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) { //Write request body
// text fields
for (var e : fields.entrySet()) {
out.writeBytes("--" + boundary + "\r\n");
out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n");
out.write(e.getValue().getBytes(StandardCharsets.UTF_8));
out.writeBytes("\r\n");
}
// file field
out.writeBytes("--" + boundary + "\r\n");
out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n");
out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n");
out.write(fileBytes);
out.writeBytes("\r\n");
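// trailing "--" marks the end of the multipart body (RFC 2046 closing delimiter)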
out.writeBytes("--" + boundary + "--\r\n");
out.flush();
}

int status = conn.getResponseCode(); //Read the HTTP response from the importer
String location = conn.getHeaderField("Location");
String body;

try (InputStream in = (status >= 200 && status < 400) ? conn.getInputStream() : conn.getErrorStream()) {
body = (in != null) ? new String(in.readAllBytes(), StandardCharsets.UTF_8) : "";
}
conn.disconnect();
return new HttpResult(status, body, location);
}
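
// Design note: plain HttpURLConnection keeps this dependency-free; java.net.http.HttpClient (Java 11+) would be a drop-in alternative.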

// Sends a cached CAS to the importer endpoint as a multipart/form-data upload.
public static HttpResult sendToImporterViaHttp(String importUrl,
String analysisId,
long corpusId,
String documentId,
String casView) throws Exception {
AnalysisSession s = getCachedSession(analysisId);
if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId);

byte[] casBytes = (s.xmiBytes != null) ? s.xmiBytes : toXmiBytes(s.jcas); // reuse bytes serialized at cache time

Map<String, String> fields = new LinkedHashMap<>(); // Form-data fields
fields.put("analysisId", analysisId);
fields.put("corpusId", Long.toString(corpusId));
if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId);
if (casView != null && !casView.isBlank()) fields.put("casView", casView);


// Send multipart as XMI
String filename = "cas_" + analysisId + ".xmi";
logger.info("[IMPORT][HTTP] POST " + importUrl
+ " corpusId=" + corpusId + " analysisId=" + analysisId
+ " documentId=" + documentId + " casView=" + casView
+ " file=" + filename + " (" + casBytes.length + " bytes)");

HttpResult res = postMultipart(
importUrl,
fields,
"file",
filename,
"application/xml",
casBytes
);
logger.info("[IMPORT][HTTP] status=" + res.status
+ (res.locationHeader != null ? " Location=" + res.locationHeader : "")
+ (res.body != null && !res.body.isBlank() ? " body=" + res.body : ""));
return res;
}
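
// Hypothetical usage (importer URL and view name are assumptions, not defined by this class):
//   HttpResult r = sendToImporterViaHttp(
//       "http://localhost:8080/importer/upload", analysisId, 2L, null, "_InitialView");
//   if (r.status / 100 != 2) logger.warn("[IMPORT] importer rejected CAS: " + r.body);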


}
45 changes: 45 additions & 0 deletions uce.portal/uce.common/src/main/resources/corpusConfig2.json
@@ -0,0 +1,45 @@
{
"name": "[corpus_2]",
"author": "[author/owner of the corpus]",
"language": "[de-DE, en-EN, ...]",
"description": "",
"addToExistingCorpus": false,

"annotations": {
"annotatorMetadata": false,
"uceMetadata": false,
"logicalLinks": false,

"OCRPage": false,
"OCRParagraph": false,
"OCRBlock": false,
"OCRLine": false,

"srLink": false,
"namedEntity": false,
"sentiment": false,
"emotion": false,
"geoNames": false,
"lemma": false,
"sentence": false,
"taxon": {
"annotated": false,
"//comment": "[Are the taxons annotated with biofid onthologies through the 'identifier' property?]",
"biofidOnthologyAnnotated": false
},
"time": false,
"wikipediaLink": false,
"completeNegation": false,
"unifiedTopic": false

},
"other": {
"//comment": "[Is this corpus also available on https://sammlungen.ub.uni-frankfurt.de/? Either true or false]",
"availableOnFrankfurtUniversityCollection": false,

"includeKeywordDistribution": false,
"enableEmbeddings": false,
"enableRAGBot": false,
"enableS3Storage": false
}
}
@@ -480,6 +480,7 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi
get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx));
post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx));
post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx));
post("/importCas", (registry.get(AnalysisApi.class)).importCas); //added the importCas path
});

path("/corpusUniverse", () -> {