diff --git a/uce.portal/resources/templates/corpus/corpusInspector.ftl b/uce.portal/resources/templates/corpus/corpusInspector.ftl
index 484c6790..c074f490 100644
--- a/uce.portal/resources/templates/corpus/corpusInspector.ftl
+++ b/uce.portal/resources/templates/corpus/corpusInspector.ftl
@@ -17,6 +17,17 @@
+
diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl
index 98e75ac8..987da127 100644
--- a/uce.portal/resources/templates/landing-page.ftl
+++ b/uce.portal/resources/templates/landing-page.ftl
@@ -3,7 +3,8 @@
-
${uceConfig.getMeta().getName()?trim!"-"}
+ ${uceConfig.getMeta().getName()?trim!"-"}
@@ -15,8 +16,21 @@
-
${languageResource.get("corpora")}
+
+
${languageResource.get("corpora")}
+ <#if uceConfig.settings.enablePathImport?? && uceConfig.settings.enablePathImport>
+
+            </#if>
+
+
+
+
<#if corpora?size == 0>
@@ -33,7 +47,8 @@
data-id="${corpusVm.getCorpus().getId()}">
${corpusVm.getCorpus().getName()?trim}
-
${corpusVm.getCorpus().getAuthor()}
+
${corpusVm.getCorpus().getAuthor()}
+
+
+<#-- Modal for importing files via a path -->
+
+
+<#-- Modal for uploading files -->
+
+
+
\ No newline at end of file
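
Note on the new landing-page modals: they target the `/api/ie/import/path` and `/api/ie/import/upload` routes registered in App.java further down. A minimal client sketch for the path import, assuming a locally running portal (host and port are placeholders) and a URL-encoded form body, which Javalin's `formParam` also accepts:

```java
import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class PathImportClient {
    public static void main(String[] args) throws Exception {
        // Fields read by ImportExportApi.importCorpusFromPath via ctx.formParam(...)
        String form = "path=" + URLEncoder.encode("/data/corpora/my_corpus", StandardCharsets.UTF_8)
                + "&numThreads=4"
                + "&casView=_InitialView";
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8008/api/ie/import/path")) // placeholder host/port
                .header("Content-Type", "application/x-www-form-urlencoded")
                .POST(HttpRequest.BodyPublishers.ofString(form))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // On success the handler answers: 200 "Import started. Import ID: <uuid>"
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```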
diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
index 7e012f0c..00cd6d5c 100644
--- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
+++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl
@@ -1,4 +1,23 @@
+<#if analysisId??>
+
+
+
+
+</#if>
<#if DUUI??>
<#if DUUI.modelGroups?has_content>
<#if DUUI.isTopic>
diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java
index 3cb3d72a..e016d4ae 100644
--- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java
+++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java
@@ -129,8 +129,8 @@ public JCas getLanguage(String inputText) throws Exception {
public JCas getSentences(JCas cas) throws Exception {
HashMap<String, String> spacyUrls = new HashMap<>();
- spacyUrls.put("Spacy", "http://spacy-cohmetrix.service.component.duui.texttechnologylab.org");
-// spacyUrls.put("Spacy", "http://spacy.service.component.duui.texttechnologylab.org");
+// spacyUrls.put("Spacy", "http://spacy-cohmetrix.service.component.duui.texttechnologylab.org");
+ spacyUrls.put("Spacy", "http://spacy.service.component.duui.texttechnologylab.org");
spacyUrls.put("Syntok", "http://paragraph-syntok.service.component.duui.texttechnologylab.org/");
DUUIComposer composer = setListComposer(spacyUrls);
cas = runPipeline(cas, composer);
diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
index 02687feb..c34c6f15 100644
--- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
+++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java
@@ -1,6 +1,8 @@
package org.texttechnologylab.uce.analysis;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
@@ -8,11 +10,41 @@
import org.texttechnologylab.uce.analysis.modules.*;
import org.texttechnologylab.uce.analysis.typeClasses.TextClass;
+
+
+
+import java.time.Instant;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.io.InputStream;
+import java.io.DataOutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+
import java.util.*;
public class RunDUUIPipeline {
+ private static final AnalysisCache analysisCache = new AnalysisCache();
+ private static final ThreadLocal<String> lastAnalysisIdTL = new ThreadLocal<>();
+ private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class);
+ private static final ThreadLocal<String> currentUserIdTL = new ThreadLocal<>();
+
+
+ public static AnalysisSession getCachedSession(String analysisId) {
+ return analysisCache.get(analysisId);
+ }
+ public static void setThreadLocalUserId(String userId) {
+ currentUserIdTL.set(userId);
+ }
+
+ private static String getCurrentUserId() {
+ // TODO: replace with your auth/session identity
+
+ return currentUserIdTL.get();
+ }
public DUUIInformation getModelResources(List<String> modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception {
ModelResources modelResources = new ModelResources();
@@ -189,10 +221,13 @@ public DUUIInformation getModelResources(List modelGroups, String inputT
newCas.setDocumentText(text);
cas = newCas;
+ logger.info("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)");
+
}
// run pipeline
DUUIComposer composer = pipeline.setComposer(modelInfosMap);
JCas result = pipeline.runPipeline(cas, composer);
+ logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)");
// get results
Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups);
// print results
@@ -232,9 +267,29 @@ public DUUIInformation getModelResources(List modelGroups, String inputT
if (isCohmetrix) {
duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups);
}
+ String analysisId = UUID.randomUUID().toString();
+ String userId = getCurrentUserId();
+ logger.info("[USER] Running pipeline for User: " + userId);
+ String title = "Analysis " + Instant.now();
+
+ byte[] xmiBytes = toXmiBytes(result);
+ AnalysisSession session = new AnalysisSession(
+ analysisId, userId, title, /*externalId*/ null,
+ result, /*xmiBytes*/ xmiBytes
+ );
+ analysisCache.put(session);
+ lastAnalysisIdTL.set(analysisId);
+ logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)");
return duuiInformation;
}
+ public AnalysisResponse getModelResourcesWithHandle(List<String> modelGroups, String inputText, String claim,
+ String coherenceText, String stanceText, String systemPrompt) throws Exception {
+ DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt);
+ String id = lastAnalysisIdTL.get();
+ return new AnalysisResponse(id, info);
+ }
+
public static void main(String[] args) throws Exception {
ModelResources modelResources = new ModelResources();
List modelGroups = modelResources.getGroupedModelObjects();
@@ -256,5 +311,195 @@ public static void main(String[] args) throws Exception {
DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt);
}
+ public static final class AnalysisResponse {
+ public final String analysisId;
+ public final DUUIInformation duuiInformation;
+
+ public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) {
+ this.analysisId = analysisId;
+ this.duuiInformation = duuiInformation;
+ }
+ }
+
+
+ // AnalysisSession: immutable snapshot of one pipeline run (result JCas plus serialized XMI)
+ public static final class AnalysisSession {
+ public final String analysisId;
+ public final String userId;
+ public final long createdAtMillis;
+ public final String title;
+ public final String externalId;
+ public final JCas jcas;
+ public final byte[] xmiBytes;
+
+ public AnalysisSession(String analysisId, String userId, String title, String externalId,
+ JCas jcas, byte[] xmiBytes) {
+ this.analysisId = analysisId;
+ this.userId = userId;
+ this.title = title;
+ this.externalId = externalId;
+ this.createdAtMillis = System.currentTimeMillis();
+ this.jcas = jcas;
+ this.xmiBytes = xmiBytes;
+ }
+ }
+
+
+ // AnalysisCache: in-memory session store with a 45-minute TTL
+ public static final class AnalysisCache {
+ private final Map<String, AnalysisSession> map = new ConcurrentHashMap<>();
+ private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes
+
+ public void put(AnalysisSession s) { map.put(s.analysisId, s); }
+
+ public AnalysisSession get(String id) { // Retrieve a session from the cache
+ AnalysisSession s = map.get(id);
+ if (s == null) return null;
+
+ if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) { // If this session is older than 45 minutes -> expire it
+ map.remove(id);
+ return null;
+ }
+ return s;
+ }
+
+// public void remove(String id) {
+// map.remove(id);
+// } //Manually remove a session by ID
+//
+//
+// public void cleanupExpired() { // cleanup all expired sessions
+// long now = System.currentTimeMillis();
+// for (var entry : map.entrySet()) {
+// AnalysisSession s = entry.getValue();
+// if (now - s.createdAtMillis > ttlMillis) {
+// map.remove(entry.getKey());
+// logger.info("[CRON] Removed expired session: " + s.analysisId);
+// }
+// }
+// }
+// }
+// private static final java.util.concurrent.ScheduledExecutorService scheduler = //Cron job for automatic cleanup every 5 minutes
+// java.util.concurrent.Executors.newScheduledThreadPool(1);
+//
+// static {
+// scheduler.scheduleAtFixedRate(() -> {
+// try {
+// analysisCache.cleanupExpired();
+// } catch (Exception e) {
+// logger.error("[CACHE] Cache cleanup failed: " + e.getMessage());
+// }
+// }, 5, 5, java.util.concurrent.TimeUnit.MINUTES);
+//
+// scheduler.scheduleAtFixedRate(() -> {
+// logger.info("[CACHE] Running cache cleanup task...");
+// analysisCache.cleanupExpired(); // your cleanup method
+// }, 1, 5, TimeUnit.MINUTES);
+//
+//
+ }
+ private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception {
+ java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
+ org.apache.uima.cas.impl.XmiCasSerializer ser =
+ new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem());
+ org.apache.uima.util.XMLSerializer xmlSer =
+ new org.apache.uima.util.XMLSerializer(bos, true);
+ xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1");
+ ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
+ return bos.toByteArray();
+ }
+
+
+ // When we send CAS to the importer via HTTP, we want to capture the response.
+ // This small class acts like a container for the HTTP response details
+ private static class HttpResult {
+ final int status;
+ final String body;
+ final String locationHeader;
+ HttpResult(int status, String body, String locationHeader) {
+ this.status = status; this.body = body; this.locationHeader = locationHeader;
+ }
+ }
+
+
+ // Send CAS via HTTP
+ private static HttpResult postMultipart(String urlStr,
+ Map<String, String> fields,
+ String fileField, String filename,
+ String fileContentType, byte[] fileBytes) throws Exception {
+ String boundary = "----JAVA-" + UUID.randomUUID(); //Generate a boundary string to separate parts in multipart body
+ URL url = new URL(urlStr); //Open HTTP connection to the importer endpoint
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setDoOutput(true);
+ conn.setRequestMethod("POST");
+ conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
+
+ try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) { //Write request body
+ // text fields
+ for (var e : fields.entrySet()) {
+ out.writeBytes("--" + boundary + "\r\n");
+ out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n");
+ out.write(e.getValue().getBytes(StandardCharsets.UTF_8));
+ out.writeBytes("\r\n");
+ }
+ // file field
+ out.writeBytes("--" + boundary + "\r\n");
+ out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n");
+ out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n");
+ out.write(fileBytes);
+ out.writeBytes("\r\n");
+ out.writeBytes("--" + boundary + "--\r\n");
+ out.flush();
+ }
+
+ int status = conn.getResponseCode(); //Read the HTTP response from the importer
+ String location = conn.getHeaderField("Location");
+ String body;
+
+ try (InputStream in = (status >= 200 && status < 400) ? conn.getInputStream() : conn.getErrorStream()) {
+ body = (in != null) ? new String(in.readAllBytes(), StandardCharsets.UTF_8) : "";
+ }
+ conn.disconnect();
+ return new HttpResult(status, body, location);
+ }
+
+ public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached CAS to importer
+ String analysisId,
+ long corpusId,
+ String documentId,
+ String casView) throws Exception {
+ AnalysisSession s = getCachedSession(analysisId);
+ if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId);
+
+ byte[] casBytes = (s.xmiBytes != null) ? s.xmiBytes : toXmiBytes(s.jcas); // reuse the cached XMI bytes; re-serialize only if absent
+
+ Map<String, String> fields = new LinkedHashMap<>(); // Form-data fields
+ fields.put("analysisId", analysisId);
+ fields.put("corpusId", Long.toString(corpusId));
+ if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId);
+ if (casView != null && !casView.isBlank()) fields.put("casView", casView);
+
+
+ // Send multipart as XMI
+ String filename = "cas_" + analysisId + ".xmi";
+ logger.info("[IMPORT][HTTP] POST " + importUrl
+ + " corpusId=" + corpusId + " analysisId=" + analysisId
+ + " documentId=" + documentId + " casView=" + casView
+ + " file=" + filename + " (" + casBytes.length + " bytes)");
+
+ HttpResult res = postMultipart(
+ importUrl,
+ fields,
+ "file",
+ filename,
+ "application/xml",
+ casBytes
+ );
+ logger.info("[IMPORT][HTTP] status=" + res.status
+ + (res.locationHeader != null ? " Location=" + res.locationHeader : "")
+ + (res.body != null && !res.body.isBlank() ? " body=" + res.body : ""));
+ return res;
+ }
+
}
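
Putting the new pieces together, a caller can run the pipeline, keep the returned handle, and later push the cached CAS to the importer. A sketch under assumptions: the model group name, user id, portal URL, and corpusId below are placeholders, not values from this patch:

```java
import org.texttechnologylab.uce.analysis.RunDUUIPipeline;
import java.util.List;

public class CachedAnalysisDemo {
    public static void main(String[] args) throws Exception {
        // The user id would normally come from the session (see AnalysisApi.runPipeline).
        RunDUUIPipeline.setThreadLocalUserId("demo-user");

        RunDUUIPipeline pipeline = new RunDUUIPipeline();
        // Model group name and input are placeholders.
        RunDUUIPipeline.AnalysisResponse resp = pipeline.getModelResourcesWithHandle(
                List.of("Spacy"), "Some input text.", "", "", "", "");

        // The session stays retrievable from the in-memory cache for ~45 minutes.
        RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(resp.analysisId);
        System.out.println("Cached XMI bytes: " + (session != null ? session.xmiBytes.length : -1));

        // Push the cached CAS to the importer as multipart XMI (placeholder URL and corpusId).
        RunDUUIPipeline.sendToImporterViaHttp(
                "http://localhost:8008/api/ie/upload/uima",
                resp.analysisId, 1L, resp.analysisId /* documentId, as in AnalysisApi */, null);
    }
}
```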
diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java
index 3d2671f7..1fcfd6d9 100644
--- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java
+++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java
@@ -12,4 +12,5 @@ public class SettingsConfig {
private EmbeddingsConfig embeddings;
private AuthConfig authentication;
private MCPConfig mcp = new MCPConfig();
+ private boolean enablePathImport = false;
}
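
The new flag defaults to `false`, so existing deployments keep their current landing page; the import controls render only when it is enabled. A sketch of the corresponding settings entry, shown as JSON for illustration (the exact config format of a deployment may differ):

```json
{
  "settings": {
    "enablePathImport": true
  }
}
```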
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
index 97534c64..6037916a 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java
@@ -93,7 +93,7 @@ public static void main(String[] args) throws IOException {
() -> new AnnotationConfigApplicationContext(SpringConfig.class),
(ex) -> logger.fatal("========== [ABORT] ==========\nThe Application context couldn't be established. " +
"This is very likely due to a missing/invalid database connection. UCE will have to shutdown."));
- if(context == null) return;
+ if (context == null) return;
logger.info("Loaded application context and services.");
// Execute the external database scripts
@@ -115,7 +115,7 @@ public static void main(String[] args) throws IOException {
logger.info(languageResource.get("search"));
// Load in and test the model resources for the Analysis Engine
- if(SystemStatus.UceConfig.getSettings().getAnalysis().isEnableAnalysisEngine()){
+ if (SystemStatus.UceConfig.getSettings().getAnalysis().isEnableAnalysisEngine()) {
var modelResources = new ModelResources();
var ttlabScorer = new TTLabScorerInfo();
var cohMetrixInfo = new CohMetrixInfo();
@@ -134,7 +134,7 @@ public static void main(String[] args) throws IOException {
SystemStatus.LexiconIsCalculating = true;
var lexiconService = context.getBean(LexiconService.class);
var addedLexiconEntries = 0;
- if(forceLexicalization) addedLexiconEntries = lexiconService.updateLexicon(true);
+ if (forceLexicalization) addedLexiconEntries = lexiconService.updateLexicon(true);
else addedLexiconEntries = lexiconService.checkForUpdates();
logger.info("Finished updating the lexicon. Added new entries: " + addedLexiconEntries);
SystemStatus.LexiconIsCalculating = false;
@@ -142,23 +142,23 @@ public static void main(String[] args) throws IOException {
logger.info("Checking if we can or should update any linkables... (this may take a moment depending on the time of the last update. Runs asynchronous.)");
CompletableFuture.runAsync(() -> {
- try{
+ try {
var result = context.getBean(PostgresqlDataInterface_Impl.class).callLogicalLinksRefresh();
logger.info("Finished updating the linkables. Updated linkables: " + result);
- } catch (Exception ex){
+ } catch (Exception ex) {
logger.error("There was an error trying to refresh linkables in the startup of the web app. App starts normally though.");
}
});
logger.info("Checking if we can or should update any geoname locations... (this may take a moment depending on the time of the last update. Runs asynchronous.)");
CompletableFuture.runAsync(() -> {
- try{
+ try {
var result = context.getBean(PostgresqlDataInterface_Impl.class).callGeonameLocationRefresh();
logger.info("Finished updating the geoname locations. Updated locations: " + result);
logger.info("Trying to refresh the timeline map cache...");
context.getBean(MapService.class).refreshCachedTimelineMap(false);
logger.info("Finished refreshing the timeline map.");
- } catch (Exception ex){
+ } catch (Exception ex) {
logger.error("There was an error trying to refresh geoname locations in the startup of the web app. App starts normally though.");
}
});
@@ -176,8 +176,7 @@ public static void main(String[] args) throws IOException {
// We use the externalLocation method so that the files in the public folder are hot reloaded
if (commonConfig.useExternalPublicLocation()) {
config.staticFiles.add(commonConfig.getPublicLocation(), Location.EXTERNAL);
- }
- else {
+ } else {
config.staticFiles.add("/public", Location.CLASSPATH);
}
logger.info("Setup FreeMarker templates and public folders.");
@@ -200,8 +199,7 @@ public static void main(String[] args) throws IOException {
ExceptionUtils.tryCatchLog(() -> initMCP(registry, config),
(ex) -> logger.error("There was a problem initializing the MCP server, web service will be shut down.", ex));
logger.info("MCP server initialized.");
- }
- else {
+ } else {
logger.info("MCP server is disabled and will not be initialized.");
}
config.jsonMapper(mapper);
@@ -350,174 +348,177 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi
List groups = modelResources.getGroupedModelObjects();
config.router.apiBuilder(() -> {
- before(ctx -> {
- ctx.res().setCharacterEncoding("UTF-8");
- // Setup and log all API calls with some information. We don't want to log file uploads, since it would
- // destroy the file body stream.
- if (!(ctx.contentType() != null && ctx.contentType().contains("multipart/form-data"))) {
- ctx.attribute("id", UUID.randomUUID().toString());
- logger.info("Received API call: ID={}, IP={}, Method={}, URI={}, QUERY={}, BODY={}",
- ctx.attribute("id"), ctx.ip(), ctx.method().name(), ctx.url(), ctx.queryString(), ctx.body());
-
- // Should we log to db as well?
- if (commonConfig.getLogToDb() && SystemStatus.PostgresqlDbStatus.isAlive()) {
- var uceLog = new UCELog(ctx.ip(), ctx.method().name(), ctx.url(), ctx.body(), ctx.queryString());
- ExceptionUtils.tryCatchLog(
- () -> context.getBean(PostgresqlDataInterface_Impl.class).saveUceLog(uceLog),
- (ex) -> logger.error("Error storing a log to the database: ", ex));
- logger.info("Last log was also logged to the db with id " + uceLog.getId());
- }
- } else {
- // Else we have a form-data upload. We handle those explicitly.
- // Set the multipart data configs for uploads
- ctx.req().setAttribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/tmp"));
- }
-
- // Always inject the current system config into all UI templates
- RequestContextHolder.setUceConfigHolder(SystemStatus.UceConfig);
-
- // Check if the request contains a language parameter
- var languageResources = LanguageResources.fromRequest(ctx);
- ctx.header("Content-Language", languageResources.getDefaultLanguage());
- RequestContextHolder.setLanguageResources(languageResources);
-
- // Check if we have an authenticated user in the session and inject it into the template
- if (SystemStatus.UceConfig.getSettings().getAuthentication().isActivated()) {
- var user = SessionManager.getUserFromRequest(ctx);
- RequestContextHolder.setAuthenticatedUceUser(user);
- }
- });
+ before(ctx -> {
+ ctx.res().setCharacterEncoding("UTF-8");
+ // Setup and log all API calls with some information. We don't want to log file uploads, since it would
+ // destroy the file body stream.
+ if (!(ctx.contentType() != null && ctx.contentType().contains("multipart/form-data"))) {
+ ctx.attribute("id", UUID.randomUUID().toString());
+ logger.info("Received API call: ID={}, IP={}, Method={}, URI={}, QUERY={}, BODY={}",
+ ctx.attribute("id"), ctx.ip(), ctx.method().name(), ctx.url(), ctx.queryString(), ctx.body());
+
+ // Should we log to db as well?
+ if (commonConfig.getLogToDb() && SystemStatus.PostgresqlDbStatus.isAlive()) {
+ var uceLog = new UCELog(ctx.ip(), ctx.method().name(), ctx.url(), ctx.body(), ctx.queryString());
+ ExceptionUtils.tryCatchLog(
+ () -> context.getBean(PostgresqlDataInterface_Impl.class).saveUceLog(uceLog),
+ (ex) -> logger.error("Error storing a log to the database: ", ex));
+ logger.info("Last log was also logged to the db with id " + uceLog.getId());
+ }
+ } else {
+ // Else we have a form-data upload. We handle those explicitly.
+ // Set the multipart data configs for uploads
+ ctx.req().setAttribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/tmp"));
+ }
+
+ // Always inject the current system config into all UI templates
+ RequestContextHolder.setUceConfigHolder(SystemStatus.UceConfig);
+
+ // Check if the request contains a language parameter
+ var languageResources = LanguageResources.fromRequest(ctx);
+ ctx.header("Content-Language", languageResources.getDefaultLanguage());
+ RequestContextHolder.setLanguageResources(languageResources);
+
+ // Check if we have an authenticated user in the session and inject it into the template
+ if (SystemStatus.UceConfig.getSettings().getAuthentication().isActivated()) {
+ var user = SessionManager.getUserFromRequest(ctx);
+ RequestContextHolder.setAuthenticatedUceUser(user);
+ }
+ });
+
+ // Landing page
+ get("/", ctx -> {
+ var model = new HashMap<String, Object>();
+ model.put("title", SystemStatus.UceConfig.getMeta().getName());
+ model.put("corpora", context.getBean(PostgresqlDataInterface_Impl.class)
+ .getAllCorpora()
+ .stream().map(Corpus::getViewModel)
+ .toList());
+ model.put("commonConf", commonConfig);
+ model.put("isSparqlAlive", SystemStatus.JenaSparqlStatus.isAlive());
+ model.put("isAuthAlive", SystemStatus.AuthenticationService.isAlive());
+ model.put("isDbAlive", SystemStatus.PostgresqlDbStatus.isAlive());
+ model.put("isRagAlive", SystemStatus.RagServiceStatus.isAlive());
+ model.put("isS3StorageAlive", SystemStatus.S3StorageStatus.isAlive());
+ model.put("isLexiconCalculating", SystemStatus.LexiconIsCalculating);
+ model.put("alphabetList", StringUtils.getAlphabetAsList());
+ model.put("lexiconEntriesCount", context.getBean(LexiconService.class).countLexiconEntries());
+ model.put("lexiconizableAnnotations", LexiconService.lexiconizableAnnotations);
+ model.put("uceVersion", commonConfig.getUceVersion());
+ model.put("modelGroups", groups);
+ model.put("ttlabScorer", taInputMap);
+ model.put("cohMetrix", cohMetrixMap);
+
+ // The vm files are located under the resources directory
+ ctx.render("index.ftl", model);
+ });
+
+ // Potential imprint
+ get("/imprint", ctx -> {
+ var model = new HashMap<String, Object>();
+ model.put("imprint", SystemStatus.UceConfig.getCorporate().getImprint());
+ ctx.render("imprint.ftl", model);
+ });
+
+ // A document reader view
+ get("/documentReader", (ctx) -> (registry.get(DocumentApi.class)).getSingleDocumentReadView(ctx));
+
+ // A corpus World View
+ get("/globe", (ctx) -> (registry.get(DocumentApi.class)).get3dGlobe(ctx));
+
+
+ path("/auth", () -> {
+ get("/login", (ctx) -> (registry.get(AuthenticationApi.class)).loginCallback(ctx));
+ get("/logout", (ctx) -> (registry.get(AuthenticationApi.class)).logoutCallback(ctx));
+ });
+
+ // API routes
+ path("/api", () -> {
+ before("/*", (ctx) -> {
+ });
- // Landing page
- get("/", ctx -> {
- var model = new HashMap<String, Object>();
- model.put("title", SystemStatus.UceConfig.getMeta().getName());
- model.put("corpora", context.getBean(PostgresqlDataInterface_Impl.class)
- .getAllCorpora()
- .stream().map(Corpus::getViewModel)
- .toList());
- model.put("commonConf", commonConfig);
- model.put("isSparqlAlive", SystemStatus.JenaSparqlStatus.isAlive());
- model.put("isAuthAlive", SystemStatus.AuthenticationService.isAlive());
- model.put("isDbAlive", SystemStatus.PostgresqlDbStatus.isAlive());
- model.put("isRagAlive", SystemStatus.RagServiceStatus.isAlive());
- model.put("isS3StorageAlive", SystemStatus.S3StorageStatus.isAlive());
- model.put("isLexiconCalculating", SystemStatus.LexiconIsCalculating);
- model.put("alphabetList", StringUtils.getAlphabetAsList());
- model.put("lexiconEntriesCount", context.getBean(LexiconService.class).countLexiconEntries());
- model.put("lexiconizableAnnotations", LexiconService.lexiconizableAnnotations);
- model.put("uceVersion", commonConfig.getUceVersion());
- model.put("modelGroups", groups);
- model.put("ttlabScorer", taInputMap);
- model.put("cohMetrix", cohMetrixMap);
-
- // The vm files are located under the resources directory
- ctx.render("index.ftl", model);
+ path("/ie", () -> {
+ post("/upload/uima", (ctx) -> (registry.get(ImportExportApi.class)).uploadUIMA(ctx));
+ get("/download/uima", (ctx) -> (registry.get(ImportExportApi.class)).downloadUIMA(ctx));
+ post("/import/path", (ctx) -> (registry.get(ImportExportApi.class)).importCorpusFromPath(ctx));
+ post("/import/upload",(ctx) -> (registry.get(ImportExportApi.class)).importCorpusFromUpload(ctx));
+ });
+
+ path("/wiki", () -> {
+ get("/page", (ctx) -> (registry.get(WikiApi.class)).getPage(ctx));
+ get("/annotation", (ctx) -> (registry.get(WikiApi.class)).getAnnotation(ctx));
+ path("/linkable", () -> {
+ post("/node", (ctx) -> (registry.get(WikiApi.class)).getLinkableNode(ctx));
+ });
+ path("/lexicon", () -> {
+ post("/entries", (ctx) -> (registry.get(WikiApi.class)).getLexicon(ctx));
+ post("/occurrences", (ctx) -> (registry.get(WikiApi.class)).getOccurrencesOfLexiconEntry(ctx));
});
+ post("/queryOntology", (ctx) -> (registry.get(WikiApi.class)).queryOntology(ctx));
+ });
- // Potential imprint
- get("/imprint", ctx -> {
- var model = new HashMap<String, Object>();
- model.put("imprint", SystemStatus.UceConfig.getCorporate().getImprint());
- ctx.render("imprint.ftl", model);
+ path("/corpus", () -> {
+ get("/inspector", (ctx) -> (registry.get(DocumentApi.class)).getCorpusInspectorView(ctx));
+ get("/documentsList", (ctx) -> (registry.get(DocumentApi.class)).getDocumentListOfCorpus(ctx));
+ path("/map", () -> {
+ post("/linkedOccurrences", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrences(ctx));
+ post("/linkedOccurrenceClusters", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrenceClusters(ctx));
});
+ });
- // A document reader view
- get("/documentReader", (ctx) -> (registry.get(DocumentApi.class)).getSingleDocumentReadView(ctx));
+ path("/search", () -> {
+ post("/default", (ctx) -> (registry.get(SearchApi.class)).search(ctx));
+ post("/semanticRole", (ctx) -> (registry.get(SearchApi.class)).semanticRoleSearch(ctx));
+ post("/layered", (ctx) -> (registry.get(SearchApi.class)).layeredSearch(ctx));
+ get("/active/page", (ctx) -> (registry.get(SearchApi.class)).activeSearchPage(ctx));
+ get("/active/sort", (ctx) -> (registry.get(SearchApi.class)).activeSearchSort(ctx));
+ get("/semanticRole/builder", (ctx) -> (registry.get(SearchApi.class)).getSemanticRoleBuilderView(ctx));
+ });
- // A corpus World View
- get("/globe", (ctx) -> (registry.get(DocumentApi.class)).get3dGlobe(ctx));
+ path("/analysis", () -> {
+ post("/runPipeline", (ctx) -> (registry.get(AnalysisApi.class)).runPipeline(ctx));
+ get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx));
+ post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx));
+ post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx));
+ post("/importCas", (registry.get(AnalysisApi.class)).importCas); //added the importCas path
+ });
+ path("/corpusUniverse", () -> {
+ // Gets a corpus universe view
+ get("/new", (ctx) -> (registry.get(CorpusUniverseApi.class)).getCorpusUniverseView(ctx));
+ post("/fromSearch", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromSearch(ctx));
+ post("/fromCorpus", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromCorpus(ctx));
+ get("/nodeInspectorContent", (ctx) -> (registry.get(CorpusUniverseApi.class)).getNodeInspectorContentView(ctx));
+ });
- path("/auth", () -> {
- get("/login", (ctx) -> (registry.get(AuthenticationApi.class)).loginCallback(ctx));
- get("/logout", (ctx) -> (registry.get(AuthenticationApi.class)).logoutCallback(ctx));
- });
+ path("/document", () -> {
+ get("/reader/pagesList", (ctx) -> (registry.get(DocumentApi.class)).getPagesListView(ctx));
+ get("/uceMetadata", (ctx) -> (registry.get(DocumentApi.class)).getUceMetadataOfDocument(ctx));
+ get("/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopics(ctx));
+ get("/page/taxon", (ctx) -> (registry.get(DocumentApi.class)).getTaxonCountByPage(ctx));
+ get("/page/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopicDistributionByPage(ctx));
+ get("/page/topicEntityRelation", (ctx) -> (registry.get(DocumentApi.class)).getSentenceTopicsWithEntities(ctx));
+ get("/page/topicWords", (ctx) -> (registry.get(DocumentApi.class)).getTopicWordsByDocument(ctx));
+ get("/unifiedTopicSentenceMap", (ctx) -> (registry.get(DocumentApi.class)).getUnifiedTopicToSentenceMap(ctx));
+ get("/page/namedEntities", (ctx) -> (registry.get(DocumentApi.class)).getDocumentNamedEntitiesByPage(ctx));
+ get("/page/lemma", (ctx) -> (registry.get(DocumentApi.class)).getDocumentLemmaByPage(ctx));
+ get("/page/geoname", (ctx) -> (registry.get(DocumentApi.class)).getDocumentGeonameByPage(ctx));
+ delete("/delete", (ctx) -> (registry.get(DocumentApi.class)).deleteDocument(ctx));
+ get("/findIdByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdByMetadata(ctx));
+ get("/findIdsByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdsByMetadata(ctx));
+ });
- // API routes
- path("/api", () -> {
- before("/*", (ctx) -> {
- });
-
- path("/ie", () -> {
- post("/upload/uima", (ctx) -> (registry.get(ImportExportApi.class)).uploadUIMA(ctx));
- get("/download/uima", (ctx) -> (registry.get(ImportExportApi.class)).downloadUIMA(ctx));
- });
-
- path("/wiki", () -> {
- get("/page", (ctx) -> (registry.get(WikiApi.class)).getPage(ctx));
- get("/annotation", (ctx) -> (registry.get(WikiApi.class)).getAnnotation(ctx));
- path("/linkable", () -> {
- post("/node", (ctx) -> (registry.get(WikiApi.class)).getLinkableNode(ctx));
- });
- path("/lexicon", () -> {
- post("/entries", (ctx) -> (registry.get(WikiApi.class)).getLexicon(ctx));
- post("/occurrences", (ctx) -> (registry.get(WikiApi.class)).getOccurrencesOfLexiconEntry(ctx));
- });
- post("/queryOntology", (ctx) -> (registry.get(WikiApi.class)).queryOntology(ctx));
- });
-
- path("/corpus", () -> {
- get("/inspector", (ctx) -> (registry.get(DocumentApi.class)).getCorpusInspectorView(ctx));
- get("/documentsList", (ctx) -> (registry.get(DocumentApi.class)).getDocumentListOfCorpus(ctx));
- path("/map", () -> {
- post("/linkedOccurrences", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrences(ctx));
- post("/linkedOccurrenceClusters", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrenceClusters(ctx));
- });
- });
-
- path("/search", () -> {
- post("/default", (ctx) -> (registry.get(SearchApi.class)).search(ctx));
- post("/semanticRole", (ctx) -> (registry.get(SearchApi.class)).semanticRoleSearch(ctx));
- post("/layered", (ctx) -> (registry.get(SearchApi.class)).layeredSearch(ctx));
- get("/active/page", (ctx) -> (registry.get(SearchApi.class)).activeSearchPage(ctx));
- get("/active/sort", (ctx) -> (registry.get(SearchApi.class)).activeSearchSort(ctx));
- get("/semanticRole/builder", (ctx) -> (registry.get(SearchApi.class)).getSemanticRoleBuilderView(ctx));
- });
-
- path("/analysis", () -> {
- post("/runPipeline", (ctx) -> (registry.get(AnalysisApi.class)).runPipeline(ctx));
- get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx));
- post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx));
- post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx));
- });
-
- path("/corpusUniverse", () -> {
- // Gets a corpus universe view
- get("/new", (ctx) -> (registry.get(CorpusUniverseApi.class)).getCorpusUniverseView(ctx));
- post("/fromSearch", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromSearch(ctx));
- post("/fromCorpus", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromCorpus(ctx));
- get("/nodeInspectorContent", (ctx) -> (registry.get(CorpusUniverseApi.class)).getNodeInspectorContentView(ctx));
- });
-
- path("/document", () -> {
- get("/reader/pagesList", (ctx) -> (registry.get(DocumentApi.class)).getPagesListView(ctx));
- get("/uceMetadata", (ctx) -> (registry.get(DocumentApi.class)).getUceMetadataOfDocument(ctx));
- get("/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopics(ctx));
- get("/page/taxon", (ctx) -> (registry.get(DocumentApi.class)).getTaxonCountByPage(ctx));
- get("/page/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopicDistributionByPage(ctx));
- get("/page/topicEntityRelation", (ctx) -> (registry.get(DocumentApi.class)).getSentenceTopicsWithEntities(ctx));
- get("/page/topicWords", (ctx) -> (registry.get(DocumentApi.class)).getTopicWordsByDocument(ctx));
- get("/unifiedTopicSentenceMap", (ctx) -> (registry.get(DocumentApi.class)).getUnifiedTopicToSentenceMap(ctx));
- get("/page/namedEntities", (ctx) -> (registry.get(DocumentApi.class)).getDocumentNamedEntitiesByPage(ctx));
- get("/page/lemma", (ctx) -> (registry.get(DocumentApi.class)).getDocumentLemmaByPage(ctx));
- get("/page/geoname", (ctx) -> (registry.get(DocumentApi.class)).getDocumentGeonameByPage(ctx));
- delete("/delete", (ctx) -> (registry.get(DocumentApi.class)).deleteDocument(ctx));
- get("/findIdByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdByMetadata(ctx));
- get("/findIdsByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdsByMetadata(ctx));
- });
-
- path("/rag", () -> {
- get("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx));
- // NOTE we allow also "post" here, as the system prompt can get quite long...
- post("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx));
- post("/postUserMessage", (ctx) -> (registry.get(RAGApi.class)).postUserMessage(ctx));
- get("/messages", (ctx) -> (registry.get(RAGApi.class)).getMessagesForChat(ctx));
- get("/plotTsne", (ctx) -> (registry.get(RAGApi.class)).getTsnePlot(ctx));
- get("/sentenceEmbeddings", (ctx) -> (registry.get(RAGApi.class)).getSentenceEmbeddings(ctx));
- });
- });
+ path("/rag", () -> {
+ get("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx));
+ // NOTE we allow also "post" here, as the system prompt can get quite long...
+ post("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx));
+ post("/postUserMessage", (ctx) -> (registry.get(RAGApi.class)).postUserMessage(ctx));
+ get("/messages", (ctx) -> (registry.get(RAGApi.class)).getMessagesForChat(ctx));
+ get("/plotTsne", (ctx) -> (registry.get(RAGApi.class)).getTsnePlot(ctx));
+ get("/sentenceEmbeddings", (ctx) -> (registry.get(RAGApi.class)).getSentenceEmbeddings(ctx));
});
+ });
+ });
}
private static JsonMapper getJsonMapper() {
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
index 866242bc..16fbd9eb 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java
@@ -3,6 +3,7 @@
import com.google.gson.Gson;
import freemarker.template.Configuration;
import io.javalin.http.Context;
+import io.javalin.http.Handler;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.context.ApplicationContext;
@@ -10,8 +11,10 @@
import org.texttechnologylab.uce.analysis.RunDUUIPipeline;
import org.texttechnologylab.uce.analysis.modules.DUUIInformation;
import org.texttechnologylab.uce.common.annotations.auth.Authentication;
+import org.texttechnologylab.uce.common.models.authentication.UceUser;
import org.texttechnologylab.uce.common.models.dto.AnalysisRequestDto;
import org.texttechnologylab.uce.common.models.dto.HistoryRequestDto;
+import org.texttechnologylab.uce.web.SessionManager;
import java.util.HashMap;
import java.util.List;
@@ -55,11 +58,19 @@ public void runPipeline(Context ctx) {
model.put("inputStance", inputStance);
model.put("inputLLM", inputLLM);
+ UceUser user = SessionManager.getUserFromRequest(ctx);
+ String userId = (user != null) ? user.getUsername() : "user-unknown";
+ RunDUUIPipeline.setThreadLocalUserId(userId);
+
RunDUUIPipeline pipeline = new RunDUUIPipeline();
- DUUIInformation DataRequest = pipeline.getModelResources(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM);
+ RunDUUIPipeline.AnalysisResponse resp =
+ pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim,
+ inputCoherence, inputStance, inputLLM);
+ DUUIInformation DataRequest = resp.duuiInformation;
model.put("DUUI", DataRequest);
model.put("SuccessRequest", true);
model.put("modelGroups", DataRequest.getModelGroups());
+ model.put("analysisId", resp.analysisId);
// set history
history.addDuuiInformation(String.valueOf(counter), DataRequest);
@@ -180,5 +191,38 @@ public void callHistoryText(Context ctx) {
ctx.render("defaultError.ftl");
}
}
-
+ // IMPORT ROUTE
+ @Authentication(required = Authentication.Requirement.LOGGED_IN,
+ route = Authentication.RouteTypes.POST,
+ path = "/api/analysis/importCas"
+ )
+ public Handler importCas = ctx -> {
+ try {
+ String analysisId = ctx.queryParam("analysisId");
+ if (analysisId == null || analysisId.isBlank()) {
+ ctx.status(400).result("Missing analysisId");
+ return;
+ }
+
+ // Lookup cached session
+ RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(analysisId);
+ if (session == null) {
+ ctx.status(404).result("No cached CAS found for analysisId=" + analysisId);
+ return;
+ }
+
+ // send to importer
+ long corpusId = Long.parseLong(ctx.queryParam("corpusId")); // from ?corpusId=...
+ String importPath = "/api/ie/upload/uima";
+ String importUrl = ctx.scheme() + "://" + ctx.host() + importPath;
+
+ RunDUUIPipeline.sendToImporterViaHttp(importUrl, analysisId, corpusId, analysisId, null);
+ ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId);
+ } catch (NumberFormatException nfe) {
+ ctx.status(400).result("corpusId is required and must be a number");
+ } catch (Exception e) {
+ logger.error("Error importing CAS", e);
+ ctx.status(500).result("Error importing CAS: " + e.getMessage());
+ }
+ };
}
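
The `importCas` handler reads both identifiers from the query string and requires a logged-in session. A minimal invocation sketch; host, port, and both IDs are placeholders, and session-cookie handling is omitted:

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ImportCasClient {
    public static void main(String[] args) throws Exception {
        String analysisId = "123e4567-e89b-12d3-a456-426614174000"; // returned in the runPipeline view model
        long corpusId = 1L;                                         // placeholder
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8008/api/analysis/importCas"
                        + "?analysisId=" + analysisId + "&corpusId=" + corpusId))
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // 200: imported; 400: missing/invalid parameters; 404: cache entry missing or expired (45-minute TTL)
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```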
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java
index a47013a9..bc43a31e 100644
--- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java
+++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java
@@ -1,35 +1,45 @@
package org.texttechnologylab.uce.web.routes;
import com.google.gson.Gson;
-import com.google.gson.JsonIOException;
-import com.google.gson.JsonSyntaxException;
+import com.google.gson.GsonBuilder;
import io.javalin.http.Context;
+import io.javalin.http.UploadedFile;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.context.ApplicationContext;
import org.texttechnologylab.uce.common.config.CorpusConfig;
+import org.texttechnologylab.uce.common.config.corpusConfig.CorpusAnnotationConfig;
+import org.texttechnologylab.uce.common.config.corpusConfig.OtherConfig;
+import org.texttechnologylab.uce.common.config.corpusConfig.TaxonConfig;
import org.texttechnologylab.uce.common.exceptions.DatabaseOperationException;
import org.texttechnologylab.uce.common.exceptions.ExceptionUtils;
-import org.texttechnologylab.uce.common.models.corpus.Corpus;
+import org.texttechnologylab.uce.common.models.imp.ImportStatus;
+import org.texttechnologylab.uce.common.models.imp.UCEImport;
import org.texttechnologylab.uce.common.services.PostgresqlDataInterface_Impl;
import org.texttechnologylab.uce.common.services.S3StorageService;
import org.texttechnologylab.uce.common.utils.StringUtils;
import org.texttechnologylab.uce.corpusimporter.Importer;
+import java.io.IOException;
+import java.io.InputStream;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
import java.util.HashMap;
import java.util.Map;
+import java.util.UUID;
import java.util.concurrent.CompletableFuture;
+
public class ImportExportApi implements UceApi {
+ private static final Logger logger = LogManager.getLogger(ImportExportApi.class);
private S3StorageService s3StorageService;
private PostgresqlDataInterface_Impl db;
private ApplicationContext serviceContext;
- private static final Logger logger = LogManager.getLogger(PostgresqlDataInterface_Impl.class);
- private static Gson gson = new Gson();
-
public ImportExportApi(ApplicationContext serviceContext) {
this.serviceContext = serviceContext;
this.s3StorageService = serviceContext.getBean(S3StorageService.class);
@@ -54,7 +64,7 @@ public void downloadUIMA(Context ctx) {
ctx.res().setContentType(contentType);
ctx.res().setHeader("Content-Disposition", "attachment; filename=\"" + objectName + "." + StringUtils.getExtensionByContentType(contentType) + "\"");
- var buffer = new byte[8192];
+ var buffer = new byte[8192];
int bytesRead;
while ((bytesRead = s3Stream.read(buffer)) != -1) {
out.write(buffer, 0, bytesRead);
@@ -74,9 +84,11 @@ public void uploadUIMA(Context ctx) {
// First, we need to know which corpus this document should be added to.
var corpusId = ExceptionUtils.tryCatchLog(
() -> Long.parseLong(new String(ctx.req().getPart("corpusId").getInputStream().readAllBytes(), StandardCharsets.UTF_8)),
- (ex) -> logger.error("Error getting the corpusId this document should be added to. Aborting.", ex));
+ (ex) -> logger.error("Error getting corpusId from request.", ex));
+
if (corpusId == null) {
- ctx.result("Parameter corpusId didn't exist. Without it, the document cannot be uploaded.");
+ ctx.status(400);
+ ctx.result("Parameter corpusId didn't exist; cannot upload document.");
return;
}
@@ -91,29 +103,12 @@ public void uploadUIMA(Context ctx) {
var corpus = ExceptionUtils.tryCatchLog(
() -> db.getCorpusById(corpusId),
- (ex) -> logger.error("Couldn't fetch corpus when uploading new document to corpusId " + corpusId, ex));
+ (ex) -> logger.error("Couldn't fetch corpus with id " + corpusId, ex));
+
if (corpus == null) {
- var corpusConfigRaw = ExceptionUtils.tryCatchLog(
- () -> new String(ctx.req().getPart("corpusConfig").getInputStream().readAllBytes(), StandardCharsets.UTF_8),
- (ex) -> logger.error("Error getting the corpusConfig that should be used for this document. Aborting.", ex));
- if (corpusConfigRaw == null) {
- ctx.result("Corpus with id " + corpusId + " wasn't found in the database; no config was provided; can't upload document.");
- return;
- }
- logger.info("Corpus with id " + corpusId + " wasn't found in the database; creating a new corpus with the provided config.");
- try {
- var corpusConfig = gson.fromJson(corpusConfigRaw, CorpusConfig.class);
- corpus = new Corpus();
- var corpusReturn = Importer.CreateDBCorpus(corpus, corpusConfig, this.db);
- if (corpusReturn != null) {
- corpus = corpusReturn;
- }
- } catch (JsonIOException | JsonSyntaxException e) {
- ctx.result("The corpusConfig provided is not properly formatted.");
- } catch (DatabaseOperationException e) {
- ctx.result("Error creating a new corpus in the database: " + e.getMessage());
- return;
- }
+ ctx.status(404);
+ ctx.result("Corpus with id " + corpusId + " wasn't found in the database.");
+ return;
}
// TODO just use 1 as default? will throw an error if this is null otherwise...
@@ -122,10 +117,9 @@ public void uploadUIMA(Context ctx) {
try (var input = ctx.req().getPart("file").getInputStream()) {
var fileName = ctx.req().getPart("file").getSubmittedFileName();
// Import the doc in the background
- final Corpus corpus1 = corpus;
var importFuture = CompletableFuture.supplyAsync(() -> {
try {
- return importer.storeUploadedXMIToCorpusAsync(input, corpus1, fileName, documentId);
+ return importer.storeUploadedXMIToCorpusAsync(input, corpus, fileName, documentId);
} catch (DatabaseOperationException e) {
throw new RuntimeException(e);
}
@@ -139,6 +133,7 @@ public void uploadUIMA(Context ctx) {
if (acceptedContentType != null && acceptedContentType.equals("application/json")) {
Map<String, Object> apiResult = new HashMap<>();
apiResult.put("document_id", newDocumentId);
+// ctx.contentType("application/json"); // redundant: ctx.json(...) already sets the content type
ctx.json(apiResult);
return;
}
@@ -149,6 +144,169 @@ public void uploadUIMA(Context ctx) {
ctx.status(500);
ctx.result("Error uploading a file: " + e.getMessage());
}
- };
+ }
+
+
+ public void importCorpusFromPath(Context ctx) {
+ try {
+ String path = ctx.formParam("path");
+ String numThreadStr = ctx.formParam("numThreads");
+ int numThreads = (numThreadStr != null && !numThreadStr.isBlank()) ? Integer.parseInt(numThreadStr) : 1;
+ String casView = ctx.formParam("casView");
+
+ if (casView != null && casView.isBlank()) {
+ casView = null;
+ }
+
+ if (path == null || path.isBlank()) {
+ ctx.status(400).result("Path is required");
+ return;
+ }
+
+ String importId = UUID.randomUUID().toString();
+ int importerNumber = 1;
+ Importer importer = new Importer(serviceContext, path, importerNumber, importId, casView);
+ UCEImport uceImport = new UCEImport(importId, path, ImportStatus.STARTING);
+ Integer fileCount = ExceptionUtils.tryCatchLog(importer::getXMICountInPath,
+ (ex) -> logger.warn("There was an IO error counting the importable UIMA files - the import will probably fail at some point.", ex));
+ uceImport.setTotalDocuments(fileCount == null ? -1 : fileCount);
+ db.saveOrUpdateUceImport(uceImport);
+ CompletableFuture.runAsync(() -> {
+ try {
+ importer.start(numThreads);
+ } catch (DatabaseOperationException e) {
+ logger.error("Error during asynchronous corpus import", e);
+ }
+ });
+ ctx.status(200).result("Import started. Import ID: " + importId);
+ } catch (DatabaseOperationException e) {
+ logger.error("Error when creating saving/updating to database" + e);
+ ctx.status(500).result("Database error initiating corpus import" + e.getMessage());
+
+ } catch (Exception e) {
+ logger.error("Error initiating corpus import", e);
+ ctx.status(500).result("Error initiating import: " + e.getMessage());
+ }
+
+ }
+
+ public void importCorpusFromUpload(Context ctx) {
+ try {
+ String importId = UUID.randomUUID().toString();
+ Path rootDir = java.nio.file.Paths.get(System.getProperty("java.io.tmpdir"), "uce_uploads", importId);
+ Path inputDir = rootDir.resolve("input");
+ Files.createDirectories(inputDir);
+
+ var validFiles = ctx.uploadedFiles("files").stream()
+ .filter(f -> f.size() > 0 && f.filename() != null && !f.filename().isBlank())
+ .toList();
+
+ if (validFiles.isEmpty()) {
+ ctx.status(400).result("No files selected. Please select at least one XMI file or archive.");
+ return;
+ }
+
+ for(UploadedFile uploadedFile : ctx.uploadedFiles("files")){
+ try(InputStream input = uploadedFile.content()){
+ Files.copy(input,inputDir.resolve(uploadedFile.filename()), StandardCopyOption.REPLACE_EXISTING);
+ }
+ }
+
+ CorpusConfig config = new CorpusConfig();
+ String name = ctx.formParam("name");
+ if (name == null || name.isBlank()) {
+ ctx.status(400).result("Corpus name is required.");
+ return;
+ }
+ config.setName(name);
+ String author = ctx.formParam("author");
+ if (author == null || author.isBlank()) {
+ ctx.status(400).result("Corpus Author is required.");
+ return;
+ }
+ config.setAuthor(author);
+ String language = ctx.formParam("language");
+ if (language == null || language.isBlank()) {
+ ctx.status(400).result("Corpus Language is required.");
+ return;
+ }
+ config.setLanguage(language);
+ config.setDescription(ctx.formParam("description"));
+ String addToExistingParam = ctx.formParam("addToExistingCorpus");
+ boolean addToExisting = addToExistingParam != null && Boolean.parseBoolean(addToExistingParam);
+ config.setAddToExistingCorpus(addToExisting);
+
+ // Annotations
+ CorpusAnnotationConfig annotations = new CorpusAnnotationConfig();
+ annotations.setSentence(ctx.formParam("sentence") != null);
+ annotations.setLemma(ctx.formParam("lemma") != null);
+ annotations.setNamedEntity(ctx.formParam("namedEntity") != null);
+ annotations.setSentiment(ctx.formParam("sentiment") != null);
+ annotations.setEmotion(ctx.formParam("emotion") != null);
+ annotations.setTime(ctx.formParam("time") != null);
+ annotations.setGeoNames(ctx.formParam("geoNames") != null);
+ annotations.setWikipediaLink(ctx.formParam("wikipediaLink") != null);
+ annotations.setImage(ctx.formParam("image") != null);
+ annotations.setUnifiedTopic(ctx.formParam("unifiedTopic") != null);
+ annotations.setOCRPage(ctx.formParam("OCRPage") != null);
+ annotations.setOCRParagraph(ctx.formParam("OCRParagraph") != null);
+ annotations.setOCRBlock(ctx.formParam("OCRBlock") != null);
+ annotations.setOCRLine(ctx.formParam("OCRLine") != null);
+
+ TaxonConfig taxonConfig = new TaxonConfig();
+ taxonConfig.setAnnotated(ctx.formParam("taxonAnnotated") != null);
+ taxonConfig.setBiofidOnthologyAnnotated(ctx.formParam("biofidOnthologyAnnotated") != null);
+
+ annotations.setTaxon(taxonConfig);
+ config.setAnnotations(annotations);
+
+ // Other Settings
+ OtherConfig otherConfig = new OtherConfig();
+ otherConfig.setEnableEmbeddings(ctx.formParam("enableEmbeddings") != null);
+ otherConfig.setEnableRAGBot(ctx.formParam("enableRAGBot") != null);
+ otherConfig.setIncludeKeywordDistribution(ctx.formParam("includeKeywordDistribution") != null);
+ otherConfig.setEnableS3Storage(ctx.formParam("enableS3Storage") != null);
+ config.setOther(otherConfig);
+
+ Gson gson = new GsonBuilder().setPrettyPrinting().create();
+ String jsonString = gson.toJson(config);
+ Files.writeString(rootDir.resolve("corpusConfig.json"),jsonString,StandardCharsets.UTF_8);
+
+ String numThreadStr = ctx.formParam("numThreads");
+ int numThreads = (numThreadStr != null && !numThreadStr.isBlank()) ? Integer.parseInt(numThreadStr) : 1;
+ String casView = ctx.formParam("casView");
+ if (casView != null && casView.isBlank()) casView = null;
+ int importerNumber = 1;
+ Importer importer = new Importer(serviceContext, rootDir.toString(), importerNumber, importId, casView);
+
+ String logTitle = (addToExisting ? "ADD_TO:" : "UPLOAD_NEW:") + name;
+ UCEImport uceImport = new UCEImport(importId, logTitle, ImportStatus.STARTING);
+ Integer fileCount = ExceptionUtils.tryCatchLog(importer::getXMICountInPath,
+ (ex) -> logger.warn("IO error counting upload files.", ex));
+ uceImport.setTotalDocuments(fileCount == null ? -1 : fileCount);
+ db.saveOrUpdateUceImport(uceImport);
+ CompletableFuture.runAsync(() -> {
+ try {
+ importer.start(numThreads);
+ } catch (DatabaseOperationException e) {
+ logger.error("Error during asynchronous corpus upload import", e);
+ } finally {
+ try {
+ org.apache.commons.io.FileUtils.deleteDirectory(rootDir.toFile());
+ } catch (IOException e) {
+ logger.warn("Could not delete temp upload dir: " + rootDir, e);
+ }
+ }
+ });
+
+ ctx.status(200).result("Upload sucessfull. Import started with ID: " + importId);
+
+ } catch (IOException e) {
+ logger.error("Error handling file upload import", e);
+ ctx.status(500).result("Error during upload: " + e.getMessage());
+ } catch (DatabaseOperationException e) {
+ logger.error("Error saving/updating the database during UCE import", e);
+ ctx.status(500).result("Error saving/updating the database: " + e.getMessage());
+ }
+ }
}
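
For completeness, a client sketch for the new upload route. It mirrors the multipart layout the handler expects: one or more `files` parts plus the corpus metadata fields (checkbox-style fields such as `sentence` only need to be present). Host, port, and file names are placeholders:

```java
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.Map;

public class UploadImportClient {
    public static void main(String[] args) throws Exception {
        String boundary = "----UCE-" + System.nanoTime();
        Map<String, String> fields = new LinkedHashMap<>();
        fields.put("name", "My Corpus");      // required
        fields.put("author", "Jane Doe");     // required
        fields.put("language", "de");         // required
        fields.put("description", "Demo corpus");
        fields.put("sentence", "on");         // annotation checkboxes: presence == enabled
        fields.put("lemma", "on");
        fields.put("numThreads", "2");

        URL url = new URL("http://localhost:8008/api/ie/import/upload"); // placeholder
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setDoOutput(true);
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
        try (OutputStream out = conn.getOutputStream()) {
            // text fields
            for (var e : fields.entrySet()) {
                out.write(("--" + boundary + "\r\nContent-Disposition: form-data; name=\""
                        + e.getKey() + "\"\r\n\r\n" + e.getValue() + "\r\n").getBytes(StandardCharsets.UTF_8));
            }
            // one "files" part per XMI document
            Path xmi = Path.of("doc1.xmi"); // any UIMA XMI file
            out.write(("--" + boundary + "\r\nContent-Disposition: form-data; name=\"files\"; filename=\""
                    + xmi.getFileName() + "\"\r\nContent-Type: application/xml\r\n\r\n").getBytes(StandardCharsets.UTF_8));
            out.write(Files.readAllBytes(xmi));
            out.write(("\r\n--" + boundary + "--\r\n").getBytes(StandardCharsets.UTF_8));
        }
        // Expected: 200 "Upload successful. Import started with ID: <uuid>"
        System.out.println(conn.getResponseCode());
    }
}
```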