diff --git a/uce.portal/resources/templates/corpus/corpusInspector.ftl b/uce.portal/resources/templates/corpus/corpusInspector.ftl index 484c6790..c074f490 100644 --- a/uce.portal/resources/templates/corpus/corpusInspector.ftl +++ b/uce.portal/resources/templates/corpus/corpusInspector.ftl @@ -17,6 +17,17 @@ +
+ +
diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 98e75ac8..987da127 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -3,7 +3,8 @@
-
${uceConfig.getMeta().getName()?trim!"-"}
+
${uceConfig.getMeta().getName()?trim!"-"}
@@ -15,8 +16,21 @@
-

${languageResource.get("corpora")}

+
+

${languageResource.get("corpora")}

+ <#if uceConfig.settings.enablePathImport?? && uceConfig.settings.enablePathImport> + + + +
+ +
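(Reviewer note: the `<#if uceConfig.settings.enablePathImport?? && uceConfig.settings.enablePathImport>` guard above resolves against the new SettingsConfig field added later in this diff. A minimal sketch of what the template lookup assumes on the Java side; the accessor is an assumption, since the diff only adds the private field. If SettingsConfig already generates accessors, e.g. via Lombok, nothing further is needed:

    // Sketch, not part of the PR: SettingsConfig as FreeMarker needs to see it.
    public class SettingsConfig {
        private boolean enablePathImport = false; // added in this PR; import UI stays hidden by default

        // Assumed JavaBean getter so `uceConfig.settings.enablePathImport` resolves in the template.
        public boolean isEnablePathImport() {
            return enablePathImport;
        }
    }
)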
<#if corpora?size == 0>
@@ -33,7 +47,8 @@ data-id="${corpusVm.getCorpus().getId()}"> ${corpusVm.getCorpus().getName()?trim} -

${corpusVm.getCorpus().getAuthor()}

+

${corpusVm.getCorpus().getAuthor()}

+
+ +<#--Modal for importing files via a path--> + + +<#--Modal for uploading files--> + + + \ No newline at end of file diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl index 7e012f0c..00cd6d5c 100644 --- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl +++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl @@ -1,4 +1,23 @@ +<#if analysisId??> +
+ +
+ + <#if DUUI??> <#if DUUI.modelGroups?has_content> <#if DUUI.isTopic> diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java index 3cb3d72a..e016d4ae 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java @@ -129,8 +129,8 @@ public JCas getLanguage(String inputText) throws Exception { public JCas getSentences(JCas cas) throws Exception { HashMap spacyUrls = new HashMap<>(); - spacyUrls.put("Spacy", "http://spacy-cohmetrix.service.component.duui.texttechnologylab.org"); -// spacyUrls.put("Spacy", "http://spacy.service.component.duui.texttechnologylab.org"); +// spacyUrls.put("Spacy", "http://spacy-cohmetrix.service.component.duui.texttechnologylab.org"); + spacyUrls.put("Spacy", "http://spacy.service.component.duui.texttechnologylab.org"); spacyUrls.put("Syntok", "http://paragraph-syntok.service.component.duui.texttechnologylab.org/"); DUUIComposer composer = setListComposer(spacyUrls); cas = runPipeline(cas, composer); diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java index 02687feb..c34c6f15 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java @@ -1,6 +1,8 @@ package org.texttechnologylab.uce.analysis; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; @@ -8,11 +10,41 @@ import org.texttechnologylab.uce.analysis.modules.*; import org.texttechnologylab.uce.analysis.typeClasses.TextClass; + + + +import java.time.Instant; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.io.InputStream; +import java.io.DataOutputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; + import java.util.*; public class RunDUUIPipeline { + private static final AnalysisCache analysisCache = new AnalysisCache(); + private static final ThreadLocal lastAnalysisIdTL = new ThreadLocal<>(); + private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class); + private static final ThreadLocal currentUserIdTL = new ThreadLocal<>(); + + + public static AnalysisSession getCachedSession(String analysisId) { + return analysisCache.get(analysisId); + } + public static void setThreadLocalUserId(String userId) { + currentUserIdTL.set(userId); + } + + private static String getCurrentUserId() { + // TODO: replace with your auth/session identity + + return currentUserIdTL.get(); + } public DUUIInformation getModelResources(List modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception { ModelResources modelResources = new ModelResources(); @@ -189,10 +221,13 @@ public DUUIInformation getModelResources(List modelGroups, String inputT newCas.setDocumentText(text); cas = newCas; + logger.info("[CAS] Created secondary JCas for special models 
(fact/coherence/stance/LLM)");
+
         }
         // run pipeline
         DUUIComposer composer = pipeline.setComposer(modelInfosMap);
         JCas result = pipeline.runPipeline(cas, composer);
+        logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)");
         // get results
         Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups);
         // print results
@@ -232,9 +267,29 @@ public DUUIInformation getModelResources(List modelGroups, String inputT
         if (isCohmetrix) {
             duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups);
         }
+        String analysisId = UUID.randomUUID().toString();
+        String userId = getCurrentUserId();
+        logger.info("[USER] Running pipeline for User: " + userId);
+        String title = "Analysis " + Instant.now();
+
+        byte[] xmiBytes = toXmiBytes(result);
+        AnalysisSession session = new AnalysisSession(
+                analysisId, userId, title, /*externalId*/ null,
+                result, /*xmiBytes*/ xmiBytes
+        );
+        analysisCache.put(session);
+        lastAnalysisIdTL.set(analysisId);
+        logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)");
         return duuiInformation;
     }
+    public AnalysisResponse getModelResourcesWithHandle(List modelGroups, String inputText, String claim,
+                                                        String coherenceText, String stanceText, String systemPrompt) throws Exception {
+        DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt);
+        String id = lastAnalysisIdTL.get();
+        return new AnalysisResponse(id, info);
+    }
+
     public static void main(String[] args) throws Exception {
         ModelResources modelResources = new ModelResources();
         List modelGroups = modelResources.getGroupedModelObjects();
@@ -256,5 +311,195 @@ public static void main(String[] args) throws Exception {
         DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt);
     }
+    public static final class AnalysisResponse {
+        public final String analysisId;
+        public final DUUIInformation duuiInformation;
+
+        public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) {
+            this.analysisId = analysisId;
+            this.duuiInformation = duuiInformation;
+        }
+    }
+
+
+    // AnalysisSession
+    public static final class AnalysisSession {
+        public final String analysisId;
+        public final String userId;
+        public final long createdAtMillis;
+        public final String title;
+        public final String externalId;
+        public final JCas jcas;
+        public final byte[] xmiBytes;
+
+        public AnalysisSession(String analysisId, String userId, String title, String externalId,
+                               JCas jcas, byte[] xmiBytes) {
+            this.analysisId = analysisId;
+            this.userId = userId;
+            this.title = title;
+            this.externalId = externalId;
+            this.createdAtMillis = System.currentTimeMillis();
+            this.jcas = jcas;
+            this.xmiBytes = xmiBytes;
+        }
+    }
+
+
+    // AnalysisCache
+    public static final class AnalysisCache {
+        private final Map<String, AnalysisSession> map = new ConcurrentHashMap<>();
+        private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes
+
+        public void put(AnalysisSession s) { map.put(s.analysisId, s); }
+
+        public AnalysisSession get(String id) { // Retrieve a session from the cache
+            AnalysisSession s = map.get(id);
+            if (s == null) return null;
+
+            if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) { // If this session is older than 45 minutes -> expire it
+                map.remove(id);
+                return null;
+            }
+            return s;
+        }
+
+//        public void remove(String id) {
+//            map.remove(id);
+//        } // Manually remove a session by ID
+//
+//
+//        public void
cleanupExpired() { // cleanup all expired sessions +// long now = System.currentTimeMillis(); +// for (var entry : map.entrySet()) { +// AnalysisSession s = entry.getValue(); +// if (now - s.createdAtMillis > ttlMillis) { +// map.remove(entry.getKey()); +// logger.info("[CRON] Removed expired session: " + s.analysisId); +// } +// } +// } +// } +// private static final java.util.concurrent.ScheduledExecutorService scheduler = //Cron job for automatic cleanup every 5 minutes +// java.util.concurrent.Executors.newScheduledThreadPool(1); +// +// static { +// scheduler.scheduleAtFixedRate(() -> { +// try { +// analysisCache.cleanupExpired(); +// } catch (Exception e) { +// logger.error("[CACHE] Cache cleanup failed: " + e.getMessage()); +// } +// }, 5, 5, java.util.concurrent.TimeUnit.MINUTES); +// +// scheduler.scheduleAtFixedRate(() -> { +// logger.info("[CACHE] Running cache cleanup task..."); +// analysisCache.cleanupExpired(); // your cleanup method +// }, 1, 5, TimeUnit.MINUTES); +// +// + } + private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception { + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + org.apache.uima.cas.impl.XmiCasSerializer ser = + new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem()); + org.apache.uima.util.XMLSerializer xmlSer = + new org.apache.uima.util.XMLSerializer(bos, true); + xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1"); + ser.serialize(jcas.getCas(), xmlSer.getContentHandler()); + return bos.toByteArray(); + } + + + // When we send CAS to the importer via HTTP, we want to capture the response. + // This small class acts like a container for the HTTP response details + private static class HttpResult { + final int status; + final String body; + final String locationHeader; + HttpResult(int status, String body, String locationHeader) { + this.status = status; this.body = body; this.locationHeader = locationHeader; + } + } + + + // Send CAS via HTTP + private static HttpResult postMultipart(String urlStr, + Map fields, + String fileField, String filename, + String fileContentType, byte[] fileBytes) throws Exception { + String boundary = "----JAVA-" + UUID.randomUUID(); //Generate a boundary string to separate parts in multipart body + URL url = new URL(urlStr); //Open HTTP connection to the importer endpoint + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("POST"); + conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary); + + try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) { //Write request body + // text fields + for (var e : fields.entrySet()) { + out.writeBytes("--" + boundary + "\r\n"); + out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n"); + out.write(e.getValue().getBytes(StandardCharsets.UTF_8)); + out.writeBytes("\r\n"); + } + // file field + out.writeBytes("--" + boundary + "\r\n"); + out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n"); + out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n"); + out.write(fileBytes); + out.writeBytes("\r\n"); + out.writeBytes("--" + boundary + "--\r\n"); + out.flush(); + } + + int status = conn.getResponseCode(); //Read the HTTP response from the importer + String location = conn.getHeaderField("Location"); + String body; + + try (InputStream in = (status >= 200 && status < 400) ? 
conn.getInputStream() : conn.getErrorStream()) { + body = (in != null) ? new String(in.readAllBytes(), StandardCharsets.UTF_8) : ""; + } + conn.disconnect(); + return new HttpResult(status, body, location); + } + + public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached CAS to importer + String analysisId, + long corpusId, + String documentId, + String casView) throws Exception { + AnalysisSession s = getCachedSession(analysisId); + if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId); + + byte[] casBytes = toXmiBytes(s.jcas); + + Map fields = new LinkedHashMap<>(); // Form-data fields + fields.put("analysisId", analysisId); + fields.put("corpusId", Long.toString(corpusId)); + if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId); + if (casView != null && !casView.isBlank()) fields.put("casView", casView); + + + // Send multipart as XMI + String filename = "cas_" + analysisId + ".xmi"; + logger.info("[IMPORT][HTTP] POST " + importUrl + + " corpusId=" + corpusId + " analysisId=" + analysisId + + " documentId=" + documentId + " casView=" + casView + + " file=" + filename + " (" + casBytes.length + " bytes)"); + + HttpResult res = postMultipart( + importUrl, + fields, + "file", + filename, + "application/xml", + casBytes + ); + logger.info("[IMPORT][HTTP] status=" + res.status + + (res.locationHeader != null ? " Location=" + res.locationHeader : "") + + (res.body != null && !res.body.isBlank() ? " body=" + res.body : "")); + return res; + } + } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java index 3d2671f7..1fcfd6d9 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java @@ -12,4 +12,5 @@ public class SettingsConfig { private EmbeddingsConfig embeddings; private AuthConfig authentication; private MCPConfig mcp = new MCPConfig(); + private boolean enablePathImport = false; } diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java index 97534c64..6037916a 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java @@ -93,7 +93,7 @@ public static void main(String[] args) throws IOException { () -> new AnnotationConfigApplicationContext(SpringConfig.class), (ex) -> logger.fatal("========== [ABORT] ==========\nThe Application context couldn't be established. " + "This is very likely due to a missing/invalid database connection. 
UCE will have to shutdown.")); - if(context == null) return; + if (context == null) return; logger.info("Loaded application context and services."); // Execute the external database scripts @@ -115,7 +115,7 @@ public static void main(String[] args) throws IOException { logger.info(languageResource.get("search")); // Load in and test the model resources for the Analysis Engine - if(SystemStatus.UceConfig.getSettings().getAnalysis().isEnableAnalysisEngine()){ + if (SystemStatus.UceConfig.getSettings().getAnalysis().isEnableAnalysisEngine()) { var modelResources = new ModelResources(); var ttlabScorer = new TTLabScorerInfo(); var cohMetrixInfo = new CohMetrixInfo(); @@ -134,7 +134,7 @@ public static void main(String[] args) throws IOException { SystemStatus.LexiconIsCalculating = true; var lexiconService = context.getBean(LexiconService.class); var addedLexiconEntries = 0; - if(forceLexicalization) addedLexiconEntries = lexiconService.updateLexicon(true); + if (forceLexicalization) addedLexiconEntries = lexiconService.updateLexicon(true); else addedLexiconEntries = lexiconService.checkForUpdates(); logger.info("Finished updating the lexicon. Added new entries: " + addedLexiconEntries); SystemStatus.LexiconIsCalculating = false; @@ -142,23 +142,23 @@ public static void main(String[] args) throws IOException { logger.info("Checking if we can or should update any linkables... (this may take a moment depending on the time of the last update. Runs asynchronous.)"); CompletableFuture.runAsync(() -> { - try{ + try { var result = context.getBean(PostgresqlDataInterface_Impl.class).callLogicalLinksRefresh(); logger.info("Finished updating the linkables. Updated linkables: " + result); - } catch (Exception ex){ + } catch (Exception ex) { logger.error("There was an error trying to refresh linkables in the startup of the web app. App starts normally though."); } }); logger.info("Checking if we can or should update any geoname locations... (this may take a moment depending on the time of the last update. Runs asynchronous.)"); CompletableFuture.runAsync(() -> { - try{ + try { var result = context.getBean(PostgresqlDataInterface_Impl.class).callGeonameLocationRefresh(); logger.info("Finished updating the geoname locations. Updated locations: " + result); logger.info("Trying to refresh the timeline map cache..."); context.getBean(MapService.class).refreshCachedTimelineMap(false); logger.info("Finished refreshing the timeline map."); - } catch (Exception ex){ + } catch (Exception ex) { logger.error("There was an error trying to refresh geoname locations in the startup of the web app. 
App starts normally though."); } }); @@ -176,8 +176,7 @@ public static void main(String[] args) throws IOException { // We use the externalLocation method so that the files in the public folder are hot reloaded if (commonConfig.useExternalPublicLocation()) { config.staticFiles.add(commonConfig.getPublicLocation(), Location.EXTERNAL); - } - else { + } else { config.staticFiles.add("/public", Location.CLASSPATH); } logger.info("Setup FreeMarker templates and public folders."); @@ -200,8 +199,7 @@ public static void main(String[] args) throws IOException { ExceptionUtils.tryCatchLog(() -> initMCP(registry, config), (ex) -> logger.error("There was a problem initializing the MCP server, web service will be shut down.", ex)); logger.info("MCP server initialized."); - } - else { + } else { logger.info("MCP server is disabled and will not be initialized."); } config.jsonMapper(mapper); @@ -350,174 +348,177 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi List groups = modelResources.getGroupedModelObjects(); config.router.apiBuilder(() -> { - before(ctx -> { - ctx.res().setCharacterEncoding("UTF-8"); - // Setup and log all API calls with some information. We don't want to log file uploads, since it would - // destroy the file body stream. - if (!(ctx.contentType() != null && ctx.contentType().contains("multipart/form-data"))) { - ctx.attribute("id", UUID.randomUUID().toString()); - logger.info("Received API call: ID={}, IP={}, Method={}, URI={}, QUERY={}, BODY={}", - ctx.attribute("id"), ctx.ip(), ctx.method().name(), ctx.url(), ctx.queryString(), ctx.body()); - - // Should we log to db as well? - if (commonConfig.getLogToDb() && SystemStatus.PostgresqlDbStatus.isAlive()) { - var uceLog = new UCELog(ctx.ip(), ctx.method().name(), ctx.url(), ctx.body(), ctx.queryString()); - ExceptionUtils.tryCatchLog( - () -> context.getBean(PostgresqlDataInterface_Impl.class).saveUceLog(uceLog), - (ex) -> logger.error("Error storing a log to the database: ", ex)); - logger.info("Last log was also logged to the db with id " + uceLog.getId()); - } - } else { - // Else we have a form-data upload. We handle those explicitly. - // Set the multipart data configs for uploads - ctx.req().setAttribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/tmp")); - } - - // Always inject the current system config into all UI templates - RequestContextHolder.setUceConfigHolder(SystemStatus.UceConfig); - - // Check if the request contains a language parameter - var languageResources = LanguageResources.fromRequest(ctx); - ctx.header("Content-Language", languageResources.getDefaultLanguage()); - RequestContextHolder.setLanguageResources(languageResources); - - // Check if we have an authenticated user in the session and inject it into the template - if (SystemStatus.UceConfig.getSettings().getAuthentication().isActivated()) { - var user = SessionManager.getUserFromRequest(ctx); - RequestContextHolder.setAuthenticatedUceUser(user); - } - }); + before(ctx -> { + ctx.res().setCharacterEncoding("UTF-8"); + // Setup and log all API calls with some information. We don't want to log file uploads, since it would + // destroy the file body stream. 
+ if (!(ctx.contentType() != null && ctx.contentType().contains("multipart/form-data"))) { + ctx.attribute("id", UUID.randomUUID().toString()); + logger.info("Received API call: ID={}, IP={}, Method={}, URI={}, QUERY={}, BODY={}", + ctx.attribute("id"), ctx.ip(), ctx.method().name(), ctx.url(), ctx.queryString(), ctx.body()); + + // Should we log to db as well? + if (commonConfig.getLogToDb() && SystemStatus.PostgresqlDbStatus.isAlive()) { + var uceLog = new UCELog(ctx.ip(), ctx.method().name(), ctx.url(), ctx.body(), ctx.queryString()); + ExceptionUtils.tryCatchLog( + () -> context.getBean(PostgresqlDataInterface_Impl.class).saveUceLog(uceLog), + (ex) -> logger.error("Error storing a log to the database: ", ex)); + logger.info("Last log was also logged to the db with id " + uceLog.getId()); + } + } else { + // Else we have a form-data upload. We handle those explicitly. + // Set the multipart data configs for uploads + ctx.req().setAttribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/tmp")); + } + + // Always inject the current system config into all UI templates + RequestContextHolder.setUceConfigHolder(SystemStatus.UceConfig); + + // Check if the request contains a language parameter + var languageResources = LanguageResources.fromRequest(ctx); + ctx.header("Content-Language", languageResources.getDefaultLanguage()); + RequestContextHolder.setLanguageResources(languageResources); + + // Check if we have an authenticated user in the session and inject it into the template + if (SystemStatus.UceConfig.getSettings().getAuthentication().isActivated()) { + var user = SessionManager.getUserFromRequest(ctx); + RequestContextHolder.setAuthenticatedUceUser(user); + } + }); + + // Landing page + get("/", ctx -> { + var model = new HashMap(); + model.put("title", SystemStatus.UceConfig.getMeta().getName()); + model.put("corpora", context.getBean(PostgresqlDataInterface_Impl.class) + .getAllCorpora() + .stream().map(Corpus::getViewModel) + .toList()); + model.put("commonConf", commonConfig); + model.put("isSparqlAlive", SystemStatus.JenaSparqlStatus.isAlive()); + model.put("isAuthAlive", SystemStatus.AuthenticationService.isAlive()); + model.put("isDbAlive", SystemStatus.PostgresqlDbStatus.isAlive()); + model.put("isRagAlive", SystemStatus.RagServiceStatus.isAlive()); + model.put("isS3StorageAlive", SystemStatus.S3StorageStatus.isAlive()); + model.put("isLexiconCalculating", SystemStatus.LexiconIsCalculating); + model.put("alphabetList", StringUtils.getAlphabetAsList()); + model.put("lexiconEntriesCount", context.getBean(LexiconService.class).countLexiconEntries()); + model.put("lexiconizableAnnotations", LexiconService.lexiconizableAnnotations); + model.put("uceVersion", commonConfig.getUceVersion()); + model.put("modelGroups", groups); + model.put("ttlabScorer", taInputMap); + model.put("cohMetrix", cohMetrixMap); + + // The vm files are located under the resources directory + ctx.render("index.ftl", model); + }); + + // Potential imprint + get("/imprint", ctx -> { + var model = new HashMap(); + model.put("imprint", SystemStatus.UceConfig.getCorporate().getImprint()); + ctx.render("imprint.ftl", model); + }); + + // A document reader view + get("/documentReader", (ctx) -> (registry.get(DocumentApi.class)).getSingleDocumentReadView(ctx)); + + // A corpus World View + get("/globe", (ctx) -> (registry.get(DocumentApi.class)).get3dGlobe(ctx)); + + + path("/auth", () -> { + get("/login", (ctx) -> (registry.get(AuthenticationApi.class)).loginCallback(ctx)); + get("/logout", (ctx) 
-> (registry.get(AuthenticationApi.class)).logoutCallback(ctx)); + }); + + // API routes + path("/api", () -> { + before("/*", (ctx) -> { + }); - // Landing page - get("/", ctx -> { - var model = new HashMap(); - model.put("title", SystemStatus.UceConfig.getMeta().getName()); - model.put("corpora", context.getBean(PostgresqlDataInterface_Impl.class) - .getAllCorpora() - .stream().map(Corpus::getViewModel) - .toList()); - model.put("commonConf", commonConfig); - model.put("isSparqlAlive", SystemStatus.JenaSparqlStatus.isAlive()); - model.put("isAuthAlive", SystemStatus.AuthenticationService.isAlive()); - model.put("isDbAlive", SystemStatus.PostgresqlDbStatus.isAlive()); - model.put("isRagAlive", SystemStatus.RagServiceStatus.isAlive()); - model.put("isS3StorageAlive", SystemStatus.S3StorageStatus.isAlive()); - model.put("isLexiconCalculating", SystemStatus.LexiconIsCalculating); - model.put("alphabetList", StringUtils.getAlphabetAsList()); - model.put("lexiconEntriesCount", context.getBean(LexiconService.class).countLexiconEntries()); - model.put("lexiconizableAnnotations", LexiconService.lexiconizableAnnotations); - model.put("uceVersion", commonConfig.getUceVersion()); - model.put("modelGroups", groups); - model.put("ttlabScorer", taInputMap); - model.put("cohMetrix", cohMetrixMap); - - // The vm files are located under the resources directory - ctx.render("index.ftl", model); + path("/ie", () -> { + post("/upload/uima", (ctx) -> (registry.get(ImportExportApi.class)).uploadUIMA(ctx)); + get("/download/uima", (ctx) -> (registry.get(ImportExportApi.class)).downloadUIMA(ctx)); + post("/import/path", (ctx) -> (registry.get(ImportExportApi.class)).importCorpusFromPath(ctx)); + post("/import/upload",(ctx) -> (registry.get(ImportExportApi.class)).importCorpusFromUpload(ctx)); + }); + + path("/wiki", () -> { + get("/page", (ctx) -> (registry.get(WikiApi.class)).getPage(ctx)); + get("/annotation", (ctx) -> (registry.get(WikiApi.class)).getAnnotation(ctx)); + path("/linkable", () -> { + post("/node", (ctx) -> (registry.get(WikiApi.class)).getLinkableNode(ctx)); + }); + path("/lexicon", () -> { + post("/entries", (ctx) -> (registry.get(WikiApi.class)).getLexicon(ctx)); + post("/occurrences", (ctx) -> (registry.get(WikiApi.class)).getOccurrencesOfLexiconEntry(ctx)); }); + post("/queryOntology", (ctx) -> (registry.get(WikiApi.class)).queryOntology(ctx)); + }); - // Potential imprint - get("/imprint", ctx -> { - var model = new HashMap(); - model.put("imprint", SystemStatus.UceConfig.getCorporate().getImprint()); - ctx.render("imprint.ftl", model); + path("/corpus", () -> { + get("/inspector", (ctx) -> (registry.get(DocumentApi.class)).getCorpusInspectorView(ctx)); + get("/documentsList", (ctx) -> (registry.get(DocumentApi.class)).getDocumentListOfCorpus(ctx)); + path("/map", () -> { + post("/linkedOccurrences", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrences(ctx)); + post("/linkedOccurrenceClusters", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrenceClusters(ctx)); }); + }); - // A document reader view - get("/documentReader", (ctx) -> (registry.get(DocumentApi.class)).getSingleDocumentReadView(ctx)); + path("/search", () -> { + post("/default", (ctx) -> (registry.get(SearchApi.class)).search(ctx)); + post("/semanticRole", (ctx) -> (registry.get(SearchApi.class)).semanticRoleSearch(ctx)); + post("/layered", (ctx) -> (registry.get(SearchApi.class)).layeredSearch(ctx)); + get("/active/page", (ctx) -> (registry.get(SearchApi.class)).activeSearchPage(ctx)); + get("/active/sort", (ctx) -> 
(registry.get(SearchApi.class)).activeSearchSort(ctx)); + get("/semanticRole/builder", (ctx) -> (registry.get(SearchApi.class)).getSemanticRoleBuilderView(ctx)); + }); - // A corpus World View - get("/globe", (ctx) -> (registry.get(DocumentApi.class)).get3dGlobe(ctx)); + path("/analysis", () -> { + post("/runPipeline", (ctx) -> (registry.get(AnalysisApi.class)).runPipeline(ctx)); + get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx)); + post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx)); + post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx)); + post("/importCas", (registry.get(AnalysisApi.class)).importCas); //added the importCas path + }); + path("/corpusUniverse", () -> { + // Gets a corpus universe view + get("/new", (ctx) -> (registry.get(CorpusUniverseApi.class)).getCorpusUniverseView(ctx)); + post("/fromSearch", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromSearch(ctx)); + post("/fromCorpus", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromCorpus(ctx)); + get("/nodeInspectorContent", (ctx) -> (registry.get(CorpusUniverseApi.class)).getNodeInspectorContentView(ctx)); + }); - path("/auth", () -> { - get("/login", (ctx) -> (registry.get(AuthenticationApi.class)).loginCallback(ctx)); - get("/logout", (ctx) -> (registry.get(AuthenticationApi.class)).logoutCallback(ctx)); - }); + path("/document", () -> { + get("/reader/pagesList", (ctx) -> (registry.get(DocumentApi.class)).getPagesListView(ctx)); + get("/uceMetadata", (ctx) -> (registry.get(DocumentApi.class)).getUceMetadataOfDocument(ctx)); + get("/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopics(ctx)); + get("/page/taxon", (ctx) -> (registry.get(DocumentApi.class)).getTaxonCountByPage(ctx)); + get("/page/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopicDistributionByPage(ctx)); + get("/page/topicEntityRelation", (ctx) -> (registry.get(DocumentApi.class)).getSentenceTopicsWithEntities(ctx)); + get("/page/topicWords", (ctx) -> (registry.get(DocumentApi.class)).getTopicWordsByDocument(ctx)); + get("/unifiedTopicSentenceMap", (ctx) -> (registry.get(DocumentApi.class)).getUnifiedTopicToSentenceMap(ctx)); + get("/page/namedEntities", (ctx) -> (registry.get(DocumentApi.class)).getDocumentNamedEntitiesByPage(ctx)); + get("/page/lemma", (ctx) -> (registry.get(DocumentApi.class)).getDocumentLemmaByPage(ctx)); + get("/page/geoname", (ctx) -> (registry.get(DocumentApi.class)).getDocumentGeonameByPage(ctx)); + delete("/delete", (ctx) -> (registry.get(DocumentApi.class)).deleteDocument(ctx)); + get("/findIdByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdByMetadata(ctx)); + get("/findIdsByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdsByMetadata(ctx)); + }); - // API routes - path("/api", () -> { - before("/*", (ctx) -> { - }); - - path("/ie", () -> { - post("/upload/uima", (ctx) -> (registry.get(ImportExportApi.class)).uploadUIMA(ctx)); - get("/download/uima", (ctx) -> (registry.get(ImportExportApi.class)).downloadUIMA(ctx)); - }); - - path("/wiki", () -> { - get("/page", (ctx) -> (registry.get(WikiApi.class)).getPage(ctx)); - get("/annotation", (ctx) -> (registry.get(WikiApi.class)).getAnnotation(ctx)); - path("/linkable", () -> { - post("/node", (ctx) -> (registry.get(WikiApi.class)).getLinkableNode(ctx)); - }); - path("/lexicon", () -> { - post("/entries", (ctx) -> (registry.get(WikiApi.class)).getLexicon(ctx)); - post("/occurrences", (ctx) -> 
(registry.get(WikiApi.class)).getOccurrencesOfLexiconEntry(ctx)); - }); - post("/queryOntology", (ctx) -> (registry.get(WikiApi.class)).queryOntology(ctx)); - }); - - path("/corpus", () -> { - get("/inspector", (ctx) -> (registry.get(DocumentApi.class)).getCorpusInspectorView(ctx)); - get("/documentsList", (ctx) -> (registry.get(DocumentApi.class)).getDocumentListOfCorpus(ctx)); - path("/map", () -> { - post("/linkedOccurrences", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrences(ctx)); - post("/linkedOccurrenceClusters", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrenceClusters(ctx)); - }); - }); - - path("/search", () -> { - post("/default", (ctx) -> (registry.get(SearchApi.class)).search(ctx)); - post("/semanticRole", (ctx) -> (registry.get(SearchApi.class)).semanticRoleSearch(ctx)); - post("/layered", (ctx) -> (registry.get(SearchApi.class)).layeredSearch(ctx)); - get("/active/page", (ctx) -> (registry.get(SearchApi.class)).activeSearchPage(ctx)); - get("/active/sort", (ctx) -> (registry.get(SearchApi.class)).activeSearchSort(ctx)); - get("/semanticRole/builder", (ctx) -> (registry.get(SearchApi.class)).getSemanticRoleBuilderView(ctx)); - }); - - path("/analysis", () -> { - post("/runPipeline", (ctx) -> (registry.get(AnalysisApi.class)).runPipeline(ctx)); - get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx)); - post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx)); - post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx)); - }); - - path("/corpusUniverse", () -> { - // Gets a corpus universe view - get("/new", (ctx) -> (registry.get(CorpusUniverseApi.class)).getCorpusUniverseView(ctx)); - post("/fromSearch", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromSearch(ctx)); - post("/fromCorpus", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromCorpus(ctx)); - get("/nodeInspectorContent", (ctx) -> (registry.get(CorpusUniverseApi.class)).getNodeInspectorContentView(ctx)); - }); - - path("/document", () -> { - get("/reader/pagesList", (ctx) -> (registry.get(DocumentApi.class)).getPagesListView(ctx)); - get("/uceMetadata", (ctx) -> (registry.get(DocumentApi.class)).getUceMetadataOfDocument(ctx)); - get("/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopics(ctx)); - get("/page/taxon", (ctx) -> (registry.get(DocumentApi.class)).getTaxonCountByPage(ctx)); - get("/page/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopicDistributionByPage(ctx)); - get("/page/topicEntityRelation", (ctx) -> (registry.get(DocumentApi.class)).getSentenceTopicsWithEntities(ctx)); - get("/page/topicWords", (ctx) -> (registry.get(DocumentApi.class)).getTopicWordsByDocument(ctx)); - get("/unifiedTopicSentenceMap", (ctx) -> (registry.get(DocumentApi.class)).getUnifiedTopicToSentenceMap(ctx)); - get("/page/namedEntities", (ctx) -> (registry.get(DocumentApi.class)).getDocumentNamedEntitiesByPage(ctx)); - get("/page/lemma", (ctx) -> (registry.get(DocumentApi.class)).getDocumentLemmaByPage(ctx)); - get("/page/geoname", (ctx) -> (registry.get(DocumentApi.class)).getDocumentGeonameByPage(ctx)); - delete("/delete", (ctx) -> (registry.get(DocumentApi.class)).deleteDocument(ctx)); - get("/findIdByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdByMetadata(ctx)); - get("/findIdsByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdsByMetadata(ctx)); - }); - - path("/rag", () -> { - get("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); - 
// NOTE we allow also "post" here, as the system prompt can get quite long... - post("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); - post("/postUserMessage", (ctx) -> (registry.get(RAGApi.class)).postUserMessage(ctx)); - get("/messages", (ctx) -> (registry.get(RAGApi.class)).getMessagesForChat(ctx)); - get("/plotTsne", (ctx) -> (registry.get(RAGApi.class)).getTsnePlot(ctx)); - get("/sentenceEmbeddings", (ctx) -> (registry.get(RAGApi.class)).getSentenceEmbeddings(ctx)); - }); - }); + path("/rag", () -> { + get("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); + // NOTE we allow also "post" here, as the system prompt can get quite long... + post("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); + post("/postUserMessage", (ctx) -> (registry.get(RAGApi.class)).postUserMessage(ctx)); + get("/messages", (ctx) -> (registry.get(RAGApi.class)).getMessagesForChat(ctx)); + get("/plotTsne", (ctx) -> (registry.get(RAGApi.class)).getTsnePlot(ctx)); + get("/sentenceEmbeddings", (ctx) -> (registry.get(RAGApi.class)).getSentenceEmbeddings(ctx)); }); + }); + }); } private static JsonMapper getJsonMapper() { diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java index 866242bc..16fbd9eb 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java @@ -3,6 +3,7 @@ import com.google.gson.Gson; import freemarker.template.Configuration; import io.javalin.http.Context; +import io.javalin.http.Handler; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.context.ApplicationContext; @@ -10,8 +11,10 @@ import org.texttechnologylab.uce.analysis.RunDUUIPipeline; import org.texttechnologylab.uce.analysis.modules.DUUIInformation; import org.texttechnologylab.uce.common.annotations.auth.Authentication; +import org.texttechnologylab.uce.common.models.authentication.UceUser; import org.texttechnologylab.uce.common.models.dto.AnalysisRequestDto; import org.texttechnologylab.uce.common.models.dto.HistoryRequestDto; +import org.texttechnologylab.uce.web.SessionManager; import java.util.HashMap; import java.util.List; @@ -55,11 +58,19 @@ public void runPipeline(Context ctx) { model.put("inputStance", inputStance); model.put("inputLLM", inputLLM); + UceUser user = SessionManager.getUserFromRequest(ctx); + String userId = (user != null) ? 
user.getUsername() : "user-unknown"; + RunDUUIPipeline.setThreadLocalUserId(userId); + RunDUUIPipeline pipeline = new RunDUUIPipeline(); - DUUIInformation DataRequest = pipeline.getModelResources(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM); + RunDUUIPipeline.AnalysisResponse resp = + pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim, + inputCoherence, inputStance, inputLLM); + DUUIInformation DataRequest = resp.duuiInformation; model.put("DUUI", DataRequest); model.put("SuccessRequest", true); model.put("modelGroups", DataRequest.getModelGroups()); + model.put("analysisId", resp.analysisId); // set history history.addDuuiInformation(String.valueOf(counter), DataRequest); @@ -180,5 +191,38 @@ public void callHistoryText(Context ctx) { ctx.render("defaultError.ftl"); } } - + // IMPORT ROUTE + @Authentication(required = Authentication.Requirement.LOGGED_IN, + route = Authentication.RouteTypes.POST, + path = "/api/analysis/importCas" + ) + public Handler importCas = ctx -> { + try { + String analysisId = ctx.queryParam("analysisId"); + if (analysisId == null || analysisId.isBlank()) { + ctx.status(400).result("Missing analysisId"); + return; + } + + // Lookup cached session + RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(analysisId); + if (session == null) { + ctx.status(404).result("No cached CAS found for analysisId=" + analysisId); + return; + } + + // send to importer + long corpusId = Long.parseLong(ctx.queryParam("corpusId")); // from ?corpusId=... + String importPath = "/api/ie/upload/uima"; + String importUrl = ctx.scheme() + "://" + ctx.host() + importPath; + + RunDUUIPipeline.sendToImporterViaHttp(importUrl, analysisId, corpusId, analysisId, null); + ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId); + } catch (NumberFormatException nfe) { + ctx.status(400).result("corpusId is required and must be a number"); + } catch (Exception e) { + logger.error("Error importing CAS", e); + ctx.status(500).result("Error importing CAS: " + e.getMessage()); + } + }; } diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index a47013a9..bc43a31e 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -1,35 +1,45 @@ package org.texttechnologylab.uce.web.routes; import com.google.gson.Gson; -import com.google.gson.JsonIOException; -import com.google.gson.JsonSyntaxException; +import com.google.gson.GsonBuilder; import io.javalin.http.Context; +import io.javalin.http.UploadedFile; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.context.ApplicationContext; import org.texttechnologylab.uce.common.config.CorpusConfig; +import org.texttechnologylab.uce.common.config.corpusConfig.CorpusAnnotationConfig; +import org.texttechnologylab.uce.common.config.corpusConfig.OtherConfig; +import org.texttechnologylab.uce.common.config.corpusConfig.TaxonConfig; import org.texttechnologylab.uce.common.exceptions.DatabaseOperationException; import org.texttechnologylab.uce.common.exceptions.ExceptionUtils; -import org.texttechnologylab.uce.common.models.corpus.Corpus; +import org.texttechnologylab.uce.common.models.imp.ImportStatus; +import 
org.texttechnologylab.uce.common.models.imp.UCEImport; import org.texttechnologylab.uce.common.services.PostgresqlDataInterface_Impl; import org.texttechnologylab.uce.common.services.S3StorageService; import org.texttechnologylab.uce.common.utils.StringUtils; import org.texttechnologylab.uce.corpusimporter.Importer; +import java.io.IOException; +import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; import java.util.HashMap; import java.util.Map; +import java.util.UUID; import java.util.concurrent.CompletableFuture; + public class ImportExportApi implements UceApi { + private static final Logger logger = LogManager.getLogger(PostgresqlDataInterface_Impl.class); private S3StorageService s3StorageService; private PostgresqlDataInterface_Impl db; private ApplicationContext serviceContext; - private static final Logger logger = LogManager.getLogger(PostgresqlDataInterface_Impl.class); - private static Gson gson = new Gson(); - public ImportExportApi(ApplicationContext serviceContext) { this.serviceContext = serviceContext; this.s3StorageService = serviceContext.getBean(S3StorageService.class); @@ -54,7 +64,7 @@ public void downloadUIMA(Context ctx) { ctx.res().setContentType(contentType); ctx.res().setHeader("Content-Disposition", "attachment; filename=\"" + objectName + "." + StringUtils.getExtensionByContentType(contentType) + "\""); - var buffer = new byte[8192]; + var buffer = new byte[8192]; int bytesRead; while ((bytesRead = s3Stream.read(buffer)) != -1) { out.write(buffer, 0, bytesRead); @@ -74,9 +84,11 @@ public void uploadUIMA(Context ctx) { // First, we need to know which corpus this document should be added to. var corpusId = ExceptionUtils.tryCatchLog( () -> Long.parseLong(new String(ctx.req().getPart("corpusId").getInputStream().readAllBytes(), StandardCharsets.UTF_8)), - (ex) -> logger.error("Error getting the corpusId this document should be added to. Aborting.", ex)); + (ex) -> logger.error("Error getting corpusId from request.", ex)); + if (corpusId == null) { - ctx.result("Parameter corpusId didn't exist. Without it, the document cannot be uploaded."); + ctx.status(400); + ctx.result("Parameter corpusId didn't exist; cannot upload document."); return; } @@ -91,29 +103,12 @@ public void uploadUIMA(Context ctx) { var corpus = ExceptionUtils.tryCatchLog( () -> db.getCorpusById(corpusId), - (ex) -> logger.error("Couldn't fetch corpus when uploading new document to corpusId " + corpusId, ex)); + (ex) -> logger.error("Couldn't fetch corpus with id " + corpusId, ex)); + if (corpus == null) { - var corpusConfigRaw = ExceptionUtils.tryCatchLog( - () -> new String(ctx.req().getPart("corpusConfig").getInputStream().readAllBytes(), StandardCharsets.UTF_8), - (ex) -> logger.error("Error getting the corpusConfig that should be used for this document. 
Aborting.", ex)); - if (corpusConfigRaw == null) { - ctx.result("Corpus with id " + corpusId + " wasn't found in the database; no config was provided; can't upload document."); - return; - } - logger.info("Corpus with id " + corpusId + " wasn't found in the database; creating a new corpus with the provided config."); - try { - var corpusConfig = gson.fromJson(corpusConfigRaw, CorpusConfig.class); - corpus = new Corpus(); - var corpusReturn = Importer.CreateDBCorpus(corpus, corpusConfig, this.db); - if (corpusReturn != null) { - corpus = corpusReturn; - } - } catch (JsonIOException | JsonSyntaxException e) { - ctx.result("The corpusConfig provided is not properly formatted."); - } catch (DatabaseOperationException e) { - ctx.result("Error creating a new corpus in the database: " + e.getMessage()); - return; - } + ctx.status(404); + ctx.result("Corpus with id " + corpusId + " wasn't found in the database."); + return; } // TODO just use 1 as default? will throw an error if this is null otherwise... @@ -122,10 +117,9 @@ public void uploadUIMA(Context ctx) { try (var input = ctx.req().getPart("file").getInputStream()) { var fileName = ctx.req().getPart("file").getSubmittedFileName(); // Import the doc in the background - final Corpus corpus1 = corpus; var importFuture = CompletableFuture.supplyAsync(() -> { try { - return importer.storeUploadedXMIToCorpusAsync(input, corpus1, fileName, documentId); + return importer.storeUploadedXMIToCorpusAsync(input, corpus, fileName, documentId); } catch (DatabaseOperationException e) { throw new RuntimeException(e); } @@ -139,6 +133,7 @@ public void uploadUIMA(Context ctx) { if (acceptedContentType != null && acceptedContentType.equals("application/json")) { Map apiResult = new HashMap<>(); apiResult.put("document_id", newDocumentId); +// ctx.contentType("application/json"); //redundant ctx.json(apiResult); return; } @@ -149,6 +144,169 @@ public void uploadUIMA(Context ctx) { ctx.status(500); ctx.result("Error uploading a file: " + e.getMessage()); } - }; + } + + + public void importCorpusFromPath(Context ctx) { + try { + String path = ctx.formParam("path"); + String numThreadStr = ctx.formParam("numThreads"); + int numThreads = (numThreadStr != null && !numThreadStr.isBlank()) ? Integer.parseInt(numThreadStr) : 1; + String casView = ctx.formParam("casView"); + + if (casView != null && casView.isBlank()) { + casView = null; + } + + if (path == null || path.isBlank()) { + ctx.status(400).result("Path is required"); + return; + } + + String importId = UUID.randomUUID().toString(); + int importerNumber = 1; + Importer importer = new Importer(serviceContext, path, importerNumber, importId, casView); + UCEImport uceImport = new UCEImport(importId, path, ImportStatus.STARTING); + Integer fileCount = ExceptionUtils.tryCatchLog(importer::getXMICountInPath, + (ex) -> logger.warn("There was an IO error counting the importable UIMA files - the import will probably fail at some point.", ex)); + uceImport.setTotalDocuments(fileCount == null ? -1 : fileCount); + db.saveOrUpdateUceImport(uceImport); + CompletableFuture.runAsync(() -> { + try { + importer.start(numThreads); + } catch (DatabaseOperationException e) { + logger.error("Error during asynchronous corpus import", e); + } + }); + ctx.status(200).result("Import started. 
Import ID: " + importId); + } catch (DatabaseOperationException e) { + logger.error("Error when creating saving/updating to database" + e); + ctx.status(500).result("Database error initiating corpus import" + e.getMessage()); + + } catch (Exception e) { + logger.error("Error initiating corpus import", e); + ctx.status(500).result("Error initiating import: " + e.getMessage()); + } + + } + + public void importCorpusFromUpload(Context ctx){ + try{ + String importId = UUID.randomUUID().toString(); + Path rootDir = java.nio.file.Paths.get(System.getProperty("java.io.tmpdir"), "uce_uploads", importId); + Path inputDir = rootDir.resolve("input"); + Files.createDirectories(inputDir); + + var validFiles = ctx.uploadedFiles("files").stream() + .filter(f -> f.size() > 0 && f.filename() != null && !f.filename().isBlank()) + .toList(); + + if (validFiles.isEmpty()) { + ctx.status(400).result("No files selected. Please select at least one XMI file or archive."); + return; + } + + for(UploadedFile uploadedFile : ctx.uploadedFiles("files")){ + try(InputStream input = uploadedFile.content()){ + Files.copy(input,inputDir.resolve(uploadedFile.filename()), StandardCopyOption.REPLACE_EXISTING); + } + } + + CorpusConfig config = new CorpusConfig(); + String name = ctx.formParam("name"); + if (name == null || name.isBlank()){ + ctx.status(400).result("No corpus name given"); + } + config.setName(name); + String author = ctx.formParam("author"); + if (author == null || author.isBlank()) { + ctx.status(400).result("Corpus Author is required."); + return; + } + config.setAuthor(author); + String language = ctx.formParam("language"); + if (language == null || language.isBlank()) { + ctx.status(400).result("Corpus Language is required."); + return; + } + config.setLanguage(language); + config.setDescription(ctx.formParam("description")); + String addToExistingParam = ctx.formParam("addToExistingCorpus"); + boolean addToExisting = addToExistingParam != null && Boolean.parseBoolean(addToExistingParam); + config.setAddToExistingCorpus(addToExisting); + +// Annotations + CorpusAnnotationConfig annotations = new CorpusAnnotationConfig(); + annotations.setSentence(ctx.formParam("sentence") != null); + annotations.setLemma(ctx.formParam("lemma") != null); + annotations.setNamedEntity(ctx.formParam("namedEntity") != null); + annotations.setSentiment(ctx.formParam("sentiment") != null); + annotations.setEmotion(ctx.formParam("emotion") != null); + annotations.setTime(ctx.formParam("time") != null); + annotations.setGeoNames(ctx.formParam("geoNames") != null); + annotations.setWikipediaLink(ctx.formParam("wikipediaLink") != null); + annotations.setImage(ctx.formParam("image") != null); + annotations.setUnifiedTopic(ctx.formParam("unifiedTopic") != null); + annotations.setOCRPage(ctx.formParam("OCRPage") != null); + annotations.setOCRParagraph(ctx.formParam("OCRParagraph") != null); + annotations.setOCRBlock(ctx.formParam("OCRBlock") != null); + annotations.setOCRLine(ctx.formParam("OCRLine") != null); + + TaxonConfig taxonConfig = new TaxonConfig(); + taxonConfig.setAnnotated(ctx.formParam("taxonAnnotated") != null); + taxonConfig.setBiofidOnthologyAnnotated(ctx.formParam("biofidOnthologyAnnotated") != null); + + annotations.setTaxon(taxonConfig); + config.setAnnotations(annotations); + +// Other Settings + OtherConfig otherConfig = new OtherConfig(); + otherConfig.setEnableEmbeddings(ctx.formParam("enableEmbeddings") != null); + otherConfig.setEnableRAGBot(ctx.formParam("enableRAGBot") != null); + 
otherConfig.setIncludeKeywordDistribution(ctx.formParam("includeKeywordDistribution") != null); + otherConfig.setEnableS3Storage(ctx.formParam("enableS3Storage") != null); + config.setOther(otherConfig); + + Gson gson = new GsonBuilder().setPrettyPrinting().create(); + String jsonString = gson.toJson(config); + Files.writeString(rootDir.resolve("corpusConfig.json"),jsonString,StandardCharsets.UTF_8); + + String numThreadStr = ctx.formParam("numThreads"); + int numThreads = (numThreadStr != null && !numThreadStr.isBlank()) ? Integer.parseInt(numThreadStr) : 1; + String casView = ctx.formParam("casView"); + if(casView != null && casView.isBlank()) casView = null; + int importerNumber = 1; + Importer importer = new Importer(serviceContext,rootDir.toString(),importerNumber,importId,casView); + + String logTitle = (addToExisting ? "ADD_TO:" : "UPLOAD_NEW:") + name; + UCEImport uceImport = new UCEImport(importId,logTitle,ImportStatus.STARTING); + Integer fileCount = ExceptionUtils.tryCatchLog(importer::getXMICountInPath, + (ex) -> logger.warn("IO Error counting upload files.",ex)); + uceImport.setTotalDocuments(fileCount == null ? -1 : fileCount); + db.saveOrUpdateUceImport(uceImport); + CompletableFuture.runAsync(() -> { + try{ + importer.start(numThreads); + } catch (DatabaseOperationException e) { + logger.error("Error during asynchronous corpus uplaod import",e); + }finally { + try { + org.apache.commons.io.FileUtils.deleteDirectory(rootDir.toFile()); + } catch (IOException e) { + logger.warn("Could not delete temp upload dir: " + rootDir,e); + } + } + }); + + ctx.status(200).result("Upload sucessfull. Import started with ID: " + importId); + + } catch (IOException e) { + logger.error("Error handling file upload import", e); + ctx.status(500).result("Error during upload " + e.getMessage()); + } catch (DatabaseOperationException e) { + logger.error("Error saving/updating database during Uce Import", e); + ctx.status(500).result("Error during saving/updating database " + e.getMessage()); + } + } }
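
(Reviewer note: a minimal, hedged smoke test for the two new entry points, sketched with java.net.http under stated assumptions: host/port, the corpus id, and the session cookie are placeholders, and /api/analysis/importCas requires a logged-in session per its @Authentication annotation. The form fields mirror importCorpusFromPath; the analysisId must come from a prior /api/analysis/runPipeline response (model attribute "analysisId") and is only valid while the AnalysisCache entry is alive, i.e. within the 45-minute TTL.

    import java.net.URI;
    import java.net.URLEncoder;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.nio.charset.StandardCharsets;

    public class ImportEndpointsSmokeTest {
        public static void main(String[] args) throws Exception {
            HttpClient client = HttpClient.newHttpClient();
            String base = "http://localhost:8080"; // assumption: locally running UCE portal

            // 1) Kick off a filesystem import. Fields match importCorpusFromPath:
            //    "path" (required), "numThreads" (optional, defaults to 1), "casView" (optional).
            String form = "path=" + URLEncoder.encode("/data/corpora/demo", StandardCharsets.UTF_8)
                    + "&numThreads=4";
            HttpRequest startImport = HttpRequest.newBuilder()
                    .uri(URI.create(base + "/api/ie/import/path"))
                    .header("Content-Type", "application/x-www-form-urlencoded")
                    .POST(HttpRequest.BodyPublishers.ofString(form))
                    .build();
            HttpResponse<String> r1 = client.send(startImport, HttpResponse.BodyHandlers.ofString());
            System.out.println(r1.statusCode() + " " + r1.body()); // expect: "Import started. Import ID: <uuid>"

            // 2) Push a cached analysis CAS into a corpus via the new importCas route.
            String analysisId = "REPLACE-WITH-ID-FROM-runPipeline"; // placeholder
            HttpRequest importCas = HttpRequest.newBuilder()
                    .uri(URI.create(base + "/api/analysis/importCas?analysisId=" + analysisId + "&corpusId=1"))
                    .header("Cookie", "JSESSIONID=...") // assumed session cookie; route requires LOGGED_IN
                    .POST(HttpRequest.BodyPublishers.noBody())
                    .build();
            HttpResponse<String> r2 = client.send(importCas, HttpResponse.BodyHandlers.ofString());
            System.out.println(r2.statusCode() + " " + r2.body());
        }
    }
)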