-
Notifications
You must be signed in to change notification settings - Fork 16
Import a corpus from path via the Uce Portal UI #132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
MakuIan
wants to merge
11
commits into
texttechnologylab:main
Choose a base branch
from
MakuIan:import_from_path_ui
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
10f4b2d
added function for: analysis result send to the importer and save ana…
duaakb e68ec44
enabled pipeline run again
duaakb ea1b987
replaced system.out with logger.info
duaakb 9f1d81c
Added input field for corpusId
duaakb 38f76b0
derive importer URL from request host; remove hardcoded localhost
duaakb f47df35
Restored importer_cache_branch changes after stash
duaakb 9026fb2
Merge remote-tracking branch 'origin/develop' into importer_cache_branch
duaakb e46ae61
Changed corpus ID input to dropdown
duaakb 6337f47
Added a modal from where a user can import a corpus via the corpus im…
4b01d7c
Funktionalitaet des corpus import from path nun konfigurierbar.
62877ff
New Import where Users can select which files to upload to either a n…
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
19 changes: 19 additions & 0 deletions
19
uce.portal/resources/templates/wiki/analysisResultFragment.ftl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,18 +1,50 @@ | ||
| package org.texttechnologylab.uce.analysis; | ||
|
|
||
| import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; | ||
| import org.apache.logging.log4j.LogManager; | ||
| import org.apache.logging.log4j.Logger; | ||
| import org.apache.uima.fit.factory.JCasFactory; | ||
| import org.apache.uima.fit.util.JCasUtil; | ||
| import org.apache.uima.jcas.JCas; | ||
| import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer; | ||
| import org.texttechnologylab.uce.analysis.modules.*; | ||
| import org.texttechnologylab.uce.analysis.typeClasses.TextClass; | ||
|
|
||
|
|
||
|
|
||
|
|
||
| import java.time.Instant; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.io.InputStream; | ||
| import java.io.DataOutputStream; | ||
| import java.net.HttpURLConnection; | ||
| import java.net.URL; | ||
| import java.nio.charset.StandardCharsets; | ||
|
|
||
| import java.util.*; | ||
|
|
||
|
|
||
| public class RunDUUIPipeline { | ||
| private static final AnalysisCache analysisCache = new AnalysisCache(); | ||
| private static final ThreadLocal<String> lastAnalysisIdTL = new ThreadLocal<>(); | ||
| private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class); | ||
| private static final ThreadLocal<String> currentUserIdTL = new ThreadLocal<>(); | ||
|
|
||
|
|
||
| public static AnalysisSession getCachedSession(String analysisId) { | ||
| return analysisCache.get(analysisId); | ||
| } | ||
|
|
||
| public static void setThreadLocalUserId(String userId) { | ||
| currentUserIdTL.set(userId); | ||
| } | ||
|
|
||
| private static String getCurrentUserId() { | ||
| // TODO: replace with your auth/session identity | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Inzwischen gibt es ein optionales "Permission-System", hier bitte die User-ID (bzw Name) des Keycloak-Logins verwenden. |
||
|
|
||
| return currentUserIdTL.get(); | ||
| } | ||
|
|
||
| public DUUIInformation getModelResources(List<String> modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception { | ||
| ModelResources modelResources = new ModelResources(); | ||
|
|
@@ -189,10 +221,13 @@ public DUUIInformation getModelResources(List<String> modelGroups, String inputT | |
| newCas.setDocumentText(text); | ||
| cas = newCas; | ||
|
|
||
| logger.info("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)"); | ||
|
|
||
| } | ||
| // run pipeline | ||
| DUUIComposer composer = pipeline.setComposer(modelInfosMap); | ||
| JCas result = pipeline.runPipeline(cas, composer); | ||
| logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)"); | ||
| // get results | ||
| Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups); | ||
| // print results | ||
|
|
@@ -232,9 +267,29 @@ public DUUIInformation getModelResources(List<String> modelGroups, String inputT | |
| if (isCohmetrix) { | ||
| duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups); | ||
| } | ||
| String analysisId = UUID.randomUUID().toString(); | ||
| String userId = getCurrentUserId(); | ||
| logger.info("[USER] Running pipeline for User: " + userId); | ||
| String title = "Analysis " + Instant.now(); | ||
|
|
||
| byte[] xmiBytes = toXmiBytes(result); | ||
| AnalysisSession session = new AnalysisSession( | ||
| analysisId, userId, title, /*externalId*/ null, | ||
| result, /*xmiBytes*/ xmiBytes | ||
| ); | ||
| analysisCache.put(session); | ||
| lastAnalysisIdTL.set(analysisId); | ||
| logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)"); | ||
| return duuiInformation; | ||
| } | ||
|
|
||
| public AnalysisResponse getModelResourcesWithHandle(List<String> modelGroups, String inputText, String claim, | ||
| String coherenceText, String stanceText, String systemPrompt) throws Exception { | ||
| DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt); | ||
| String id = lastAnalysisIdTL.get(); | ||
| return new AnalysisResponse(id, info); | ||
| } | ||
|
|
||
| public static void main(String[] args) throws Exception { | ||
| ModelResources modelResources = new ModelResources(); | ||
| List<ModelGroup> modelGroups = modelResources.getGroupedModelObjects(); | ||
|
|
@@ -256,5 +311,195 @@ public static void main(String[] args) throws Exception { | |
| DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt); | ||
|
|
||
| } | ||
| public static final class AnalysisResponse { | ||
| public final String analysisId; | ||
| public final DUUIInformation duuiInformation; | ||
|
|
||
| public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) { | ||
| this.analysisId = analysisId; | ||
| this.duuiInformation = duuiInformation; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| //AnalysisSession | ||
| public static final class AnalysisSession { | ||
| public final String analysisId; | ||
| public final String userId; | ||
| public final long createdAtMillis; | ||
| public final String title; | ||
| public final String externalId; | ||
| public final JCas jcas; | ||
| public final byte[] xmiBytes; | ||
|
|
||
| public AnalysisSession(String analysisId, String userId, String title, String externalId, | ||
| JCas jcas, byte[] xmiBytes) { | ||
| this.analysisId = analysisId; | ||
| this.userId = userId; | ||
| this.title = title; | ||
| this.externalId = externalId; | ||
| this.createdAtMillis = System.currentTimeMillis(); | ||
| this.jcas = jcas; | ||
| this.xmiBytes = xmiBytes; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| // AnalysisCache | ||
| public static final class AnalysisCache { | ||
| private final Map<String, AnalysisSession> map = new ConcurrentHashMap<>(); | ||
| private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes | ||
|
|
||
| public void put(AnalysisSession s) { map.put(s.analysisId, s); } | ||
|
|
||
| public AnalysisSession get(String id) { // Retrieve a session from the cache | ||
| AnalysisSession s = map.get(id); | ||
| if (s == null) return null; | ||
|
|
||
| if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) { // If this session is older than 45 minutes -> expire it | ||
| map.remove(id); | ||
| return null; | ||
| } | ||
| return s; | ||
| } | ||
|
|
||
| // public void remove(String id) { | ||
| // map.remove(id); | ||
| // } //Manually remove a session by ID | ||
| // | ||
| // | ||
| // public void cleanupExpired() { // cleanup all expired sessions | ||
| // long now = System.currentTimeMillis(); | ||
| // for (var entry : map.entrySet()) { | ||
| // AnalysisSession s = entry.getValue(); | ||
| // if (now - s.createdAtMillis > ttlMillis) { | ||
| // map.remove(entry.getKey()); | ||
| // logger.info("[CRON] Removed expired session: " + s.analysisId); | ||
| // } | ||
| // } | ||
| // } | ||
| // } | ||
| // private static final java.util.concurrent.ScheduledExecutorService scheduler = //Cron job for automatic cleanup every 5 minutes | ||
| // java.util.concurrent.Executors.newScheduledThreadPool(1); | ||
| // | ||
| // static { | ||
| // scheduler.scheduleAtFixedRate(() -> { | ||
| // try { | ||
| // analysisCache.cleanupExpired(); | ||
| // } catch (Exception e) { | ||
| // logger.error("[CACHE] Cache cleanup failed: " + e.getMessage()); | ||
| // } | ||
| // }, 5, 5, java.util.concurrent.TimeUnit.MINUTES); | ||
| // | ||
| // scheduler.scheduleAtFixedRate(() -> { | ||
| // logger.info("[CACHE] Running cache cleanup task..."); | ||
| // analysisCache.cleanupExpired(); // your cleanup method | ||
| // }, 1, 5, TimeUnit.MINUTES); | ||
| // | ||
| // | ||
| } | ||
| private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception { | ||
| java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); | ||
| org.apache.uima.cas.impl.XmiCasSerializer ser = | ||
| new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem()); | ||
| org.apache.uima.util.XMLSerializer xmlSer = | ||
| new org.apache.uima.util.XMLSerializer(bos, true); | ||
| xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1"); | ||
| ser.serialize(jcas.getCas(), xmlSer.getContentHandler()); | ||
| return bos.toByteArray(); | ||
| } | ||
|
|
||
|
|
||
| // When we send CAS to the importer via HTTP, we want to capture the response. | ||
| // This small class acts like a container for the HTTP response details | ||
| private static class HttpResult { | ||
| final int status; | ||
| final String body; | ||
| final String locationHeader; | ||
| HttpResult(int status, String body, String locationHeader) { | ||
| this.status = status; this.body = body; this.locationHeader = locationHeader; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| // Send CAS via HTTP | ||
| private static HttpResult postMultipart(String urlStr, | ||
| Map<String, String> fields, | ||
| String fileField, String filename, | ||
| String fileContentType, byte[] fileBytes) throws Exception { | ||
| String boundary = "----JAVA-" + UUID.randomUUID(); //Generate a boundary string to separate parts in multipart body | ||
| URL url = new URL(urlStr); //Open HTTP connection to the importer endpoint | ||
| HttpURLConnection conn = (HttpURLConnection) url.openConnection(); | ||
| conn.setDoOutput(true); | ||
| conn.setRequestMethod("POST"); | ||
| conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary); | ||
|
|
||
| try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) { //Write request body | ||
| // text fields | ||
| for (var e : fields.entrySet()) { | ||
| out.writeBytes("--" + boundary + "\r\n"); | ||
| out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n"); | ||
| out.write(e.getValue().getBytes(StandardCharsets.UTF_8)); | ||
| out.writeBytes("\r\n"); | ||
| } | ||
| // file field | ||
| out.writeBytes("--" + boundary + "\r\n"); | ||
| out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n"); | ||
| out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n"); | ||
| out.write(fileBytes); | ||
| out.writeBytes("\r\n"); | ||
| out.writeBytes("--" + boundary + "--\r\n"); | ||
| out.flush(); | ||
| } | ||
|
|
||
| int status = conn.getResponseCode(); //Read the HTTP response from the importer | ||
| String location = conn.getHeaderField("Location"); | ||
| String body; | ||
|
|
||
| try (InputStream in = (status >= 200 && status < 400) ? conn.getInputStream() : conn.getErrorStream()) { | ||
| body = (in != null) ? new String(in.readAllBytes(), StandardCharsets.UTF_8) : ""; | ||
| } | ||
| conn.disconnect(); | ||
| return new HttpResult(status, body, location); | ||
| } | ||
|
|
||
| public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached CAS to importer | ||
| String analysisId, | ||
| long corpusId, | ||
| String documentId, | ||
| String casView) throws Exception { | ||
| AnalysisSession s = getCachedSession(analysisId); | ||
| if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId); | ||
|
|
||
| byte[] casBytes = toXmiBytes(s.jcas); | ||
|
|
||
| Map<String, String> fields = new LinkedHashMap<>(); // Form-data fields | ||
| fields.put("analysisId", analysisId); | ||
| fields.put("corpusId", Long.toString(corpusId)); | ||
| if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId); | ||
| if (casView != null && !casView.isBlank()) fields.put("casView", casView); | ||
|
|
||
|
|
||
| // Send multipart as XMI | ||
| String filename = "cas_" + analysisId + ".xmi"; | ||
| logger.info("[IMPORT][HTTP] POST " + importUrl | ||
| + " corpusId=" + corpusId + " analysisId=" + analysisId | ||
| + " documentId=" + documentId + " casView=" + casView | ||
| + " file=" + filename + " (" + casBytes.length + " bytes)"); | ||
|
|
||
| HttpResult res = postMultipart( | ||
| importUrl, | ||
| fields, | ||
| "file", | ||
| filename, | ||
| "application/xml", | ||
| casBytes | ||
| ); | ||
| logger.info("[IMPORT][HTTP] status=" + res.status | ||
| + (res.locationHeader != null ? " Location=" + res.locationHeader : "") | ||
| + (res.body != null && !res.body.isBlank() ? " body=" + res.body : "")); | ||
| return res; | ||
| } | ||
|
|
||
|
|
||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mevbagci bitte schau mal, welche URL hier wirklich verwendet werden soll.