From 1a31da6109f272a36d2bb9805e81b14dcb369063 Mon Sep 17 00:00:00 2001
From: OlafVAdan <75035976+OlafVAdan@users.noreply.github.com>
Date: Tue, 8 Mar 2022 15:08:00 +0100
Subject: [PATCH] modified to transcribe speech to text continuously
In addition, the modified speech sample integrates the 'Named Entity Recognition' (NER) and 'key-phrase extraction' (KPE) modules provided by Azure Cognitive Services 'Text Analysis'.
---
.env | 6 +-
.eslintcache | 1 +
package-lock.json | 588 ++++++++++++++++++++++++++++++++--------------
package.json | 1 +
server/index.js | 42 ++++
src/App.js | 239 +++++++++++++++----
src/textclient.js | 24 ++
7 files changed, 680 insertions(+), 221 deletions(-)
create mode 100644 .eslintcache
create mode 100644 src/textclient.js
diff --git a/.env b/.env
index bdeb0f9..649be8b 100644
--- a/.env
+++ b/.env
@@ -1,2 +1,4 @@
-SPEECH_KEY=paste-your-speech-key-here
-SPEECH_REGION=paste-your-speech-region-here
+SPEECH_KEY=YOUR_SPEECH_KEY_HERE
+SPEECH_REGION=YOUR_REGION_HERE
+TEXT_KEY=YOUR_TEXT_ANALYSIS_KEY_HERE
+TEXT_ENDPOINT=YOUR_CORRESPONDING_ENDPOINT_HERE
diff --git a/.eslintcache b/.eslintcache
new file mode 100644
index 0000000..74b642b
--- /dev/null
+++ b/.eslintcache
@@ -0,0 +1 @@
+[{"C:\\Users\\s153848\\co-n-versualize\\AzureSpeechReactSample-main\\src\\index.js":"1","C:\\Users\\s153848\\co-n-versualize\\AzureSpeechReactSample-main\\src\\App.js":"2","C:\\Users\\s153848\\co-n-versualize\\AzureSpeechReactSample-main\\src\\token_util.js":"3","C:\\Users\\s153848\\co-n-versualize\\AzureSpeechReactSample-main\\src\\textclient.js":"4"},{"size":239,"mtime":1633533139810,"results":"5","hashOfConfig":"6"},{"size":17418,"mtime":1641046167333,"results":"7","hashOfConfig":"6"},{"size":981,"mtime":1633531369355,"results":"8","hashOfConfig":"6"},{"size":808,"mtime":1633536481972,"results":"9","hashOfConfig":"6"},{"filePath":"10","messages":"11","errorCount":0,"warningCount":0,"fixableErrorCount":0,"fixableWarningCount":0,"usedDeprecatedRules":"12"},"w0zloz",{"filePath":"13","messages":"14","errorCount":0,"warningCount":5,"fixableErrorCount":0,"fixableWarningCount":0,"source":"15","usedDeprecatedRules":"12"},{"filePath":"16","messages":"17","errorCount":0,"warningCount":0,"fixableErrorCount":0,"fixableWarningCount":0,"usedDeprecatedRules":"18"},{"filePath":"19","messages":"20","errorCount":0,"warningCount":1,"fixableErrorCount":0,"fixableWarningCount":1,"source":null},"C:\\Users\\s153848\\co-n-versualize\\AzureSpeechReactSample-main\\src\\index.js",[],["21","22"],"C:\\Users\\s153848\\co-n-versualize\\AzureSpeechReactSample-main\\src\\App.js",["23","24","25","26","27"],"import React, { Component } from 'react';\nimport { Container } from 'reactstrap';\nimport { getTokenOrRefresh } from './token_util';\nimport './custom.css'\nimport { ResultReason } from 'microsoft-cognitiveservices-speech-sdk';\nimport axios from 'axios';\n\nrequire('dotenv').config();\n\n//cognitive services\nconst speechsdk = require('microsoft-cognitiveservices-speech-sdk')\n\nconst { TextAnalyticsClient, AzureKeyCredential } = require(\"@azure/ai-text-analytics\");\n\nconst key = '7889a8e1c4084f5daf69c71673b8c6fa';\nconst endpoint = 
'https://textingthings.cognitiveservices.azure.com/';\nconst textAnalyticsClient = new TextAnalyticsClient(endpoint, new AzureKeyCredential(key));\n\nexport default class App extends Component {\n constructor(props) {\n super(props);\n \n this.state = {\n displayText: null,\n entitiescomp: null,\n keyphrases: null,\n recognizerboy: null,\n ctr: 0,\n }\n }\n \n async componentDidMount() {\n // check for valid speech key/region\n const tokenRes = await getTokenOrRefresh();\n if (tokenRes.authToken === null) {\n this.setState({\n displayText: 'FATAL_ERROR: ' + tokenRes.error\n });\n }\n }\n \n //get keyphrases -----------> TODO: get whole analyzed object bc more info jwz, wellicht gwn alleen die 2nd ding die je wilt (entities)\n async keyPhraseExtraction(recognizedtext){\n let displayText;\n\n const client = textAnalyticsClient;\n const text = recognizedtext;\n const keyPhrasesInput = [\n text,\n ];\n\n const keyPhraseResult = await client.extractKeyPhrases(keyPhrasesInput);\n \n keyPhraseResult.forEach(document => {\n // console.log(`ID: ${document.id}`);\n displayText = `${document.keyPhrases}`;\n });\n\n this.setState({\n keyphrases: displayText,\n });\n console.log(\"keyphraseresult: \" + keyPhraseResult);\n \n }\n\n async entityRecognition(recognizedtext){\n //let displayText;\n const client = textAnalyticsClient;\n const text = recognizedtext;\n\n const entityInputs = [\n text,\n // \"Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800\",\n // \"La sede principal de Microsoft se encuentra en la ciudad de Redmond, a 21 kilómetros de Seattle.\"\n ];\n\n const entityResults = await client.recognizeEntities(entityInputs);\n \n entityResults.forEach(document => {\n console.log(`Document ID: ${document.id}`);\n if (document.entities != null){\n document.entities.forEach(entity => {\n console.log(`\\tName: ${entity.text} \\tCategory: ${entity.category} \\tSubcategory: ${entity.subCategory ? 
entity.subCategory : \"N/A\"}`);\n console.log(`\\tScore: ${entity.confidenceScore}`);\n });\n }\n });\n\n this.setState({\n entitiescomp: entityResults,\n });\n console.log(\"entityresults\" + entityResults[0].entities);\n }\n // entityRecognition(textAnalyticsClient);\n\n //check if state (i.e. first displayText, then keyphrase&entity) updated, send to api\n async componentDidUpdate(prevState) {\n // Typical usage (don't forget to compare props):\n if (this.state.displayText !== prevState.displayText) {\n if(this.state.entitiescomp !== null){\n if (this.state.entitiescomp !== prevState.entitiescomp && this.state.keyphrases !== prevState.keyphrases){\n //copy state & remove client info\n console.log(this.state);\n const sentence = this.state.displayText;\n const keyphrases = this.state.keyphrases;\n const entities = this.state.entitiescomp[0].entities;\n try{\n await axios.post('http://localhost:3002/processing/poster', {sentence,keyphrases,entities}, {\n headers: {\n 'Content-Type': 'application/json'\n }\n }).then(\n console.log('it was updated, so we updating and doing the networkthing')\n );\n } catch(err) {\n if(err.response === 500) {\n console.log('it fucked up in the server');\n } else {\n console.log(\"error:\" + err.response);\n }\n };\n }\n }\n }\n }\n \n async sttFromMic(){\n let displayText;\n var Ctr = this.state.ctr;\n let utterances = [\n // \"Deliberating how online social interaction can't compare to the quality of face to face conversations, Erickson and Kellogg stated It is through conversation that we create, develop, validate, and share knowledge. Based their arguments. 
Coconut was developed in a research-through-design process with the aim to enhance and foster the quality of such conversations \",\n // \"Correspondingly, in its approach to Natural Language Understanding, rather than identifying topics from large datasets of documents such as news articles and social media content, its algorithm defines an utterance during a conversation as a document.\",\n // \"Natural Language Processing to transcribe spoken language to text, as well as initial text analysis, is performed through Microsoft Azure Cognitive Services. As a result, keyphrases and predefined named entities are identified for each utterance, which are then processed and stored to facilitate correlated topic modeling.\",\n // \"As such, the interface contributes to Natural Language Understanding by visualizing how topical knowledge is carried through a conversation in real time.\",\n\n // \"Deliberating how online social interaction cannot compare to the quality of face-to-face conversations, Erickson and Kellogg stated It is through conversation that we create, develop, validate, and share knowledge. 
Based their arguments, Coconut was developed in a research-through-design process with the aim to enhance and foster the quality of such conversations.\",\n // \"In its core, it makes use of natural language processing by Microsoft to transcribe an utterance to text and identify its key phrases and entities, after which Coconut processes these to keep track of their relation to each other.\",\n // \"In its approach to Natural Language Understanding, rather than identifying topics from a large dataset of written documents, Coconut identifies topics from a conversation through utterances to create, develop, validate, and share its topical knowledge with those conversing.\",\n // \"In a nutshell, through correlated topic modeling in real time, Coconut visualizes how topical knowledge unfolds in conversations.\",\n \"Deliberating how online social interaction can't compare to the quality of face to face conversations, Erickson and Kellogg stated It is through conversation that we create, develop, validate, and share knowledge. Based their arguments. Coconut was developed in a research-through-design process with the aim to enhance and foster the quality of such conversations.\",\n \"Correspondingly, in its approach to Natural Language Understanding, rather than identifying topics from large datasets of documents such as news articles and social media content, its algorithm defines an utterance during a conversation as a document.\",\n \"Natural Language Processing to transcribe spoken language to text, as well as initial text analysis, is facilitated by Microsoft Azure Cognitive Services. 
As a result, keyphrases and predefined named entities are identified for each utterance, which are then processed and stored to facilitate correlated topic modeling.\",\n \"As such, the interface contributes to Natural Language Understanding by visualizing how topical knowledge is carried through a conversation in real time.\",\n \"Admittedly, the current prototype is rather limited in terms of interaction. However, it demonstrates how utterance based topic modeling presents a variety of design and design research opportunities. For example, it could be used to automatically generate mindmaps, visualize arguments made during political debates or even uncover novel approaches in qualitative data analysis.\",\n \"Fun fact by the way, to automatically generate mindmaps was actually why Coconut was developed in the first place! That being said, it should be noted that the current prototype is merely a component of what it envisions. As the upcoming iterations is where things actually get interesting.\",\n \"Considering the quality of face to face conversations encompasses much more than spoken language, it should be noted that the current model does not incorporate vital information such as facial expressions, body language and the situated context of a conversation. Moreover, it is still subjected to a two dimensional interface.\",\n \"So how do we capture such vital information? How may Coconut take into consideration the natural language components of body language and situated context?\",\n \"In order to move from a two to a three dimensional interface, the next iteration entails integration of the Azure Kinect DK sensor, which will not only allow for body language to be coupled to utterances, but bridge the digital and the physical by enabling full bodied interaction in a three-dimensional tangible interface.\"\n \n\n\n // \"Admittedly, the current prototype is rather limited in terms of interaction. 
However, it demonstrates how utterance based topic modeling presents a variety of design and design research opportunities. For example, it could be used to automatically generate mindmaps, visualize arguments made during political debates or even uncover novel approaches in qualitative data analysis.\",\n // \"Fun fact by the way, to automatically generate mindmaps was actually why Coconut was developed in the first place! That being said, it should be noted that the current prototype is merely a component of what it envisions. As the upcoming iterations is where things actually get interesting.\",\n // \"Considering the quality of face to face conversations encompasses much more than spoken language, it should be noted that the current model does not incorporate vital information such as facial expressions, body language and the situated context of a conversation. Moreover, it is still subjected to a two dimensional interface.\",\n // \"So how do we capture such vital information? How may Coconut take into consideration the natural language components of body language and situated context?\",\n // \"In order to move from a two to a three dimensional interface, the next iteration entails integration of the Azure Kinect DK sensor, which will not only allow for body language to be coupled to utterances, but bridge the digital and the physical by enabling full bodied interaction in a three-dimensional tangible interface\"\n // \"Erickson and Kellogg stated it is through conversation that we create, develop, validate and share knowledge. 
Building on their argument of how online social interaction can't compare to the quality or face to face conversations, this interface was designed with the aim to facilitate live feedback during a conversation.\",\n // \"In its approach to natural language understanding, rather than identifying topics from large data sets of documents such as news articles and social media content, the algorithm defines a single utterance of spoken language as a document.\",\n // \"Under the hood, Microsoft Azure Cognitive Services facilitates natural language processing to transcribe spoken language to text and perform initial text analysis, for which the latter identifies key phrases and predefined named entities for each utterance\",\n // \"These are then processed and stored to facilitate correlated topic modelling. As such, the interface visualizes how topical knowledge is carried through a conversation in real time\",\n // \".\"\n ]\n\n \n displayText = utterances[Ctr];\n Ctr = Ctr + 1;\n if(Ctr == utterances.length){\n Ctr = 0;\n }\n\n this.keyPhraseExtraction(displayText);\n this.entityRecognition(displayText);\n\n this.setState({\n displayText: displayText,\n ctr: Ctr\n });\n }\n\n //React example single utterance (15s max.)\n // async sttFromMic() {\n // const tokenObj = await getTokenOrRefresh();\n // const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);\n // speechConfig.speechRecognitionLanguage = 'en-US';\n \n // const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput();\n // const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);\n\n // this.setState({\n // displayText: 'speak into your microphone...'\n // });\n\n // recognizer.recognizeOnceAsync(result => {\n // let displayText;\n \n\n // if (result.reason === ResultReason.RecognizedSpeech) {\n // displayText = `${result.text}`;\n \n // } else {\n // displayText = 'ERROR: Speech was cancelled or could not be recognized. 
Ensure your microphone is working properly.';\n // }\n // // this.keyPhraseExtraction(displayText);\n // });\n // }\n\n //Awesome Olaf doing continious jwz\n async sttFromMicCont(){\n const tokenObj = await getTokenOrRefresh();\n const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);\n speechConfig.speechRecognitionLanguage = 'en-US';\n \n const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput();\n const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);\n\n this.setState({recognizerboy: recognizer})\n\n let displayText;\n\n recognizer.recognizing = (s, e) => {\n //console.log(`RECOGNIZING: Text=${e.result.text}`);\n //displayText = `${e.result.text}`;\n };\n \n \n recognizer.recognized = (s, e) => {\n if (e.result.reason == speechsdk.ResultReason.RecognizedSpeech) {\n console.log(`RECOGNIZED: Text=${e.result.text}`);\n displayText = `${e.result.text}`;\n this.keyPhraseExtraction(displayText);\n this.entityRecognition(displayText);\n this.setState({displayText: displayText});\n }\n else if (e.result.reason == speechsdk.ResultReason.NoMatch) {\n console.log(\"NOMATCH: Speech could not be recognized.\");\n }\n };\n \n recognizer.canceled = (s, e) => {\n console.log(`CANCELED: Reason=${e.reason}`);\n \n if (e.reason == speechsdk.CancellationReason.Error) {\n console.log(`\"CANCELED: ErrorCode=${e.errorCode}`);\n console.log(`\"CANCELED: ErrorDetails=${e.errorDetails}`);\n console.log(\"CANCELED: Did you update the key and location/region info?\");\n }\n \n recognizer.stopContinuousRecognitionAsync();\n };\n \n recognizer.sessionStopped = (s, e) => {\n console.log(\"\\n Session stopped event.\");\n recognizer.stopContinuousRecognitionAsync();\n };\n\n recognizer.startContinuousRecognitionAsync();\n }\n\n //button to stop the recogniiton process\n async sttFromMicContStop() {\n const recognizer = this.state.recognizerboy;\n recognizer.stopContinuousRecognitionAsync();\n }\n\n render() 
{\n return (\n \n
Speech sample app
\n\n
\n
\n this.sttFromMic()}>\n Convert speech to text from your mic once.\n \n\n
\n this.sttFromMicCont() } >\n Convert speech to text continuously\n
\n\n
\n this.sttFromMicContStop() } >\n Stop converting speech to text continuously\n
this.sttFromMic()}>
- Convert speech to text from your mic.
+ Convert speech to text from your mic once.
+
-
- this.fileChange(e)}
- style={{display: "none"}}
- />
- Convert speech to text from an audio file.
+ this.sttFromMicCont() } >
+ Convert speech to text continuously
+
+
+ this.sttFromMicContStop() } >
+ Stop converting speech to text continuously
+
+
{this.state.displayText}
diff --git a/src/textclient.js b/src/textclient.js
new file mode 100644
index 0000000..e0e4c27
--- /dev/null
+++ b/src/textclient.js
@@ -0,0 +1,24 @@
+"use strict";
+require('dotenv').config();
+const { TextAnalyticsClient, AzureKeyCredential } = require("@azure/ai-text-analytics");
+// Builds the Text Analytics client from the key/endpoint literals below. NOTE(review): dotenv is loaded above but these values are not read from the environment — confirm whether they should be.
+const key = 'REDACTED';
+const endpoint = 'REDACTED';
+const textAnalyticsClient = new TextAnalyticsClient(endpoint, new AzureKeyCredential(key));
+// Sends one hard-coded sample sentence to client.extractKeyPhrases and logs the id and key phrases of every result; resolves with no value. `client` is the object whose extractKeyPhrases method is awaited (this file passes textAnalyticsClient).
+async function keyPhraseExtraction(client){
+ // Single-element batch holding the sample sentence.
+ const keyPhrasesInput = [
+ "My cat might need to see a veterinarian.",
+ ];
+ const keyPhraseResult = await client.extractKeyPhrases(keyPhrasesInput);
+ // NOTE(review): keyPhrases is read without checking whether a result entry failed — confirm the result shape against the SDK.
+ keyPhraseResult.forEach(document => {
+ console.log(`ID: ${document.id}`);
+ console.log(` ${document.keyPhrases}`);
+ });
+}
+// Runs the sample extraction as soon as this module is loaded; the returned promise is neither awaited nor given a rejection handler.
+keyPhraseExtraction(textAnalyticsClient);
+// The configured client is this module's only export.
+module.exports = textAnalyticsClient;
\ No newline at end of file