-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathmain.py
More file actions
214 lines (181 loc) · 6.43 KB
/
main.py
File metadata and controls
214 lines (181 loc) · 6.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env python
# coding: utf-8
# audio to text
import speech_recognition as sr
import pyttsx3
# text to emotion
from text2emotion import get_emotion
# Chatbot
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer
from chatterbot.trainers import ChatterBotCorpusTrainer
# display reply
import threading
# import pyglet
# user emotion
from fer import FER
import matplotlib.pyplot as plt
import cv2
import os
# display gif
import imageio
# hide logging info
import logging
# Module-wide logger. getLogger() is called without a name, so this is the
# root logger rather than a logger specific to this file.
logger = logging.getLogger()
# DEBUG is the most verbose standard level, so the logger itself filters out
# nothing.
# NOTE(review): the nearby "hide logging info" comment does not match a DEBUG
# threshold - confirm which level is actually intended.
logger.setLevel(logging.DEBUG)
# simple video-chat-bot
class VCBot():
    """Simple video chat bot.

    Each conversation turn combines a webcam snapshot (facial emotion), a
    microphone recording (speech to text), a text-emotion estimate and a
    ChatterBot reply that is spoken back through pyttsx3. A second thread
    shows a GIF that is animated only while the bot is speaking.

    The module-level global ``flag`` is True while the bot is idle and False
    while it is speaking; both threads access it under the lock created in
    ``run``.
    """

    def __init__(self):
        """Create the recognizer, train the chatbot and set up text-to-speech."""
        # flag - False while vcbot is displaying a reply (speaking something)
        global flag
        flag = True
        # set by either worker thread to ask the other one to stop
        self._stop_event = threading.Event()
        # facial-emotion detector, built on first use and then reused
        self._detector = None
        self.r = sr.Recognizer()
        # Chatbot
        self.chatbot = ChatBot('Charlie')
        self.trainer = ChatterBotCorpusTrainer(self.chatbot)
        self.trainer.train(*self.get_samples())
        # speak
        self.engine = pyttsx3.init()
        self.engine.setProperty('rate', 145)
        logger.info("VCbot initialized")

    def get_samples(self):
        """Return the paths of all entries in ``resources/samples``.

        The directory is resolved against the current working directory (not
        against this file's location), so the bot has to be started from the
        directory that contains ``resources``. Paths are returned sorted by
        entry name so the training order does not depend on the order the
        file system happens to list entries in.

        Raises:
            OSError: if ``resources/samples`` cannot be listed.
        """
        base_dir = os.path.realpath(os.getcwd())
        sample_dir = os.path.join(base_dir, 'resources', 'samples')
        return [os.path.join(sample_dir, name)
                for name in sorted(os.listdir(sample_dir))]

    def getUserEmotion(self):
        """Grab one webcam frame and return the dominant facial emotion.

        Returns:
            The highest-scoring emotion label of the first detected face, or
            'neutral' when no frame could be read or no face was detected.
        """
        # build the detector once; constructing it for every turn is wasted work
        if self._detector is None:
            self._detector = FER(mtcnn=True)

        cap = cv2.VideoCapture(0)
        try:
            ret, frame = cap.read()
        finally:
            # always give the camera back, a new capture is opened every turn
            cap.release()
        if not ret or frame is None:
            logger.warning("Could not read a frame from the camera")
            return 'neutral'

        # round-trip through a temporary JPEG so the detector receives the
        # image exactly as plt.imread decodes it
        tmp_path = 'temp.jpeg'
        try:
            cv2.imwrite(tmp_path, frame)
            img = plt.imread(tmp_path)
            res = self._detector.detect_emotions(img)
        finally:
            # do not leave the snapshot behind, even when detection fails
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        if not res:
            logger.info("No face detected")
            return 'neutral'
        scores = res[0]['emotions']
        return max(scores, key=scores.get)

    def audioToText(self):
        """Record one utterance from the microphone and transcribe it.

        Returns:
            The recognized text in lower case, or the string 'error' when the
            recognition request failed or the audio could not be understood.
        """
        try:
            # use the microphone as source for input
            with sr.Microphone() as source:
                # give the recognizer 0.2 s to adjust the energy threshold
                # to the surrounding noise level
                self.r.adjust_for_ambient_noise(source, duration=0.2)
                # listen for the user's input
                audio = self.r.listen(source)
                # use Google to recognize the audio
                return self.r.recognize_google(audio).lower()
        except sr.RequestError as e:
            print("Could not request results; {0}".format(e))
            logger.warning("Aud2text: Could not request results; %s", e)
            return 'error'
        except sr.UnknownValueError:
            print("unknown error occured")
            logger.warning("Aud2text: unknown error occured")
            return 'error'

    def getTextEmotion(self, t):
        """Return the dominant emotion of text ``t`` in lower case.

        Returns 'neutral' when the highest score reported by ``get_emotion``
        is 0.
        """
        scores = get_emotion(t)
        top = max(scores, key=scores.get)
        if scores[top] == 0:
            return 'neutral'
        return top.lower()

    def getChatReply(self, q):
        """Return the chatbot's response to text ``q`` as a string."""
        return str(self.chatbot.get_response(q))

    def vid(self, lock):
        """Show the reply GIF until 'q' is pressed or a stop is requested.

        The first frame is shown while the bot is idle (``flag`` is True); the
        frames are cycled while it is speaking. Pressing 'r' toggles ``flag``
        by hand. On exit the stop event is set so ``ensemble`` ends as well.

        Args:
            lock: lock that guards the global ``flag``.
        """
        global flag
        gif = imageio.mimread('./resources/boy-talk.gif')
        frames = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in gif]
        count = len(frames)
        i = 0
        try:
            while not self._stop_event.is_set():
                with lock:
                    idle = flag
                # no movement if not speaking, animated gif while speaking
                cv2.imshow("gif", frames[0] if idle else frames[i])
                pressed = cv2.waitKey(25) & 0xFF
                if pressed == ord('q'):
                    # quit
                    logger.info("user requested to quit!")
                    break
                if pressed == ord('r'):
                    # just to check if gif is used in dynamic sense
                    # (check by pressing 'r')
                    with lock:
                        flag = not flag
                i = (i + 1) % count
        finally:
            # tell the conversation thread to stop too, then close the window
            self._stop_event.set()
            cv2.destroyAllWindows()

    def SpeakText(self, command):
        """Speak ``command`` aloud and block until the speech has finished."""
        self.engine.say(command)
        self.engine.runAndWait()

    def ensemble(self, lock):
        """Run the conversation loop until the user says 'exit'.

        One turn: video emotion, audio-to-text, text emotion, emotion
        validation, chat reply, spoken reply. The loop also ends when a stop
        was requested by ``vid``; that request is only noticed between turns,
        because the calls inside a turn block. On exit the stop event is set
        so the GIF window closes as well.

        Args:
            lock: lock that guards the global ``flag``.
        """
        global flag
        txt = ''
        try:
            while txt != 'exit' and not self._stop_event.is_set():
                video_emotion = self.getUserEmotion()
                logger.info("Video Emotion: %s", video_emotion)
                self.SpeakText('Your Turn')
                # NOTE(review): on a recognition failure txt is the sentinel
                # 'error' and is still handled below like spoken text.
                txt = self.audioToText()
                logger.info("Audio To Text: %s", txt)
                txt_emotion = self.getTextEmotion(txt)
                logger.info("Text Emotion: %s", txt_emotion)
                txt_inference = ''
                if txt_emotion != video_emotion and video_emotion != 'neutral':
                    # face and words disagree: tell the chatbot how the user looks
                    txt_inference = 'I am ' + video_emotion
                    logger.info("Added Text: %s", txt_inference)
                # keep a space between the utterance and the added sentence so
                # the last spoken word and 'I' do not fuse into one token
                if txt_inference:
                    query = txt + ' ' + txt_inference
                else:
                    query = txt
                cbot_reply = self.getChatReply(query)
                logger.info("Chatbot Reply: %s", cbot_reply)
                with lock:
                    flag = False
                try:
                    self.SpeakText(cbot_reply)
                    logger.info("Speaking something")
                finally:
                    # stop the animation even if speaking failed
                    with lock:
                        flag = True
        finally:
            # make the display thread close its window
            self._stop_event.set()

    def run(self):
        """Run the GIF display and the conversation loop in two threads.

        Blocks until both threads have finished.
        """
        # allow run() to be called again after a previous run was stopped
        self._stop_event.clear()
        # thread lock for critical section
        lock = threading.Lock()
        workers = [
            # thread 1: display gif
            threading.Thread(target=self.vid, args=(lock,)),
            # thread 2: ensemble
            threading.Thread(target=self.ensemble, args=(lock,)),
        ]
        for worker in workers:
            worker.start()
        # wait until both threads are completely executed
        for worker in workers:
            worker.join()
        print("Done!")
        logger.info("Successfully completed execution, terminating vcbot!")
# VCBot().run()