From ef3906807bdad29b2d67a76d93889b6fab0aec16 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Mon, 15 May 2017 13:27:54 +0900 Subject: [PATCH 01/26] Implement audio recorder using audioRecord class --- AudioExample/AudioExample.js | 10 ++-- .../rnim/rn/audio/AudioRecorderManager.java | 55 +++++++++++++++++++ index.js | 6 ++ 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index a65e04d6..8dcdf84a 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -20,7 +20,7 @@ class AudioExample extends Component { recording: false, stoppedRecording: false, finished: false, - audioPath: AudioUtils.DocumentDirectoryPath + '/test.aac', + audioPath: AudioUtils.DocumentDirectoryPath + '/test.wav', hasPermission: undefined, }; @@ -40,8 +40,8 @@ class AudioExample extends Component { if (!hasPermission) return; - this.prepareRecordingPath(this.state.audioPath); - + // this.prepareRecordingPath(this.state.audioPath); + console.log(AudioRecorder); AudioRecorder.onProgress = (data) => { this.setState({currentTime: Math.floor(data.currentTime)}); }; @@ -113,7 +113,7 @@ class AudioExample extends Component { this.setState({stoppedRecording: true, recording: false}); try { - const filePath = await AudioRecorder.stopRecording(); + const filePath = await AudioRecorder.stopStreaming(); if (Platform.OS === 'android') { this._finishRecording(true, filePath); @@ -168,7 +168,7 @@ class AudioExample extends Component { this.setState({recording: true}); try { - const filePath = await AudioRecorder.startRecording(); + const filePath = await AudioRecorder.startStreaming(this.state.audioPath); } catch (error) { console.error(error); } diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 33c65d0f..375df8f6 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -20,12 +20,17 @@ import java.util.TimerTask; import android.content.pm.PackageManager; +import android.media.AudioFormat; +import android.media.AudioRecord; +import android.os.AsyncTask; import android.os.Environment; import android.media.MediaRecorder; import android.media.AudioManager; import android.support.v4.app.ActivityCompat; import android.support.v4.content.ContextCompat; import android.util.Log; +import android.widget.Toast; + import com.facebook.react.modules.core.DeviceEventManagerModule; import java.io.FileInputStream; @@ -49,6 +54,9 @@ class AudioRecorderManager extends ReactContextBaseJavaModule { private Timer timer; private int recorderSecondsElapsed; + // For AudioRecord Class + private RecordWaveTask recordTask = null; + public AudioRecorderManager(ReactApplicationContext reactContext) { super(reactContext); @@ -155,6 +163,53 @@ private int getOutputFormatFromString(String outputFormat) { } } + @ReactMethod + public void prepareStreamingAtPath(String recordingPath, ReadableMap recordingSettings, Promise promise) { + + } + + @ReactMethod + public void startStreaming(String recordingPath, Promise promise){ + if (recordTask == null) { + recordTask = new RecordWaveTask(context); + } else { + recordTask.setContext(context); + } + switch (recordTask.getStatus()) { + case RUNNING: + Toast.makeText(context, "Task already running...", Toast.LENGTH_SHORT).show(); + logAndRejectPromise(promise, "INVALID_STATE", "Please call stopStreaming before starting streaming"); + 
return; + case FINISHED: + recordTask = new RecordWaveTask(context); + break; + case PENDING: + if (recordTask.isCancelled()) { + recordTask = new RecordWaveTask(context); + } + } + File wavFile = new File(recordingPath); + Toast.makeText(context, wavFile.getAbsolutePath(), Toast.LENGTH_LONG).show(); + recordTask.execute(wavFile); + + isRecording = true; + currentOutputFile = recordingPath; + promise.resolve(currentOutputFile); + } + + @ReactMethod + public void stopStreaming(Promise promise){ + if (!recordTask.isCancelled() && recordTask.getStatus() == AsyncTask.Status.RUNNING) { + isRecording = false; + recordTask.cancel(false); + promise.resolve(currentOutputFile); + sendEvent("recordingFinished", null); + } else { + Toast.makeText(context, "Task not running.", Toast.LENGTH_SHORT).show(); + logAndRejectPromise(promise, "INVALID_STATE", "Please call startStreaming before stopping streaming"); + } + } + @ReactMethod public void startRecording(Promise promise){ if (recorder == null){ diff --git a/index.js b/index.js index 450ed839..23e7decc 100644 --- a/index.js +++ b/index.js @@ -65,6 +65,12 @@ var AudioRecorder = { stopRecording: function() { return AudioRecorderManager.stopRecording(); }, + startStreaming: function(path) { + return AudioRecorderManager.startStreaming(path); + }, + stopStreaming: function() { + return AudioRecorderManager.stopStreaming(); + }, checkAuthorizationStatus: AudioRecorderManager.checkAuthorizationStatus, requestAuthorization: AudioRecorderManager.requestAuthorization, removeListeners: function() { From 89776f0efe1a60e97c70b81a74eadb16e257f22b Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Mon, 15 May 2017 17:18:48 +0900 Subject: [PATCH 02/26] Add streaming API --- .../rnim/rn/audio/AudioRecorderManager.java | 27 +- .../com/rnim/rn/audio/RecordWaveTask.java | 299 ++++++++++++++++++ 2 files changed, 315 insertions(+), 11 deletions(-) create mode 100644 android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 375df8f6..3747eea9 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -61,6 +61,11 @@ class AudioRecorderManager extends ReactContextBaseJavaModule { public AudioRecorderManager(ReactApplicationContext reactContext) { super(reactContext); this.context = reactContext; + if (recordTask == null) { + recordTask = new RecordWaveTask(context); + } else { + recordTask.setContext(context); + } } @Override @@ -170,14 +175,9 @@ public void prepareStreamingAtPath(String recordingPath, ReadableMap recordingSe @ReactMethod public void startStreaming(String recordingPath, Promise promise){ - if (recordTask == null) { - recordTask = new RecordWaveTask(context); - } else { - recordTask.setContext(context); - } switch (recordTask.getStatus()) { case RUNNING: - Toast.makeText(context, "Task already running...", Toast.LENGTH_SHORT).show(); + // Toast.makeText(context, "Task already running...", Toast.LENGTH_SHORT).show(); logAndRejectPromise(promise, "INVALID_STATE", "Please call stopStreaming before starting streaming"); return; case FINISHED: @@ -189,7 +189,7 @@ public void startStreaming(String recordingPath, Promise promise){ } } File wavFile = new File(recordingPath); - Toast.makeText(context, wavFile.getAbsolutePath(), Toast.LENGTH_LONG).show(); + // Toast.makeText(context, wavFile.getAbsolutePath(), 
Toast.LENGTH_LONG).show(); recordTask.execute(wavFile); isRecording = true; @@ -198,14 +198,19 @@ public void startStreaming(String recordingPath, Promise promise){ } @ReactMethod - public void stopStreaming(Promise promise){ + public void stopStreaming(final Promise promise){ if (!recordTask.isCancelled() && recordTask.getStatus() == AsyncTask.Status.RUNNING) { isRecording = false; + recordTask.setCancelCompleteListener(new RecordWaveTask.OnCancelCompleteListener() { + @Override + public void onCancelCompleted() { + promise.resolve(currentOutputFile); + sendEvent("recordingFinished", null); + } + }); recordTask.cancel(false); - promise.resolve(currentOutputFile); - sendEvent("recordingFinished", null); } else { - Toast.makeText(context, "Task not running.", Toast.LENGTH_SHORT).show(); + // Toast.makeText(context, "Task not running.", Toast.LENGTH_SHORT).show(); logAndRejectPromise(promise, "INVALID_STATE", "Please call startStreaming before stopping streaming"); } } diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java new file mode 100644 index 00000000..faf5ea13 --- /dev/null +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -0,0 +1,299 @@ +package com.rnim.rn.audio; + +import android.content.Context; +import android.media.AudioFormat; +import android.media.AudioRecord; +import android.media.MediaRecorder; +import android.os.AsyncTask; +import android.os.SystemClock; +import android.util.Log; +import android.widget.Toast; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Locale; + +/** + * Created by KDH on 2017. 5. 15.. + */ + +public class RecordWaveTask extends AsyncTask { + + // Configure me! + private static final int AUDIO_SOURCE = MediaRecorder.AudioSource.MIC; + private static final int SAMPLE_RATE = 44100; // Hz + private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT; + private static final int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; + // + + private static final int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); + + private Context ctx; + + public RecordWaveTask(Context ctx) { + setContext(ctx); + } + + public void setContext(Context ctx) { + this.ctx = ctx; + } + + // Step 1 - This interface defines the type of messages I want to communicate to my owner + public interface OnCancelCompleteListener { + public void onCancelCompleted(); + } + private OnCancelCompleteListener listener = null; + + public void setCancelCompleteListener(OnCancelCompleteListener listener) { + this.listener = listener; + } + + /** + * Opens up the given file, writes the header, and keeps filling it with raw PCM bytes from + * AudioRecord until it reaches 4GB or is stopped by the user. It then goes back and updates + * the WAV header to include the proper final chunk sizes. + * + * @param files Index 0 should be the file to write to + * @return Either an Exception (error) or two longs, the filesize, elapsed time in ms (success) + */ + @Override + protected Object[] doInBackground(File... 
files) { + AudioRecord audioRecord = null; + FileOutputStream wavOut = null; + long startTime = 0; + long endTime = 0; + + try { + // Open our two resources + audioRecord = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_MASK, ENCODING, BUFFER_SIZE); + wavOut = new FileOutputStream(files[0]); + + // Write out the wav file header + writeWavHeader(wavOut, CHANNEL_MASK, SAMPLE_RATE, ENCODING); + + // Avoiding loop allocations + byte[] buffer = new byte[BUFFER_SIZE]; + boolean run = true; + int read; + long total = 0; + + // Let's go + startTime = SystemClock.elapsedRealtime(); + audioRecord.startRecording(); + while (run && !isCancelled()) { + read = audioRecord.read(buffer, 0, buffer.length); + + // WAVs cannot be > 4 GB due to the use of 32 bit unsigned integers. + if (total + read > 4294967295L) { + // Write as many bytes as we can before hitting the max size + for (int i = 0; i < read && total <= 4294967295L; i++, total++) { + wavOut.write(buffer[i]); + } + run = false; + } else { + // Write out the entire read buffer + wavOut.write(buffer, 0, read); + total += read; + } + } + } catch (IOException ex) { + return new Object[]{ex}; + } finally { + if (audioRecord != null) { + try { + if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) { + audioRecord.stop(); + endTime = SystemClock.elapsedRealtime(); + } + } catch (IllegalStateException ex) { + // + } + if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) { + audioRecord.release(); + } + } + if (wavOut != null) { + try { + wavOut.close(); + } catch (IOException ex) { + // + } + } + } + + try { + // This is not put in the try/catch/finally above since it needs to run + // after we close the FileOutputStream + updateWavHeader(files[0]); + } catch (IOException ex) { + return new Object[] { ex }; + } + + return new Object[] { files[0].length(), endTime - startTime }; + } + + /** + * Writes the proper 44-byte RIFF/WAVE header to/for the given stream + * Two size fields are left empty/null since we do not yet know the final stream size + * + * @param out The stream to write the header to + * @param channelMask An AudioFormat.CHANNEL_* mask + * @param sampleRate The sample rate in hertz + * @param encoding An AudioFormat.ENCODING_PCM_* value + * @throws IOException + */ + private static void writeWavHeader(OutputStream out, int channelMask, int sampleRate, int encoding) throws IOException { + short channels; + switch (channelMask) { + case AudioFormat.CHANNEL_IN_MONO: + channels = 1; + break; + case AudioFormat.CHANNEL_IN_STEREO: + channels = 2; + break; + default: + throw new IllegalArgumentException("Unacceptable channel mask"); + } + + short bitDepth; + switch (encoding) { + case AudioFormat.ENCODING_PCM_8BIT: + bitDepth = 8; + break; + case AudioFormat.ENCODING_PCM_16BIT: + bitDepth = 16; + break; + case AudioFormat.ENCODING_PCM_FLOAT: + bitDepth = 32; + break; + default: + throw new IllegalArgumentException("Unacceptable encoding"); + } + + writeWavHeader(out, channels, sampleRate, bitDepth); + } + + /** + * Writes the proper 44-byte RIFF/WAVE header to/for the given stream + * Two size fields are left empty/null since we do not yet know the final stream size + * + * @param out The stream to write the header to + * @param channels The number of channels + * @param sampleRate The sample rate in hertz + * @param bitDepth The bit depth + * @throws IOException + */ + private static void writeWavHeader(OutputStream out, short channels, int sampleRate, short bitDepth) throws IOException { + // Convert the multi-byte 
integers to raw bytes in little endian format as required by the spec + byte[] littleBytes = ByteBuffer + .allocate(14) + .order(ByteOrder.LITTLE_ENDIAN) + .putShort(channels) + .putInt(sampleRate) + .putInt(sampleRate * channels * (bitDepth / 8)) + .putShort((short) (channels * (bitDepth / 8))) + .putShort(bitDepth) + .array(); + + // Not necessarily the best, but it's very easy to visualize this way + out.write(new byte[]{ + // RIFF header + 'R', 'I', 'F', 'F', // ChunkID + 0, 0, 0, 0, // ChunkSize (must be updated later) + 'W', 'A', 'V', 'E', // Format + // fmt subchunk + 'f', 'm', 't', ' ', // Subchunk1ID + 16, 0, 0, 0, // Subchunk1Size + 1, 0, // AudioFormat + littleBytes[0], littleBytes[1], // NumChannels + littleBytes[2], littleBytes[3], littleBytes[4], littleBytes[5], // SampleRate + littleBytes[6], littleBytes[7], littleBytes[8], littleBytes[9], // ByteRate + littleBytes[10], littleBytes[11], // BlockAlign + littleBytes[12], littleBytes[13], // BitsPerSample + // data subchunk + 'd', 'a', 't', 'a', // Subchunk2ID + 0, 0, 0, 0, // Subchunk2Size (must be updated later) + }); + } + + /** + * Updates the given wav file's header to include the final chunk sizes + * + * @param wav The wav file to update + * @throws IOException + */ + private static void updateWavHeader(File wav) throws IOException { + byte[] sizes = ByteBuffer + .allocate(8) + .order(ByteOrder.LITTLE_ENDIAN) + // There are probably a bunch of different/better ways to calculate + // these two given your circumstances. Cast should be safe since if the WAV is + // > 4 GB we've already made a terrible mistake. + .putInt((int) (wav.length() - 8)) // ChunkSize + .putInt((int) (wav.length() - 44)) // Subchunk2Size + .array(); + + RandomAccessFile accessWave = null; + //noinspection CaughtExceptionImmediatelyRethrown + try { + accessWave = new RandomAccessFile(wav, "rw"); + // ChunkSize + accessWave.seek(4); + accessWave.write(sizes, 0, 4); + + // Subchunk2Size + accessWave.seek(40); + accessWave.write(sizes, 4, 4); + } catch (IOException ex) { + // Rethrow but we still close accessWave in our finally + throw ex; + } finally { + if (accessWave != null) { + try { + accessWave.close(); + } catch (IOException ex) { + // + } + } + } + } + + @Override + protected void onCancelled(Object[] results) { + // Handling cancellations and successful runs in the same way + onPostExecute(results); + } + + @Override + protected void onPostExecute(Object[] results) { + Throwable throwable = null; + if (results[0] instanceof Throwable) { + // Error + throwable = (Throwable) results[0]; + Log.e(RecordWaveTask.class.getSimpleName(), throwable.getMessage(), throwable); + } + + // If we're attached to an activity + if (ctx != null) { + if (throwable == null) { + // Display final recording stats + double size = (long) results[0] / 1000000.00; + long time = (long) results[1] / 1000; + // Toast.makeText(ctx, String.format(Locale.getDefault(), "%.2f MB / %d seconds", + // size, time), Toast.LENGTH_LONG).show(); + } else { + // Error + // Toast.makeText(ctx, throwable.getLocalizedMessage(), Toast.LENGTH_LONG).show(); + } + } + + if (listener != null) { + listener.onCancelCompleted(); + } + } +} \ No newline at end of file From 083a92c7b80346490c4bafa622e5f0079c6c88d1 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Fri, 26 May 2017 15:07:38 +0900 Subject: [PATCH 03/26] Implement streaming record in Android # TODO - Supporting empty output file - iOS --- AudioExample/AudioExample.js | 9 +++-- .../rnim/rn/audio/AudioRecorderManager.java | 35 
++++++++++++++---- .../com/rnim/rn/audio/RecordWaveTask.java | 31 +++++++++++++--- index.js | 36 +++++++++++++++++-- 4 files changed, 95 insertions(+), 16 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 8dcdf84a..e68d1fef 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -40,7 +40,8 @@ class AudioExample extends Component { if (!hasPermission) return; - // this.prepareRecordingPath(this.state.audioPath); + this.prepareRecordingPath(this.state.audioPath); + AudioRecorder.prepareStreamingAtPath(this.state.audioPath); console.log(AudioRecorder); AudioRecorder.onProgress = (data) => { this.setState({currentTime: Math.floor(data.currentTime)}); @@ -52,6 +53,10 @@ class AudioExample extends Component { this._finishRecording(data.status === "OK", data.audioFileURL); } }; + + AudioRecorder.onDataReceived = (data) => { + console.log(data); + } }); } @@ -168,7 +173,7 @@ class AudioExample extends Component { this.setState({recording: true}); try { - const filePath = await AudioRecorder.startStreaming(this.state.audioPath); + const filePath = await AudioRecorder.startStreaming(); } catch (error) { console.error(error); } diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 3747eea9..c76eba37 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -10,6 +10,7 @@ import com.facebook.react.bridge.Arguments; import com.facebook.react.bridge.Promise; import com.facebook.react.bridge.ReadableMap; +import com.facebook.react.bridge.WritableArray; import com.facebook.react.bridge.WritableMap; import java.io.File; @@ -170,30 +171,48 @@ private int getOutputFormatFromString(String outputFormat) { @ReactMethod public void prepareStreamingAtPath(String recordingPath, ReadableMap recordingSettings, Promise promise) { + File wavFile = new File(recordingPath); + recordTask = new RecordWaveTask(context); + recordTask.setOutputFile(wavFile); + recordTask.setStreamListener(new RecordWaveTask.OnStreamListener() { + @Override + public void onDataReceived(byte[] buffer) { + Log.d("onDataReceived", buffer.length + ""); + WritableArray body = Arguments.createArray(); + for (byte value: buffer) { + body.pushInt((int) value); + } + sendEvent("dataReceived", body); + } + }); + currentOutputFile = recordingPath; } @ReactMethod - public void startStreaming(String recordingPath, Promise promise){ + public void startStreaming(Promise promise){ + if (recordTask == null){ + logAndRejectPromise(promise, "STREAMING_NOT_PREPARED", "Please call prepareStreamingAtPath before starting streaming"); + return; + } switch (recordTask.getStatus()) { case RUNNING: // Toast.makeText(context, "Task already running...", Toast.LENGTH_SHORT).show(); logAndRejectPromise(promise, "INVALID_STATE", "Please call stopStreaming before starting streaming"); return; case FINISHED: - recordTask = new RecordWaveTask(context); + logAndRejectPromise(promise, "STREAMING_NOT_PREPARED", "Please call prepareStreamingAtPath before starting streaming"); break; case PENDING: if (recordTask.isCancelled()) { - recordTask = new RecordWaveTask(context); + // recordTask = new RecordWaveTask(context); } } - File wavFile = new File(recordingPath); - // Toast.makeText(context, wavFile.getAbsolutePath(), Toast.LENGTH_LONG).show(); - recordTask.execute(wavFile); + startTimer(); + + recordTask.execute(); 
isRecording = true; - currentOutputFile = recordingPath; promise.resolve(currentOutputFile); } @@ -204,11 +223,13 @@ public void stopStreaming(final Promise promise){ recordTask.setCancelCompleteListener(new RecordWaveTask.OnCancelCompleteListener() { @Override public void onCancelCompleted() { + recordTask = null; promise.resolve(currentOutputFile); sendEvent("recordingFinished", null); } }); recordTask.cancel(false); + stopTimer(); } else { // Toast.makeText(context, "Task not running.", Toast.LENGTH_SHORT).show(); logAndRejectPromise(promise, "INVALID_STATE", "Please call startStreaming before stopping streaming"); diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index faf5ea13..63c92942 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -34,6 +34,7 @@ public class RecordWaveTask extends AsyncTask { private static final int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); private Context ctx; + private File outputFile; public RecordWaveTask(Context ctx) { setContext(ctx); @@ -43,14 +44,25 @@ public void setContext(Context ctx) { this.ctx = ctx; } + public void setOutputFile(File file) { this.outputFile = file; } + // Step 1 - This interface defines the type of messages I want to communicate to my owner public interface OnCancelCompleteListener { public void onCancelCompleted(); } - private OnCancelCompleteListener listener = null; + private OnCancelCompleteListener cancelCompleteListener = null; public void setCancelCompleteListener(OnCancelCompleteListener listener) { - this.listener = listener; + this.cancelCompleteListener = listener; + } + + public interface OnStreamListener { + public void onDataReceived(byte[] buffer); + } + private OnStreamListener streamListener = null; + + public void setStreamListener(OnStreamListener listener) { + this.streamListener = listener; } /** @@ -71,7 +83,7 @@ protected Object[] doInBackground(File... files) { try { // Open our two resources audioRecord = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_MASK, ENCODING, BUFFER_SIZE); - wavOut = new FileOutputStream(files[0]); + wavOut = new FileOutputStream(this.outputFile); // Write out the wav file header writeWavHeader(wavOut, CHANNEL_MASK, SAMPLE_RATE, ENCODING); @@ -91,14 +103,23 @@ protected Object[] doInBackground(File... files) { // WAVs cannot be > 4 GB due to the use of 32 bit unsigned integers. 
if (total + read > 4294967295L) { // Write as many bytes as we can before hitting the max size + byte[] tmpBuffer = new byte[BUFFER_SIZE]; for (int i = 0; i < read && total <= 4294967295L; i++, total++) { wavOut.write(buffer[i]); + tmpBuffer[i] = buffer[i]; + } + if (this.streamListener != null) { + this.streamListener.onDataReceived(tmpBuffer); } run = false; } else { // Write out the entire read buffer wavOut.write(buffer, 0, read); total += read; + if (this.streamListener != null) { + Log.d("onDataReceived", "RecordWaveTask - " + buffer.length + ""); + this.streamListener.onDataReceived(buffer.clone()); + } } } } catch (IOException ex) { @@ -292,8 +313,8 @@ protected void onPostExecute(Object[] results) { } } - if (listener != null) { - listener.onCancelCompleted(); + if (cancelCompleteListener != null) { + cancelCompleteListener.onCancelCompleted(); } } } \ No newline at end of file diff --git a/index.js b/index.js index 23e7decc..961c3082 100644 --- a/index.js +++ b/index.js @@ -12,6 +12,37 @@ import ReactNative, { var AudioRecorderManager = NativeModules.AudioRecorderManager; var AudioRecorder = { + prepareStreamingAtPath: function(path, options) { + if (this.progressSubscription) this.progressSubscription.remove(); + this.progressSubscription = NativeAppEventEmitter.addListener('recordingProgress', + (data) => { + if (this.onProgress) { + this.onProgress(data); + } + } + ); + + if (this.finishedSubscription) this.finishedSubscription.remove(); + this.finishedSubscription = NativeAppEventEmitter.addListener('recordingFinished', + (data) => { + if (this.onFinished) { + this.onFinished(data); + } + } + ); + + if (this.dataReceivedSubscription) this.dataReceivedSubscription.remove(); + this.dataReceivedSubscription = NativeAppEventEmitter.addListener('dataReceived', + (data) => { + console.log(data); + if (this.onDataReceived) { + this.onDataReceived(data); + } + } + ); + + AudioRecorderManager.prepareStreamingAtPath(path, options); + }, prepareRecordingAtPath: function(path, options) { if (this.progressSubscription) this.progressSubscription.remove(); this.progressSubscription = NativeAppEventEmitter.addListener('recordingProgress', @@ -65,8 +96,8 @@ var AudioRecorder = { stopRecording: function() { return AudioRecorderManager.stopRecording(); }, - startStreaming: function(path) { - return AudioRecorderManager.startStreaming(path); + startStreaming: function() { + return AudioRecorderManager.startStreaming(); }, stopStreaming: function() { return AudioRecorderManager.stopStreaming(); @@ -76,6 +107,7 @@ var AudioRecorder = { removeListeners: function() { if (this.progressSubscription) this.progressSubscription.remove(); if (this.finishedSubscription) this.finishedSubscription.remove(); + if (this.dataReceivedSubscription) this.dataReceivedSubscription.remove(); }, }; From 81b39c23b9ebaebbe1bd094a89c4c9c474cb8c8a Mon Sep 17 00:00:00 2001 From: JeungminOh Date: Tue, 30 May 2017 18:14:00 +0900 Subject: [PATCH 04/26] Temporary commit --- AudioExample/AudioExample.js | 10 +- index.js | 28 +++- ios/AudioRecorderManager.m | 25 ++++ ios/RNAudio.xcodeproj/project.pbxproj | 6 + ios/StreamingModule.h | 35 +++++ ios/StreamingModule.m | 193 ++++++++++++++++++++++++++ package.json | 4 +- 7 files changed, 296 insertions(+), 5 deletions(-) create mode 100644 ios/StreamingModule.h create mode 100644 ios/StreamingModule.m diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index e68d1fef..bd73212e 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -41,7 
+41,15 @@ class AudioExample extends Component { if (!hasPermission) return; this.prepareRecordingPath(this.state.audioPath); - AudioRecorder.prepareStreamingAtPath(this.state.audioPath); + console.log('hi3'); + AudioRecorder.prepareStreamingAtPath(this.state.audioPath, { + SampleRate: 22050, + Channels: 1, + AudioQuality: "Low", + AudioEncoding: "aac", + AudioEncodingBitRate: 32000, + }); + console.log('hi2'); console.log(AudioRecorder); AudioRecorder.onProgress = (data) => { this.setState({currentTime: Math.floor(data.currentTime)}); diff --git a/index.js b/index.js index 961c3082..23448055 100644 --- a/index.js +++ b/index.js @@ -41,7 +41,31 @@ var AudioRecorder = { } ); - AudioRecorderManager.prepareStreamingAtPath(path, options); + var defaultOptions = { + SampleRate: 44100.0, + Channels: 2, + AudioQuality: 'High', + AudioEncoding: 'ima4', + OutputFormat: 'mpeg_4', + MeteringEnabled: false, + AudioEncodingBitRate: 32000 + }; + + var recordingOptions = {...defaultOptions, ...options}; + + if (Platform.OS === 'ios') { + console.log('prepareStreamingAtPath()'); + AudioRecorderManager.prepareStreamingAtPath( + path, + recordingOptions.SampleRate, + recordingOptions.Channels, + recordingOptions.AudioQuality, + recordingOptions.AudioEncoding, + recordingOptions.MeteringEnabled + ); + } else { + return AudioRecorderManager.prepareStreamingAtPath(path, recordingOptions); + } }, prepareRecordingAtPath: function(path, options) { if (this.progressSubscription) this.progressSubscription.remove(); @@ -97,6 +121,8 @@ var AudioRecorder = { return AudioRecorderManager.stopRecording(); }, startStreaming: function() { + console.log(AudioRecorderManager); + console.log('???'); return AudioRecorderManager.startStreaming(); }, stopStreaming: function() { diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index c8e9bbb8..50991a00 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -12,6 +12,7 @@ #import #import #import +#import "StreamingModule.h" NSString *const AudioRecorderEventProgress = @"recordingProgress"; NSString *const AudioRecorderEventFinished = @"recordingFinished"; @@ -33,6 +34,8 @@ @implementation AudioRecorderManager { BOOL _meteringEnabled; } +StreamingModule* streamingModule; + @synthesize bridge = _bridge; RCT_EXPORT_MODULE(); @@ -237,6 +240,28 @@ - (NSString *) applicationDocumentsDirectory }]; } +RCT_EXPORT_METHOD(prepareStreamingAtPath:(NSString *)path sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled) +{ + NSLog(@"PrepareStreaming"); + streamingModule = [[StreamingModule alloc] init]; + _audioFileURL = [NSURL fileURLWithPath:path]; +} + +RCT_EXPORT_METHOD(startStreaming) +{ + NSLog(@"startStreaming"); + NSLog(@"%@", _audioFileURL); + [streamingModule startRecording: CFBridgingRetain(_audioFileURL)]; +} + +RCT_EXPORT_METHOD(stopStreaming) +{ + NSLog(@"stopStreaming"); + [streamingModule stopRecording]; + +} + + - (NSString *)getPathForDirectory:(int)directory { NSArray *paths = NSSearchPathForDirectoriesInDomains(directory, NSUserDomainMask, YES); diff --git a/ios/RNAudio.xcodeproj/project.pbxproj b/ios/RNAudio.xcodeproj/project.pbxproj index 5e1326f0..c0eefd86 100644 --- a/ios/RNAudio.xcodeproj/project.pbxproj +++ b/ios/RNAudio.xcodeproj/project.pbxproj @@ -7,6 +7,7 @@ objects = { /* Begin PBXBuildFile section */ + 38D7625B1EDD3F58007B8DE3 /* StreamingModule.m in Sources */ = {isa = PBXBuildFile; fileRef = 38D762591EDD3F58007B8DE3 
/* StreamingModule.m */; }; 429D457A1CFC96E100CBD51A /* AudioRecorderManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */; }; /* End PBXBuildFile section */ @@ -23,6 +24,8 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 38D762591EDD3F58007B8DE3 /* StreamingModule.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = StreamingModule.m; sourceTree = SOURCE_ROOT; }; + 38D7625A1EDD3F58007B8DE3 /* StreamingModule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = StreamingModule.h; sourceTree = SOURCE_ROOT; }; 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = AudioRecorderManager.m; sourceTree = SOURCE_ROOT; }; 429D45771CFC96E100CBD51A /* AudioRecorderManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AudioRecorderManager.h; sourceTree = SOURCE_ROOT; }; 42F559BA1CFC90C400DC3F84 /* libRNAudio.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNAudio.a; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -60,6 +63,8 @@ children = ( 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */, 429D45771CFC96E100CBD51A /* AudioRecorderManager.h */, + 38D762591EDD3F58007B8DE3 /* StreamingModule.m */, + 38D7625A1EDD3F58007B8DE3 /* StreamingModule.h */, ); path = RNAudio; sourceTree = ""; @@ -120,6 +125,7 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 38D7625B1EDD3F58007B8DE3 /* StreamingModule.m in Sources */, 429D457A1CFC96E100CBD51A /* AudioRecorderManager.m in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/ios/StreamingModule.h b/ios/StreamingModule.h new file mode 100644 index 00000000..e7d468b5 --- /dev/null +++ b/ios/StreamingModule.h @@ -0,0 +1,35 @@ +// +// StreamingModule.h +// RNAudio +// +// Created by JeungminOh on 30/05/2017. +// Copyright © 2017 Joshua Sierles. All rights reserved. +// + +#import +#import + +#define NUM_BUFFERS 3 +#define SECONDS_TO_RECORD 10 + +// Struct defining recording state +typedef struct +{ + AudioStreamBasicDescription dataFormat; + AudioQueueRef queue; + AudioQueueBufferRef buffers[NUM_BUFFERS]; + AudioFileID audioFile; + SInt64 currentPacket; + bool recording; +} RecordState; + +@interface StreamingModule : NSObject +{ + RecordState recordState; + CFURLRef fileURL; +} + +- (void)startRecording:(CFURLRef*)fileURL; +- (void)stopRecording; + +@end diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m new file mode 100644 index 00000000..f1f9a215 --- /dev/null +++ b/ios/StreamingModule.m @@ -0,0 +1,193 @@ +// +// StreamingModule.c +// RNAudio +// +// Created by JeungminOh on 30/05/2017. +// Copyright © 2017 Joshua Sierles. All rights reserved. 
+// + +#import "StreamingModule.h" +#import + +// Declare C callback functions +void AudioInputCallback(void * inUserData, // Custom audio metadata + AudioQueueRef inAQ, + AudioQueueBufferRef inBuffer, + const AudioTimeStamp * inStartTime, + UInt32 inNumberPacketDescriptions, + const AudioStreamPacketDescription * inPacketDescs); + +void AudioOutputCallback(void * inUserData, + AudioQueueRef outAQ, + AudioQueueBufferRef outBuffer); + + + +@implementation StreamingModule + +- init { + if (self = [super init]) { + // Your initialization code here + } + return self; +} + +static const int kNumberBuffers = 3; // 1 +struct AQRecorderState { + AudioStreamBasicDescription mDataFormat; // 2 + AudioQueueRef mQueue; // 3 + AudioQueueBufferRef mBuffers[kNumberBuffers]; // 4 + AudioFileID mAudioFile; // 5 + UInt32 bufferByteSize; // 6 + SInt64 mCurrentPacket; // 7 + bool mIsRunning; // 8 +}; + +static void HandleInputBuffer ( + void *aqData, + AudioQueueRef inAQ, + AudioQueueBufferRef inBuffer, + const AudioTimeStamp *inStartTime, + UInt32 inNumPackets, + const AudioStreamPacketDescription *inPacketDesc + ) { + AQRecorderState *pAqData = (AQRecorderState *) aqData; // 1 + + if (inNumPackets == 0 && // 2 + pAqData->mDataFormat.mBytesPerPacket != 0) + inNumPackets = + inBuffer->mAudioDataByteSize / pAqData->mDataFormat.mBytesPerPacket; + + if (AudioFileWritePackets ( // 3 + pAqData->mAudioFile, + false, + inBuffer->mAudioDataByteSize, + inPacketDesc, + pAqData->mCurrentPacket, + &inNumPackets, + inBuffer->mAudioData + ) == noErr) { + pAqData->mCurrentPacket += inNumPackets; // 4 + } + if (pAqData->mIsRunning == 0) // 5 + return; + + AudioQueueEnqueueBuffer ( // 6 + pAqData->mQueue, + inBuffer, + 0, + NULL + ); +} + +void DeriveBufferSize ( + AudioQueueRef audioQueue, // 1 + AudioStreamBasicDescription &ASBDescription, // 2 + Float64 seconds, // 3 + UInt32 *outBufferSize // 4 +) { + static const int maxBufferSize = 0x50000; // 5 + + int maxPacketSize = ASBDescription.mBytesPerPacket; // 6 + if (maxPacketSize == 0) { // 7 + UInt32 maxVBRPacketSize = sizeof(maxPacketSize); + AudioQueueGetProperty ( + audioQueue, + kAudioQueueProperty_MaximumOutputPacketSize, + // in Mac OS X v10.5, instead use + // kAudioConverterPropertyMaximumOutputPacketSize + &maxPacketSize, + &maxVBRPacketSize + ); + } + + Float64 numBytesForTime = + ASBDescription.mSampleRate * maxPacketSize * seconds; // 8 + *outBufferSize = + UInt32 (numBytesForTime < maxBufferSize ? 
+ numBytesForTime : maxBufferSize); // 9 +} + + +AQRecorderState aqData; +- (void)startRecording:(CFURLRef*)fileURL +{ + // AQRecorderState aqData; // 1 + + aqData.mDataFormat.mFormatID = kAudioFormatLinearPCM; // 2 + aqData.mDataFormat.mSampleRate = 44100.0; // 3 + aqData.mDataFormat.mChannelsPerFrame = 2; // 4 + aqData.mDataFormat.mBitsPerChannel = 16; // 5 + aqData.mDataFormat.mBytesPerPacket = // 6 + aqData.mDataFormat.mBytesPerFrame = + aqData.mDataFormat.mChannelsPerFrame * sizeof (SInt16); + aqData.mDataFormat.mFramesPerPacket = 1; // 7 + + AudioFileTypeID fileType = kAudioFileAIFFType; // 8 + aqData.mDataFormat.mFormatFlags = // 9 + kLinearPCMFormatFlagIsBigEndian + | kLinearPCMFormatFlagIsSignedInteger + | kLinearPCMFormatFlagIsPacked; + + AudioQueueNewInput ( // 1 + &aqData.mDataFormat, // 2 + HandleInputBuffer, // 3 + &aqData, // 4 + NULL, // 5 + kCFRunLoopCommonModes, // 6 + 0, // 7 + &aqData.mQueue // 8 + ); + + UInt32 dataFormatSize = sizeof (aqData.mDataFormat); // 1 + + AudioQueueGetProperty ( // 2 + aqData.mQueue, // 3 + kAudioQueueProperty_StreamDescription, // 4 + // in Mac OS X, instead use + // kAudioConverterCurrentInputStreamDescription + &aqData.mDataFormat, // 5 + &dataFormatSize // 6 + ); + + DeriveBufferSize ( // 1 + aqData.mQueue, // 2 + aqData.mDataFormat, // 3 + 0.5, // 4 + &aqData.bufferByteSize // 5 + ); + + for (int i = 0; i < kNumberBuffers; ++i) { // 1 + AudioQueueAllocateBuffer ( // 2 + aqData.mQueue, // 3 + aqData.bufferByteSize, // 4 + &aqData.mBuffers[i] // 5 + ); + + AudioQueueEnqueueBuffer ( // 6 + aqData.mQueue, // 7 + aqData.mBuffers[i], // 8 + 0, // 9 + NULL // 10 + ); + } + + aqData.mCurrentPacket = 0; + aqData.mIsRunning = true; + AudioQueueStart(aqData.mQueue, NULL); + +} + +- (void)stopRecording +{ + // Wait, on user interface thread, until user stops the recording + AudioQueueStop (aqData.mQueue, true); + aqData.mIsRunning = false; +} + +- (void)dealloc +{ + CFRelease(fileURL); +} + +@end diff --git a/package.json b/package.json index e2bbbe5a..96a9d064 100644 --- a/package.json +++ b/package.json @@ -5,9 +5,7 @@ "main": "index.js", "author": "Joshua Sierles (https://github.com/jsierles)", "files": [ - "ios/AudioRecorderManager.m", - "ios/AudioRecorderManager.h", - "ios/RNAudio.xcodeproj", + "ios/*", "README.md", "LICENSE", "index.js", From 28b457417c27ad6849988c11849fd19081f60905 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 30 May 2017 21:05:15 +0900 Subject: [PATCH 05/26] Implement streaming recorder using AVAudioEngine (temp) --- ios/AudioRecorderManager.m | 5 +- ios/RNAudio.xcodeproj/project.pbxproj | 12 ++ ios/StreamingModule.h | 25 +--- ios/StreamingModule.m | 188 +++----------------------- 4 files changed, 36 insertions(+), 194 deletions(-) diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 50991a00..8c24b5e8 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -244,6 +244,7 @@ - (NSString *) applicationDocumentsDirectory { NSLog(@"PrepareStreaming"); streamingModule = [[StreamingModule alloc] init]; + [streamingModule prepare]; _audioFileURL = [NSURL fileURLWithPath:path]; } @@ -251,13 +252,13 @@ - (NSString *) applicationDocumentsDirectory { NSLog(@"startStreaming"); NSLog(@"%@", _audioFileURL); - [streamingModule startRecording: CFBridgingRetain(_audioFileURL)]; + [streamingModule start]; } RCT_EXPORT_METHOD(stopStreaming) { NSLog(@"stopStreaming"); - [streamingModule stopRecording]; + [streamingModule stop]; } diff --git a/ios/RNAudio.xcodeproj/project.pbxproj 
b/ios/RNAudio.xcodeproj/project.pbxproj index c0eefd86..a1c6e8e4 100644 --- a/ios/RNAudio.xcodeproj/project.pbxproj +++ b/ios/RNAudio.xcodeproj/project.pbxproj @@ -9,6 +9,7 @@ /* Begin PBXBuildFile section */ 38D7625B1EDD3F58007B8DE3 /* StreamingModule.m in Sources */ = {isa = PBXBuildFile; fileRef = 38D762591EDD3F58007B8DE3 /* StreamingModule.m */; }; 429D457A1CFC96E100CBD51A /* AudioRecorderManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */; }; + 76A04C0C1EDD91B800516515 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 76A04C0B1EDD91B800516515 /* AVFoundation.framework */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -29,6 +30,7 @@ 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = AudioRecorderManager.m; sourceTree = SOURCE_ROOT; }; 429D45771CFC96E100CBD51A /* AudioRecorderManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AudioRecorderManager.h; sourceTree = SOURCE_ROOT; }; 42F559BA1CFC90C400DC3F84 /* libRNAudio.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNAudio.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 76A04C0B1EDD91B800516515 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -36,6 +38,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( + 76A04C0C1EDD91B800516515 /* AVFoundation.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -47,6 +50,7 @@ children = ( 42F559BC1CFC90C400DC3F84 /* RNAudio */, 42F559BB1CFC90C400DC3F84 /* Products */, + 76A04C0A1EDD91B700516515 /* Frameworks */, ); sourceTree = ""; }; @@ -69,6 +73,14 @@ path = RNAudio; sourceTree = ""; }; + 76A04C0A1EDD91B700516515 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 76A04C0B1EDD91B800516515 /* AVFoundation.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXNativeTarget section */ diff --git a/ios/StreamingModule.h b/ios/StreamingModule.h index e7d468b5..aaf1e3f1 100644 --- a/ios/StreamingModule.h +++ b/ios/StreamingModule.h @@ -6,30 +6,15 @@ // Copyright © 2017 Joshua Sierles. All rights reserved. 
// -#import -#import - -#define NUM_BUFFERS 3 -#define SECONDS_TO_RECORD 10 - -// Struct defining recording state -typedef struct -{ - AudioStreamBasicDescription dataFormat; - AudioQueueRef queue; - AudioQueueBufferRef buffers[NUM_BUFFERS]; - AudioFileID audioFile; - SInt64 currentPacket; - bool recording; -} RecordState; +#import @interface StreamingModule : NSObject { - RecordState recordState; - CFURLRef fileURL; + AVAudioEngine *engine; } -- (void)startRecording:(CFURLRef*)fileURL; -- (void)stopRecording; +- (void)prepare; +- (void)start; +- (void)stop; @end diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index f1f9a215..3f3d3883 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -7,187 +7,31 @@ // #import "StreamingModule.h" -#import - -// Declare C callback functions -void AudioInputCallback(void * inUserData, // Custom audio metadata - AudioQueueRef inAQ, - AudioQueueBufferRef inBuffer, - const AudioTimeStamp * inStartTime, - UInt32 inNumberPacketDescriptions, - const AudioStreamPacketDescription * inPacketDescs); - -void AudioOutputCallback(void * inUserData, - AudioQueueRef outAQ, - AudioQueueBufferRef outBuffer); - - @implementation StreamingModule -- init { - if (self = [super init]) { - // Your initialization code here - } - return self; -} - -static const int kNumberBuffers = 3; // 1 -struct AQRecorderState { - AudioStreamBasicDescription mDataFormat; // 2 - AudioQueueRef mQueue; // 3 - AudioQueueBufferRef mBuffers[kNumberBuffers]; // 4 - AudioFileID mAudioFile; // 5 - UInt32 bufferByteSize; // 6 - SInt64 mCurrentPacket; // 7 - bool mIsRunning; // 8 -}; - -static void HandleInputBuffer ( - void *aqData, - AudioQueueRef inAQ, - AudioQueueBufferRef inBuffer, - const AudioTimeStamp *inStartTime, - UInt32 inNumPackets, - const AudioStreamPacketDescription *inPacketDesc - ) { - AQRecorderState *pAqData = (AQRecorderState *) aqData; // 1 - - if (inNumPackets == 0 && // 2 - pAqData->mDataFormat.mBytesPerPacket != 0) - inNumPackets = - inBuffer->mAudioDataByteSize / pAqData->mDataFormat.mBytesPerPacket; - - if (AudioFileWritePackets ( // 3 - pAqData->mAudioFile, - false, - inBuffer->mAudioDataByteSize, - inPacketDesc, - pAqData->mCurrentPacket, - &inNumPackets, - inBuffer->mAudioData - ) == noErr) { - pAqData->mCurrentPacket += inNumPackets; // 4 - } - if (pAqData->mIsRunning == 0) // 5 - return; - - AudioQueueEnqueueBuffer ( // 6 - pAqData->mQueue, - inBuffer, - 0, - NULL - ); -} - -void DeriveBufferSize ( - AudioQueueRef audioQueue, // 1 - AudioStreamBasicDescription &ASBDescription, // 2 - Float64 seconds, // 3 - UInt32 *outBufferSize // 4 -) { - static const int maxBufferSize = 0x50000; // 5 +- (void)prepare { + engine = [[AVAudioEngine alloc] init]; - int maxPacketSize = ASBDescription.mBytesPerPacket; // 6 - if (maxPacketSize == 0) { // 7 - UInt32 maxVBRPacketSize = sizeof(maxPacketSize); - AudioQueueGetProperty ( - audioQueue, - kAudioQueueProperty_MaximumOutputPacketSize, - // in Mac OS X v10.5, instead use - // kAudioConverterPropertyMaximumOutputPacketSize - &maxPacketSize, - &maxVBRPacketSize - ); - } - - Float64 numBytesForTime = - ASBDescription.mSampleRate * maxPacketSize * seconds; // 8 - *outBufferSize = - UInt32 (numBytesForTime < maxBufferSize ? 
- numBytesForTime : maxBufferSize); // 9 + AVAudioInputNode *input = [engine inputNode]; + AVAudioFormat *format = [input outputFormatForBus: 0]; + [input installTapOnBus: 0 bufferSize: 8192 format: format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { + // ‘buf' contains audio captured from input node at time 'when' + NSLog(@"%@", buf); + }]; } - -AQRecorderState aqData; -- (void)startRecording:(CFURLRef*)fileURL -{ - // AQRecorderState aqData; // 1 - - aqData.mDataFormat.mFormatID = kAudioFormatLinearPCM; // 2 - aqData.mDataFormat.mSampleRate = 44100.0; // 3 - aqData.mDataFormat.mChannelsPerFrame = 2; // 4 - aqData.mDataFormat.mBitsPerChannel = 16; // 5 - aqData.mDataFormat.mBytesPerPacket = // 6 - aqData.mDataFormat.mBytesPerFrame = - aqData.mDataFormat.mChannelsPerFrame * sizeof (SInt16); - aqData.mDataFormat.mFramesPerPacket = 1; // 7 - - AudioFileTypeID fileType = kAudioFileAIFFType; // 8 - aqData.mDataFormat.mFormatFlags = // 9 - kLinearPCMFormatFlagIsBigEndian - | kLinearPCMFormatFlagIsSignedInteger - | kLinearPCMFormatFlagIsPacked; - - AudioQueueNewInput ( // 1 - &aqData.mDataFormat, // 2 - HandleInputBuffer, // 3 - &aqData, // 4 - NULL, // 5 - kCFRunLoopCommonModes, // 6 - 0, // 7 - &aqData.mQueue // 8 - ); - - UInt32 dataFormatSize = sizeof (aqData.mDataFormat); // 1 - - AudioQueueGetProperty ( // 2 - aqData.mQueue, // 3 - kAudioQueueProperty_StreamDescription, // 4 - // in Mac OS X, instead use - // kAudioConverterCurrentInputStreamDescription - &aqData.mDataFormat, // 5 - &dataFormatSize // 6 - ); - - DeriveBufferSize ( // 1 - aqData.mQueue, // 2 - aqData.mDataFormat, // 3 - 0.5, // 4 - &aqData.bufferByteSize // 5 - ); - - for (int i = 0; i < kNumberBuffers; ++i) { // 1 - AudioQueueAllocateBuffer ( // 2 - aqData.mQueue, // 3 - aqData.bufferByteSize, // 4 - &aqData.mBuffers[i] // 5 - ); - - AudioQueueEnqueueBuffer ( // 6 - aqData.mQueue, // 7 - aqData.mBuffers[i], // 8 - 0, // 9 - NULL // 10 - ); +- (void)start { + NSError *error = nil; + if (![engine startAndReturnError:&error]) { + NSLog(@"engine failed to start: %@", error); + return; } - - aqData.mCurrentPacket = 0; - aqData.mIsRunning = true; - AudioQueueStart(aqData.mQueue, NULL); - -} - -- (void)stopRecording -{ - // Wait, on user interface thread, until user stops the recording - AudioQueueStop (aqData.mQueue, true); - aqData.mIsRunning = false; } -- (void)dealloc -{ - CFRelease(fileURL); +- (void)stop { + AVAudioInputNode *input = [engine inputNode]; + [input removeTapOnBus: 0]; } @end From ad16ae8f22911907e6c1976192f879437200d6d5 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 30 May 2017 22:28:45 +0900 Subject: [PATCH 06/26] Generate dataReceived event --- ios/AudioRecorderManager.m | 17 ++++++++++++++--- ios/StreamingModule.h | 5 +++-- ios/StreamingModule.m | 15 ++++++++++----- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 8c24b5e8..f5266a14 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -16,6 +16,7 @@ NSString *const AudioRecorderEventProgress = @"recordingProgress"; NSString *const AudioRecorderEventFinished = @"recordingFinished"; +NSString *const AudioRecorderEventDataReceived = @"dataReceived"; @implementation AudioRecorderManager { @@ -242,9 +243,19 @@ - (NSString *) applicationDocumentsDirectory RCT_EXPORT_METHOD(prepareStreamingAtPath:(NSString *)path sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding 
meteringEnabled:(BOOL)meteringEnabled) { - NSLog(@"PrepareStreaming"); + NSLog(@"prepareStreaming"); streamingModule = [[StreamingModule alloc] init]; - [streamingModule prepare]; + [streamingModule prepare:^(AVAudioPCMBuffer *buf){ + NSLog(@"%@", buf); + NSMutableArray *body = [[NSMutableArray alloc] init]; + float * const left = [buf floatChannelData][0]; + for(int i=0; i Date: Wed, 31 May 2017 11:15:47 +0900 Subject: [PATCH 07/26] Implement pause and stop streaming recording --- ios/AudioRecorderManager.m | 9 ++++++++- ios/StreamingModule.h | 1 + ios/StreamingModule.m | 20 ++++++++++++++++---- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index f5266a14..e794a6bb 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -269,8 +269,15 @@ - (NSString *) applicationDocumentsDirectory RCT_EXPORT_METHOD(stopStreaming) { NSLog(@"stopStreaming"); - [streamingModule pause]; + [streamingModule stop]; + +} +RCT_EXPORT_METHOD(pauseStreaming) +{ + NSLog(@"pauseStreaming"); + [streamingModule pause]; + } diff --git a/ios/StreamingModule.h b/ios/StreamingModule.h index 5680450c..31a5bfed 100644 --- a/ios/StreamingModule.h +++ b/ios/StreamingModule.h @@ -17,5 +17,6 @@ - (void)prepare:(void(^)(AVAudioPCMBuffer *))handler; - (void)start; - (void)pause; +- (void)stop; @end diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index 4f5a259f..568217e3 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -24,9 +24,16 @@ - (void)prepare:(void(^)(AVAudioPCMBuffer *))handler { } - (void)start { - NSError *error = nil; - NSLog(@"%@", [engine inputNode]); + if (engine == nil) { + if (_completionHandler != nil) { + [self prepare:_completionHandler]; + } else { + NSLog(@"Have to prepare before start"); + return; + } + } + NSError *error = nil; if (![engine startAndReturnError:&error]) { NSLog(@"engine failed to start: %@", error); return; @@ -34,9 +41,14 @@ - (void)start { } - (void)pause { - // AVAudioInputNode *input = [engine inputNode]; - // [input removeTapOnBus: 0]; [engine pause]; } +- (void)stop { + AVAudioInputNode *input = [engine inputNode]; + [input removeTapOnBus: 0]; + [engine stop]; + engine = nil; +} + @end From e4ba227f63918a55e71332f13e3bba707d047b4e Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Wed, 31 May 2017 12:06:45 +0900 Subject: [PATCH 08/26] Write recording stream to file --- ios/AudioRecorderManager.m | 27 +++++++++++++++------------ ios/StreamingModule.h | 4 +++- ios/StreamingModule.m | 15 ++++++++++++--- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index e794a6bb..e07fa68d 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -244,19 +244,22 @@ - (NSString *) applicationDocumentsDirectory RCT_EXPORT_METHOD(prepareStreamingAtPath:(NSString *)path sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled) { NSLog(@"prepareStreaming"); - streamingModule = [[StreamingModule alloc] init]; - [streamingModule prepare:^(AVAudioPCMBuffer *buf){ - NSLog(@"%@", buf); - NSMutableArray *body = [[NSMutableArray alloc] init]; - float * const left = [buf floatChannelData][0]; - for(int i=0; i Date: Fri, 9 Jun 2017 12:07:44 +0900 Subject: [PATCH 09/26] Supporting recording options (temporal commit) --- AudioExample/AudioExample.js | 3 +- .../AudioExample.xcodeproj/project.pbxproj | 8 +++ 
.../AppIcon.appiconset/Contents.json | 10 +++ AudioExample/iOS/AudioExample/Info.plist | 2 + ios/AudioRecorderManager.m | 66 +++++++++++++++++++ ios/StreamingModule.h | 11 ++-- ios/StreamingModule.m | 55 +++++++++++----- 7 files changed, 131 insertions(+), 24 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index bd73212e..8c6cbf76 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -42,11 +42,12 @@ class AudioExample extends Component { this.prepareRecordingPath(this.state.audioPath); console.log('hi3'); + console.log(this.state.audioPath); AudioRecorder.prepareStreamingAtPath(this.state.audioPath, { SampleRate: 22050, Channels: 1, AudioQuality: "Low", - AudioEncoding: "aac", + AudioEncoding: "lpcm", AudioEncodingBitRate: 32000, }); console.log('hi2'); diff --git a/AudioExample/iOS/AudioExample.xcodeproj/project.pbxproj b/AudioExample/iOS/AudioExample.xcodeproj/project.pbxproj index 57c314fb..9ae1682b 100644 --- a/AudioExample/iOS/AudioExample.xcodeproj/project.pbxproj +++ b/AudioExample/iOS/AudioExample.xcodeproj/project.pbxproj @@ -528,8 +528,12 @@ TargetAttributes = { 00E356ED1AD99517003FC87E = { CreatedOnToolsVersion = 6.2; + DevelopmentTeam = SD72YP83U5; TestTargetID = 13B07F861A680F5B00A75B9A; }; + 13B07F861A680F5B00A75B9A = { + DevelopmentTeam = SD72YP83U5; + }; }; }; buildConfigurationList = 83CBB9FA1A601CBA00E9B192 /* Build configuration list for PBXProject "AudioExample" */; @@ -878,6 +882,7 @@ isa = XCBuildConfiguration; buildSettings = { BUNDLE_LOADER = "$(TEST_HOST)"; + DEVELOPMENT_TEAM = SD72YP83U5; GCC_PREPROCESSOR_DEFINITIONS = ( "DEBUG=1", "$(inherited)", @@ -900,6 +905,7 @@ buildSettings = { BUNDLE_LOADER = "$(TEST_HOST)"; COPY_PHASE_STRIP = NO; + DEVELOPMENT_TEAM = SD72YP83U5; INFOPLIST_FILE = AudioExampleTests/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 8.0; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; @@ -919,6 +925,7 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; CURRENT_PROJECT_VERSION = 1; DEAD_CODE_STRIPPING = NO; + DEVELOPMENT_TEAM = SD72YP83U5; INFOPLIST_FILE = AudioExample/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; OTHER_LDFLAGS = ( @@ -936,6 +943,7 @@ buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_TEAM = SD72YP83U5; INFOPLIST_FILE = AudioExample/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; OTHER_LDFLAGS = ( diff --git a/AudioExample/iOS/AudioExample/Images.xcassets/AppIcon.appiconset/Contents.json b/AudioExample/iOS/AudioExample/Images.xcassets/AppIcon.appiconset/Contents.json index 118c98f7..b8236c65 100644 --- a/AudioExample/iOS/AudioExample/Images.xcassets/AppIcon.appiconset/Contents.json +++ b/AudioExample/iOS/AudioExample/Images.xcassets/AppIcon.appiconset/Contents.json @@ -1,5 +1,15 @@ { "images" : [ + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "3x" + }, { "idiom" : "iphone", "size" : "29x29", diff --git a/AudioExample/iOS/AudioExample/Info.plist b/AudioExample/iOS/AudioExample/Info.plist index 2fb6a11c..4728718c 100644 --- a/AudioExample/iOS/AudioExample/Info.plist +++ b/AudioExample/iOS/AudioExample/Info.plist @@ -38,6 +38,8 @@ NSLocationWhenInUseUsageDescription + NSMicrophoneUsageDescription + NSAppTransportSecurity diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index e07fa68d..e9cdc202 100644 --- 
a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -246,8 +246,74 @@ - (NSString *) applicationDocumentsDirectory NSLog(@"prepareStreaming"); _audioFileURL = [NSURL fileURLWithPath:path]; + // Default options + _audioQuality = [NSNumber numberWithInt:AVAudioQualityHigh]; + _audioEncoding = [NSNumber numberWithInt:kAudioFormatAppleIMA4]; + _audioChannels = [NSNumber numberWithInt:2]; + _audioSampleRate = [NSNumber numberWithFloat:44100.0]; + _meteringEnabled = NO; + + // Set audio quality from options + if (quality != nil) { + if ([quality isEqual: @"Low"]) { + _audioQuality =[NSNumber numberWithInt:AVAudioQualityLow]; + } else if ([quality isEqual: @"Medium"]) { + _audioQuality =[NSNumber numberWithInt:AVAudioQualityMedium]; + } else if ([quality isEqual: @"High"]) { + _audioQuality =[NSNumber numberWithInt:AVAudioQualityHigh]; + } + } + + // Set channels from options + if (channels != nil) { + _audioChannels = channels; + } + + // Set audio encoding from options + if (encoding != nil) { + if ([encoding isEqual: @"lpcm"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatLinearPCM]; + } else if ([encoding isEqual: @"ima4"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatAppleIMA4]; + } else if ([encoding isEqual: @"aac"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatMPEG4AAC]; + } else if ([encoding isEqual: @"MAC3"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatMACE3]; + } else if ([encoding isEqual: @"MAC6"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatMACE6]; + } else if ([encoding isEqual: @"ulaw"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatULaw]; + } else if ([encoding isEqual: @"alaw"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatALaw]; + } else if ([encoding isEqual: @"mp1"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatMPEGLayer1]; + } else if ([encoding isEqual: @"mp2"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatMPEGLayer2]; + } else if ([encoding isEqual: @"alac"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatAppleLossless]; + } else if ([encoding isEqual: @"amr"]) { + _audioEncoding =[NSNumber numberWithInt:kAudioFormatAMR]; + } + } + + // Set sample rate from options + _audioSampleRate = [NSNumber numberWithFloat:sampleRate]; + + NSDictionary *recordSettings = [NSDictionary dictionaryWithObjectsAndKeys: + //_audioQuality, AVEncoderAudioQualityKey, + //_audioEncoding, AVFormatIDKey, + _audioChannels, AVNumberOfChannelsKey, + _audioSampleRate, AVSampleRateKey, + nil]; + + // Enable metering from options + if (meteringEnabled != NO) { + _meteringEnabled = meteringEnabled; + } + streamingModule = [[StreamingModule alloc] init]; [streamingModule prepare:_audioFileURL + settings:recordSettings handler:^(AVAudioPCMBuffer *buf){ NSLog(@"%@", buf); NSMutableArray *body = [[NSMutableArray alloc] init]; diff --git a/ios/StreamingModule.h b/ios/StreamingModule.h index 03e8914e..d74b8d46 100644 --- a/ios/StreamingModule.h +++ b/ios/StreamingModule.h @@ -10,13 +10,14 @@ @interface StreamingModule : NSObject { - AVAudioEngine *engine; - void (^_completionHandler)(AVAudioPCMBuffer *buf); - NSTimeInterval currentTime; - NSURL *fileUrl; + AVAudioEngine *_engine; + void (^_audioDataReceived)(AVAudioPCMBuffer *buf); + NSTimeInterval _currentTime; + NSURL *_fileUrl; + NSDictionary *_settings; } -- (void)prepare:(NSURL*)recordingFileUrl handler:(void(^)(AVAudioPCMBuffer *))handler; +- (void)prepare:(NSURL*)recordingFileUrl settings:(NSDictionary*)settings 
handler:(void(^)(AVAudioPCMBuffer *))handler; - (void)start; - (void)pause; - (void)stop; diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index a229171e..fffda340 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -10,32 +10,51 @@ @implementation StreamingModule -- (void)prepare:(NSURL *)recordingFileUrl handler:(void(^)(AVAudioPCMBuffer *))handler { - _completionHandler = [handler copy]; - fileUrl = recordingFileUrl; +- (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handler:(void(^)(AVAudioPCMBuffer *))handler { + _audioDataReceived = [handler copy]; + _fileUrl = recordingFileUrl; + _settings = settings; - engine = [[AVAudioEngine alloc] init]; + _engine = [[AVAudioEngine alloc] init]; - AVAudioInputNode *input = [engine inputNode]; - AVAudioFormat *format = [input outputFormatForBus: 0]; + AVAudioInputNode *input = [_engine inputNode]; + AVAudioMixerNode *mainMixer = [_engine mainMixerNode]; + // [mainMixer setOutputVolume: 0.0]; + AVAudioOutputNode *output = [_engine outputNode]; + + NSLog(@"Prepare"); + NSLog(@"%@", [settings description]); + + //AVAudioFormat *format = [[AVAudioFormat alloc] initWithSettings:settings]; //[input outputFormatForBus: 0]; // + + AVAudioFormat *format = [[AVAudioFormat alloc] initStandardFormatWithSampleRate:22050 channels:1]; + + // AVAudioFormat *format = [mainMixer outputFormatForBus: 0]; + NSLog(@"%@", [format description]); + + + [_engine connect:input to:mainMixer format:[input inputFormatForBus:0]]; + [_engine connect:mainMixer to:output format:format]; NSError *error = nil; - AVAudioFile *file = [[AVAudioFile alloc] initForWriting:fileUrl + AVAudioFile *file = [[AVAudioFile alloc] initForWriting:_fileUrl settings:format.settings error:&error]; - [input installTapOnBus: 0 bufferSize: 8192 format: format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { + NSLog(@"InstallTapOnBus"); + + [mainMixer installTapOnBus: 0 bufferSize: 8192 format: format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { // ‘buf' contains audio captured from input node at time 'when' - _completionHandler(buf); + _audioDataReceived(buf); NSError *wrtieFromBufferError = nil; [file writeFromBuffer:buf error:&wrtieFromBufferError]; }]; } - (void)start { - if (engine == nil) { - if (_completionHandler != nil && fileUrl != nil) { - [self prepare:fileUrl handler:_completionHandler]; + if (_engine == nil) { + if (_audioDataReceived != nil && _fileUrl != nil && _settings != nil) { + [self prepare:_fileUrl settings:_settings handler:_audioDataReceived]; } else { NSLog(@"Have to prepare before start"); return; @@ -43,21 +62,21 @@ - (void)start { } NSError *error = nil; - if (![engine startAndReturnError:&error]) { + if (![_engine startAndReturnError:&error]) { NSLog(@"engine failed to start: %@", error); return; } } - (void)pause { - [engine pause]; + [_engine pause]; } - (void)stop { - AVAudioInputNode *input = [engine inputNode]; - [input removeTapOnBus: 0]; - [engine stop]; - engine = nil; + AVAudioMixerNode *mainMixer = [_engine mainMixerNode]; + [mainMixer removeTapOnBus: 0]; + [_engine stop]; + _engine = nil; } @end From d75602041508e3d46da2ed26fb8e6e050ac443d1 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Mon, 19 Jun 2017 10:45:53 +0900 Subject: [PATCH 10/26] Support recording format * Currently, the number of audio channels and sample rate are supported. 
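[Editor's note] The Channels and SampleRate options passed in from JavaScript now drive the native recording format: the hunks below build the iOS AVAudioFormat from them, and patch 12 later in this series applies the same two options on the Android side. For reference, Android takes a channel mask rather than a channel count, so the count has to be translated before an AudioRecord can be constructed. The helper below is only an illustrative sketch of that mapping (the class and method names are invented; the actual patch does this inline in prepareStreamingAtPath):

    // Illustrative only: map a JS-style channel count and sample rate onto
    // AudioRecord constructor arguments. Mirrors what prepareStreamingAtPath
    // does inline in the Android patch; the names here are hypothetical.
    import android.media.AudioFormat;
    import android.media.AudioRecord;
    import android.media.MediaRecorder;

    final class RecordingFormat {
        static AudioRecord buildRecorder(int sampleRate, int channels, int bufferSizeInBytes) {
            // Android expects a channel mask, not a channel count.
            int channelMask = (channels == 1)
                    ? AudioFormat.CHANNEL_IN_MONO
                    : AudioFormat.CHANNEL_IN_STEREO;
            // 16-bit signed PCM matches the WAV header written by RecordWaveTask.
            return new AudioRecord(MediaRecorder.AudioSource.MIC, sampleRate,
                    channelMask, AudioFormat.ENCODING_PCM_16BIT, bufferSizeInBytes);
        }
    }
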
--- AudioExample/AudioExample.js | 2 +- ios/StreamingModule.h | 1 + ios/StreamingModule.m | 32 +++++++++++++++++++------------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 8c6cbf76..21f27b70 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -45,7 +45,7 @@ class AudioExample extends Component { console.log(this.state.audioPath); AudioRecorder.prepareStreamingAtPath(this.state.audioPath, { SampleRate: 22050, - Channels: 1, + Channels: 2, AudioQuality: "Low", AudioEncoding: "lpcm", AudioEncodingBitRate: 32000, diff --git a/ios/StreamingModule.h b/ios/StreamingModule.h index d74b8d46..39db5a6f 100644 --- a/ios/StreamingModule.h +++ b/ios/StreamingModule.h @@ -15,6 +15,7 @@ NSTimeInterval _currentTime; NSURL *_fileUrl; NSDictionary *_settings; + AVAudioMixerNode *_downMixer; } - (void)prepare:(NSURL*)recordingFileUrl settings:(NSDictionary*)settings handler:(void(^)(AVAudioPCMBuffer *))handler; diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index fffda340..c50f81f6 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -17,25 +17,30 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl _engine = [[AVAudioEngine alloc] init]; + /* + // Initialize audio session + AVAudioSession *audioSession = [AVAudioSession sharedInstance]; + NSError *setCateegoryError = nil; + [audioSession setCategory:AVAudioSessionCategoryPlayAndRecord error:&setCateegoryError]; + */ + AVAudioInputNode *input = [_engine inputNode]; + _downMixer = [[AVAudioMixerNode alloc] init]; AVAudioMixerNode *mainMixer = [_engine mainMixerNode]; - // [mainMixer setOutputVolume: 0.0]; - AVAudioOutputNode *output = [_engine outputNode]; NSLog(@"Prepare"); NSLog(@"%@", [settings description]); - //AVAudioFormat *format = [[AVAudioFormat alloc] initWithSettings:settings]; //[input outputFormatForBus: 0]; // - - AVAudioFormat *format = [[AVAudioFormat alloc] initStandardFormatWithSampleRate:22050 channels:1]; + AVAudioFormat *format = [[AVAudioFormat alloc] initStandardFormatWithSampleRate: [_settings[AVSampleRateKey] doubleValue] + channels: [_settings[AVNumberOfChannelsKey] intValue]]; - // AVAudioFormat *format = [mainMixer outputFormatForBus: 0]; NSLog(@"%@", [format description]); - - [_engine connect:input to:mainMixer format:[input inputFormatForBus:0]]; - [_engine connect:mainMixer to:output format:format]; - + [_engine attachNode:_downMixer]; + [_engine connect:input to:_downMixer format:[input inputFormatForBus:0]]; + [_downMixer setVolume:0]; + [_engine connect:_downMixer to:mainMixer format:format]; + NSError *error = nil; AVAudioFile *file = [[AVAudioFile alloc] initForWriting:_fileUrl settings:format.settings @@ -43,12 +48,14 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl NSLog(@"InstallTapOnBus"); - [mainMixer installTapOnBus: 0 bufferSize: 8192 format: format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { + [_downMixer installTapOnBus: 0 bufferSize: 8192 format: format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { // ‘buf' contains audio captured from input node at time 'when' _audioDataReceived(buf); NSError *wrtieFromBufferError = nil; [file writeFromBuffer:buf error:&wrtieFromBufferError]; }]; + + [_engine prepare]; } - (void)start { @@ -73,8 +80,7 @@ - (void)pause { } - (void)stop { - AVAudioMixerNode *mainMixer = [_engine mainMixerNode]; - [mainMixer removeTapOnBus: 0]; + [_downMixer removeTapOnBus: 0]; 
[_engine stop]; _engine = nil; } From f20371e10a8bbe43834d496d7f2de40b2f0d1929 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Mon, 19 Jun 2017 13:17:58 +0900 Subject: [PATCH 11/26] Implement recorder eventProgress and eventFinished callback --- AudioExample/AudioExample.js | 26 ++++++---------- index.js | 58 ++---------------------------------- ios/AudioRecorderManager.m | 27 +++++++++++++---- ios/StreamingModule.h | 6 +++- ios/StreamingModule.m | 14 ++++----- 5 files changed, 46 insertions(+), 85 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 21f27b70..6867e871 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -25,13 +25,14 @@ class AudioExample extends Component { }; prepareRecordingPath(audioPath){ - AudioRecorder.prepareRecordingAtPath(audioPath, { - SampleRate: 22050, - Channels: 1, - AudioQuality: "Low", - AudioEncoding: "aac", - AudioEncodingBitRate: 32000 - }); + AudioRecorder.prepareStreamingAtPath(this.state.audioPath, { + SampleRate: 22050, + Channels: 2, + // Following is only supported in Android + AudioQuality: "Low", + AudioEncoding: "aac", + AudioEncodingBitRate: 32000, + }); } componentDidMount() { @@ -41,16 +42,7 @@ class AudioExample extends Component { if (!hasPermission) return; this.prepareRecordingPath(this.state.audioPath); - console.log('hi3'); console.log(this.state.audioPath); - AudioRecorder.prepareStreamingAtPath(this.state.audioPath, { - SampleRate: 22050, - Channels: 2, - AudioQuality: "Low", - AudioEncoding: "lpcm", - AudioEncodingBitRate: 32000, - }); - console.log('hi2'); console.log(AudioRecorder); AudioRecorder.onProgress = (data) => { this.setState({currentTime: Math.floor(data.currentTime)}); @@ -107,7 +99,7 @@ class AudioExample extends Component { this.setState({stoppedRecording: true, recording: false}); try { - const filePath = await AudioRecorder.pauseRecording(); + const filePath = await AudioRecorder.pauseStreaming(); // Pause is currently equivalent to stop on Android. 
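[Editor's note] Patch 11 wires the streaming recorder's progress and finished events back to the onProgress / onFinished callbacks that index.js registers. On Android the same event path already exists through AudioRecorderManager.sendEvent; a minimal, illustrative sketch of that pattern (helper name invented, timer wiring omitted) is:

    // Illustrative only: emit a progress event from the Android module to JS.
    // The event name "recordingProgress" and the currentTime field match the
    // listeners registered in index.js; the surrounding class is hypothetical.
    import com.facebook.react.bridge.Arguments;
    import com.facebook.react.bridge.ReactApplicationContext;
    import com.facebook.react.bridge.WritableMap;
    import com.facebook.react.modules.core.DeviceEventManagerModule;

    final class ProgressEmitter {
        static void emitProgress(ReactApplicationContext context, int secondsElapsed) {
            WritableMap body = Arguments.createMap();
            body.putInt("currentTime", secondsElapsed);
            context.getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class)
                   .emit("recordingProgress", body);
        }
    }

index.js subscribes to these names through NativeAppEventEmitter.addListener, so whatever the native side emits under "recordingProgress" surfaces as data.currentTime in the example app.
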
if (Platform.OS === 'android') { diff --git a/index.js b/index.js index 23448055..b9f84763 100644 --- a/index.js +++ b/index.js @@ -67,67 +67,15 @@ var AudioRecorder = { return AudioRecorderManager.prepareStreamingAtPath(path, recordingOptions); } }, - prepareRecordingAtPath: function(path, options) { - if (this.progressSubscription) this.progressSubscription.remove(); - this.progressSubscription = NativeAppEventEmitter.addListener('recordingProgress', - (data) => { - if (this.onProgress) { - this.onProgress(data); - } - } - ); - - if (this.finishedSubscription) this.finishedSubscription.remove(); - this.finishedSubscription = NativeAppEventEmitter.addListener('recordingFinished', - (data) => { - if (this.onFinished) { - this.onFinished(data); - } - } - ); - - var defaultOptions = { - SampleRate: 44100.0, - Channels: 2, - AudioQuality: 'High', - AudioEncoding: 'ima4', - OutputFormat: 'mpeg_4', - MeteringEnabled: false, - AudioEncodingBitRate: 32000 - }; - - var recordingOptions = {...defaultOptions, ...options}; - - if (Platform.OS === 'ios') { - AudioRecorderManager.prepareRecordingAtPath( - path, - recordingOptions.SampleRate, - recordingOptions.Channels, - recordingOptions.AudioQuality, - recordingOptions.AudioEncoding, - recordingOptions.MeteringEnabled - ); - } else { - return AudioRecorderManager.prepareRecordingAtPath(path, recordingOptions); - } - }, - startRecording: function() { - return AudioRecorderManager.startRecording(); - }, - pauseRecording: function() { - return AudioRecorderManager.pauseRecording(); - }, - stopRecording: function() { - return AudioRecorderManager.stopRecording(); - }, startStreaming: function() { - console.log(AudioRecorderManager); - console.log('???'); return AudioRecorderManager.startStreaming(); }, stopStreaming: function() { return AudioRecorderManager.stopStreaming(); }, + pauseStreaming: function() { + return AudioRecorderManager.pauseStreaming(); + }, checkAuthorizationStatus: AudioRecorderManager.checkAuthorizationStatus, requestAuthorization: AudioRecorderManager.requestAuthorization, removeListeners: function() { diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index e9cdc202..48924d5d 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -20,7 +20,7 @@ @implementation AudioRecorderManager { - AVAudioRecorder *_audioRecorder; + // AVAudioRecorder *_audioRecorder; NSTimeInterval _currentTime; id _progressUpdateTimer; @@ -42,8 +42,8 @@ @implementation AudioRecorderManager { RCT_EXPORT_MODULE(); - (void)sendProgressUpdate { - if (_audioRecorder && _audioRecorder.recording) { - _currentTime = _audioRecorder.currentTime; + if (streamingModule && streamingModule->recording) { + _currentTime = streamingModule->currentTime; } else { return; } @@ -52,11 +52,13 @@ - (void)sendProgressUpdate { (([_prevProgressUpdateTime timeIntervalSinceNow] * -1000.0) >= _progressUpdateInterval)) { NSMutableDictionary *body = [[NSMutableDictionary alloc] init]; [body setObject:[NSNumber numberWithFloat:_currentTime] forKey:@"currentTime"]; + /* if (_meteringEnabled) { [_audioRecorder updateMeters]; float _currentMetering = [_audioRecorder averagePowerForChannel: 0]; [body setObject:[NSNumber numberWithFloat:_currentMetering] forKey:@"currentMetering"]; } + */ [self.bridge.eventDispatcher sendAppEventWithName:AudioRecorderEventProgress body:body]; @@ -78,12 +80,21 @@ - (void)startProgressTimer { [_progressUpdateTimer addToRunLoop:[NSRunLoop mainRunLoop] forMode:NSDefaultRunLoopMode]; } +- (void)finishRecording:(BOOL)flag { + 
[self.bridge.eventDispatcher sendAppEventWithName:AudioRecorderEventFinished body:@{ + @"status": flag ? @"OK" : @"ERROR", + @"audioFileURL": [_audioFileURL absoluteString] + }]; +} + +/* - (void)audioRecorderDidFinishRecording:(AVAudioRecorder *)recorder successfully:(BOOL)flag { [self.bridge.eventDispatcher sendAppEventWithName:AudioRecorderEventFinished body:@{ @"status": flag ? @"OK" : @"ERROR", @"audioFileURL": [_audioFileURL absoluteString] }]; } + */ - (NSString *) applicationDocumentsDirectory { @@ -92,6 +103,7 @@ - (NSString *) applicationDocumentsDirectory return basePath; } +/* RCT_EXPORT_METHOD(prepareRecordingAtPath:(NSString *)path sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled) { _prevProgressUpdateTime = nil; @@ -209,6 +221,7 @@ - (NSString *) applicationDocumentsDirectory [_audioRecorder pause]; } } +*/ RCT_EXPORT_METHOD(checkAuthorizationStatus:(RCTPromiseResolveBlock)resolve reject:(__unused RCTPromiseRejectBlock)reject) { @@ -332,6 +345,8 @@ - (NSString *) applicationDocumentsDirectory { NSLog(@"startStreaming"); NSLog(@"%@", _audioFileURL); + [self startProgressTimer]; + [_recordSession setActive:YES error:nil]; [streamingModule start]; } @@ -339,14 +354,16 @@ - (NSString *) applicationDocumentsDirectory { NSLog(@"stopStreaming"); [streamingModule stop]; - + [_recordSession setActive:NO error:nil]; + _prevProgressUpdateTime = nil; + [self finishRecording: true]; } RCT_EXPORT_METHOD(pauseStreaming) { NSLog(@"pauseStreaming"); + [self stopProgressTimer]; [streamingModule pause]; - } diff --git a/ios/StreamingModule.h b/ios/StreamingModule.h index 39db5a6f..00e40b0f 100644 --- a/ios/StreamingModule.h +++ b/ios/StreamingModule.h @@ -12,10 +12,14 @@ { AVAudioEngine *_engine; void (^_audioDataReceived)(AVAudioPCMBuffer *buf); - NSTimeInterval _currentTime; NSURL *_fileUrl; NSDictionary *_settings; AVAudioMixerNode *_downMixer; + NSTimeInterval _startTime; + + @public + bool recording; + NSTimeInterval currentTime; } - (void)prepare:(NSURL*)recordingFileUrl settings:(NSDictionary*)settings handler:(void(^)(AVAudioPCMBuffer *))handler; diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index c50f81f6..4a0975e6 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -17,13 +17,6 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl _engine = [[AVAudioEngine alloc] init]; - /* - // Initialize audio session - AVAudioSession *audioSession = [AVAudioSession sharedInstance]; - NSError *setCateegoryError = nil; - [audioSession setCategory:AVAudioSessionCategoryPlayAndRecord error:&setCateegoryError]; - */ - AVAudioInputNode *input = [_engine inputNode]; _downMixer = [[AVAudioMixerNode alloc] init]; AVAudioMixerNode *mainMixer = [_engine mainMixerNode]; @@ -50,6 +43,7 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl [_downMixer installTapOnBus: 0 bufferSize: 8192 format: format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { // ‘buf' contains audio captured from input node at time 'when' + currentTime = when.sampleTime / when.sampleRate - _startTime; _audioDataReceived(buf); NSError *wrtieFromBufferError = nil; [file writeFromBuffer:buf error:&wrtieFromBufferError]; @@ -70,19 +64,25 @@ - (void)start { NSError *error = nil; if (![_engine startAndReturnError:&error]) { + NSLog(@"engine failed to start: %@", error); return; + } else { + _startTime = _downMixer.lastRenderTime.sampleTime / 
_downMixer.lastRenderTime.sampleRate; + recording = true; } } - (void)pause { [_engine pause]; + recording = false; } - (void)stop { [_downMixer removeTapOnBus: 0]; [_engine stop]; _engine = nil; + recording = false; } @end From 59ad41b5d42fec18ad4d710b6f22a2c6d55ca8fb Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 20 Jun 2017 21:03:22 +0900 Subject: [PATCH 12/26] Support format in Android --- AudioExample/AudioExample.js | 12 ++-- .../rnim/rn/audio/AudioRecorderManager.java | 59 +++++++++++++++---- .../com/rnim/rn/audio/RecordWaveTask.java | 57 ++++++++++++------ index.js | 1 + ios/AudioRecorderManager.m | 2 +- 5 files changed, 95 insertions(+), 36 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 6867e871..140062fd 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -20,18 +20,18 @@ class AudioExample extends Component { recording: false, stoppedRecording: false, finished: false, - audioPath: AudioUtils.DocumentDirectoryPath + '/test.wav', + audioPath: AudioUtils.DownloadsDirectoryPath + '/test.wav', hasPermission: undefined, }; prepareRecordingPath(audioPath){ AudioRecorder.prepareStreamingAtPath(this.state.audioPath, { SampleRate: 22050, - Channels: 2, - // Following is only supported in Android - AudioQuality: "Low", - AudioEncoding: "aac", - AudioEncodingBitRate: 32000, + Channels: 1, + // Following is not supported + // AudioQuality: "Low", + // AudioEncoding: "aac", + // AudioEncodingBitRate: 32000, }); } diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index c76eba37..96d2a260 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -171,21 +171,52 @@ private int getOutputFormatFromString(String outputFormat) { @ReactMethod public void prepareStreamingAtPath(String recordingPath, ReadableMap recordingSettings, Promise promise) { - File wavFile = new File(recordingPath); - recordTask = new RecordWaveTask(context); - recordTask.setOutputFile(wavFile); - recordTask.setStreamListener(new RecordWaveTask.OnStreamListener() { - @Override - public void onDataReceived(byte[] buffer) { - Log.d("onDataReceived", buffer.length + ""); - WritableArray body = Arguments.createArray(); - for (byte value: buffer) { - body.pushInt((int) value); + try { + File wavFile = new File(recordingPath); + recordTask = new RecordWaveTask(context); + + recordTask.setAudioSource(MediaRecorder.AudioSource.MIC); + + if (recordingSettings.hasKey("SampleRate")) { + recordTask.setSampleRate(recordingSettings.getInt("SampleRate")); + } + + if (recordingSettings.hasKey("Channels")) { + int channels = recordingSettings.getInt("Channels"); + int channelMask = AudioFormat.CHANNEL_IN_STEREO; + if (channels == 1) { + channelMask = AudioFormat.CHANNEL_IN_MONO; } - sendEvent("dataReceived", body); + recordTask.setChannelMask(channelMask); } - }); + + recordTask.setOutputFile(wavFile); + recordTask.setStreamListener(new RecordWaveTask.OnStreamListener() { + + @Override + public void onDataReceived(byte[] buffer) { + Log.d("onDataReceived", buffer.length + ""); + WritableArray body = Arguments.createArray(); + for (byte value: buffer) { + body.pushInt((int) value); + } + sendEvent("dataReceived", body); + } + }); + + // int outputFormat = getOutputFormatFromString(recordingSettings.getString("OutputFormat")); + // recorder.setOutputFormat(outputFormat); + // 
int audioEncoder = getAudioEncoderFromString(recordingSettings.getString("AudioEncoding")); + // recorder.setAudioEncoder(audioEncoder); + // recorder.setAudioEncodingBitRate(recordingSettings.getInt("AudioEncodingBitRate")); + // recorder.setOutputFile(recordingPath); + } + catch(final Exception e) { + logAndRejectPromise(promise, "COULDNT_CONFIGURE_MEDIA_RECORDER" , "Make sure you've added RECORD_AUDIO permission to your AndroidManifest.xml file "+e.getMessage()); + return; + } + currentOutputFile = recordingPath; } @@ -218,11 +249,14 @@ public void startStreaming(Promise promise){ @ReactMethod public void stopStreaming(final Promise promise){ + Log.d("RecordWaveTask", "stopStreaming"); if (!recordTask.isCancelled() && recordTask.getStatus() == AsyncTask.Status.RUNNING) { + Log.d("RecordWaveTask", "stopStreaming2"); isRecording = false; recordTask.setCancelCompleteListener(new RecordWaveTask.OnCancelCompleteListener() { @Override public void onCancelCompleted() { + Log.d("RecordWaveTask", "onCancelCompleted"); recordTask = null; promise.resolve(currentOutputFile); sendEvent("recordingFinished", null); @@ -231,6 +265,7 @@ public void onCancelCompleted() { recordTask.cancel(false); stopTimer(); } else { + Log.d("RecordWaveTask", "Task not running."); // Toast.makeText(context, "Task not running.", Toast.LENGTH_SHORT).show(); logAndRejectPromise(promise, "INVALID_STATE", "Please call startStreaming before stopping streaming"); } diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index 63c92942..ba378db4 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -24,14 +24,11 @@ public class RecordWaveTask extends AsyncTask { - // Configure me! - private static final int AUDIO_SOURCE = MediaRecorder.AudioSource.MIC; - private static final int SAMPLE_RATE = 44100; // Hz - private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT; - private static final int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; - // - - private static final int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); + // Default value + private int AUDIO_SOURCE = MediaRecorder.AudioSource.MIC; + private int SAMPLE_RATE = 44100; // Hz + private int ENCODING = AudioFormat.ENCODING_PCM_16BIT; + private int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; private Context ctx; private File outputFile; @@ -44,6 +41,14 @@ public void setContext(Context ctx) { this.ctx = ctx; } + public void setAudioSource(int audioSource) { this.AUDIO_SOURCE = audioSource; } + + public void setSampleRate(int sampleRate) { this.SAMPLE_RATE = sampleRate; } + + public void setEncoding(int encoding) { this.ENCODING = encoding; } + + public void setChannelMask(int channelMask) { this.CHANNEL_MASK = channelMask; } + public void setOutputFile(File file) { this.outputFile = file; } // Step 1 - This interface defines the type of messages I want to communicate to my owner @@ -70,11 +75,11 @@ public void setStreamListener(OnStreamListener listener) { * AudioRecord until it reaches 4GB or is stopped by the user. It then goes back and updates * the WAV header to include the proper final chunk sizes. * - * @param files Index 0 should be the file to write to * @return Either an Exception (error) or two longs, the filesize, elapsed time in ms (success) */ @Override - protected Object[] doInBackground(File... files) { + protected Object[] doInBackground(File... 
unused) { + int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); AudioRecord audioRecord = null; FileOutputStream wavOut = null; long startTime = 0; @@ -125,10 +130,12 @@ protected Object[] doInBackground(File... files) { } catch (IOException ex) { return new Object[]{ex}; } finally { + Log.d("RecordWaveTask", "Finally"); if (audioRecord != null) { try { if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) { audioRecord.stop(); + Log.d("RecordWaveTask", "audioRecord.stop()"); endTime = SystemClock.elapsedRealtime(); } } catch (IllegalStateException ex) { @@ -141,8 +148,9 @@ protected Object[] doInBackground(File... files) { if (wavOut != null) { try { wavOut.close(); + Log.d("RecordWaveTask", "wavOut.close()"); } catch (IOException ex) { - // + Log.d("RecordWaveTask", ex.getMessage()); } } } @@ -150,12 +158,20 @@ protected Object[] doInBackground(File... files) { try { // This is not put in the try/catch/finally above since it needs to run // after we close the FileOutputStream - updateWavHeader(files[0]); + Log.d("RecordWaveTask", "updateWavHeaderPrev"); + this.updateWavHeader(this.outputFile); + Log.d("RecordWaveTask", "updateWavHeaderAfter"); } catch (IOException ex) { + Log.d("RecordWaveTask", "???"); + Log.d("RecordWaveTask", ex.getMessage()); return new Object[] { ex }; } - return new Object[] { files[0].length(), endTime - startTime }; + Log.d("RecordWaveTask", "Bye"); + Log.d("RecordWaveTask", (endTime - startTime) + "" ); + Log.d("RecordWaveTask", this.outputFile.length() + "" ); + + return new Object[] { this.outputFile.length(), endTime - startTime }; } /** @@ -249,6 +265,7 @@ private static void writeWavHeader(OutputStream out, short channels, int sampleR * @throws IOException */ private static void updateWavHeader(File wav) throws IOException { + Log.d("RecordWaveTask", "updateWavHeader0"); byte[] sizes = ByteBuffer .allocate(8) .order(ByteOrder.LITTLE_ENDIAN) @@ -258,7 +275,7 @@ private static void updateWavHeader(File wav) throws IOException { .putInt((int) (wav.length() - 8)) // ChunkSize .putInt((int) (wav.length() - 44)) // Subchunk2Size .array(); - + Log.d("RecordWaveTask", "updateWavHeader1"); RandomAccessFile accessWave = null; //noinspection CaughtExceptionImmediatelyRethrown try { @@ -267,9 +284,13 @@ private static void updateWavHeader(File wav) throws IOException { accessWave.seek(4); accessWave.write(sizes, 0, 4); + Log.d("RecordWaveTask", "updateWavHeader2"); + // Subchunk2Size accessWave.seek(40); accessWave.write(sizes, 4, 4); + + Log.d("RecordWaveTask", "updateWavHeader3"); } catch (IOException ex) { // Rethrow but we still close accessWave in our finally throw ex; @@ -287,11 +308,13 @@ private static void updateWavHeader(File wav) throws IOException { @Override protected void onCancelled(Object[] results) { // Handling cancellations and successful runs in the same way + Log.d("RecordWaveTask", "onCancelled"); onPostExecute(results); } @Override protected void onPostExecute(Object[] results) { + Log.d("RecordWaveTask", "onPostExecute"); Throwable throwable = null; if (results[0] instanceof Throwable) { // Error @@ -305,11 +328,11 @@ protected void onPostExecute(Object[] results) { // Display final recording stats double size = (long) results[0] / 1000000.00; long time = (long) results[1] / 1000; - // Toast.makeText(ctx, String.format(Locale.getDefault(), "%.2f MB / %d seconds", - // size, time), Toast.LENGTH_LONG).show(); + Toast.makeText(ctx, String.format(Locale.getDefault(), "%.2f MB / %d seconds", + 
size, time), Toast.LENGTH_LONG).show(); } else { // Error - // Toast.makeText(ctx, throwable.getLocalizedMessage(), Toast.LENGTH_LONG).show(); + Toast.makeText(ctx, throwable.getLocalizedMessage(), Toast.LENGTH_LONG).show(); } } diff --git a/index.js b/index.js index b9f84763..51c0188c 100644 --- a/index.js +++ b/index.js @@ -25,6 +25,7 @@ var AudioRecorder = { if (this.finishedSubscription) this.finishedSubscription.remove(); this.finishedSubscription = NativeAppEventEmitter.addListener('recordingFinished', (data) => { + console.log('recordingFinished()'); if (this.onFinished) { this.onFinished(data); } diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 48924d5d..936fcc52 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -262,7 +262,7 @@ - (NSString *) applicationDocumentsDirectory // Default options _audioQuality = [NSNumber numberWithInt:AVAudioQualityHigh]; _audioEncoding = [NSNumber numberWithInt:kAudioFormatAppleIMA4]; - _audioChannels = [NSNumber numberWithInt:2]; + _audioChannels = [NSNumber numberWithInt:1]; _audioSampleRate = [NSNumber numberWithFloat:44100.0]; _meteringEnabled = NO; From cdebdd1a2150627af3cd7b33c984ff339d599d20 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 20 Jun 2017 22:11:47 +0900 Subject: [PATCH 13/26] Fix bugs --- AudioExample/AudioExample.js | 2 +- .../java/com/rnim/rn/audio/RecordWaveTask.java | 4 +++- ios/StreamingModule.m | 15 +++++++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 140062fd..08be7107 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -20,7 +20,7 @@ class AudioExample extends Component { recording: false, stoppedRecording: false, finished: false, - audioPath: AudioUtils.DownloadsDirectoryPath + '/test.wav', + audioPath: AudioUtils.DocumentDirectoryPath + '/test.wav', hasPermission: undefined, }; diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index ba378db4..b86963a4 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -117,8 +117,10 @@ protected Object[] doInBackground(File... 
unused) { this.streamListener.onDataReceived(tmpBuffer); } run = false; - } else { + } else if (read >= 0) { // Write out the entire read buffer + Log.d("RecordWaveTask", read + ""); + Log.d("RecordWaveTask", buffer.length + ""); wavOut.write(buffer, 0, read); total += read; if (this.streamListener != null) { diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index 4a0975e6..ce17d45d 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -24,8 +24,19 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl NSLog(@"Prepare"); NSLog(@"%@", [settings description]); - AVAudioFormat *format = [[AVAudioFormat alloc] initStandardFormatWithSampleRate: [_settings[AVSampleRateKey] doubleValue] - channels: [_settings[AVNumberOfChannelsKey] intValue]]; + + AVAudioFormat *format = + [[AVAudioFormat alloc] initWithCommonFormat: AVAudioPCMFormatFloat32 + sampleRate: [_settings[AVSampleRateKey] doubleValue] + channels: [_settings[AVNumberOfChannelsKey] intValue] + interleaved: NO + ]; + + /* + AVAudioFormat *format = + [[AVAudioFormat alloc] initStandardFormatWithSampleRate: [_settings[AVSampleRateKey] doubleValue] + channels: [_settings[AVNumberOfChannelsKey] intValue]]; + */ NSLog(@"%@", [format description]); From 118c21ef2bc1245f49e21eff532e70e75c311025 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Mon, 17 Jul 2017 20:48:39 +0900 Subject: [PATCH 14/26] Convert AVAudioPCMFormatFloat32 to AVAudioPCMFormatInt16 --- ios/AudioRecorderManager.m | 4 ++-- ios/StreamingModule.m | 47 +++++++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 936fcc52..c31bd2e5 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -330,9 +330,9 @@ - (NSString *) applicationDocumentsDirectory handler:^(AVAudioPCMBuffer *buf){ NSLog(@"%@", buf); NSMutableArray *body = [[NSMutableArray alloc] init]; - float * const left = [buf floatChannelData][0]; + int16_t * const left = [buf int16ChannelData][0]; for(int i=0; imNumberBuffers); + NSLog(@"audioBufferList2: %d", [pcmInt16Buffer audioBufferList]->mNumberBuffers); + + NSLog(@"audioBufferList1: %d", [buf audioBufferList]->mBuffers->mDataByteSize); + NSLog(@"audioBufferList2: %d", [pcmInt16Buffer audioBufferList]->mBuffers->mDataByteSize); + + // _audioDataReceived(pcmInt16Buffer); NSError *wrtieFromBufferError = nil; - [file writeFromBuffer:buf error:&wrtieFromBufferError]; + [file writeFromBuffer:pcmInt16Buffer error:&wrtieFromBufferError]; + if (wrtieFromBufferError != nil) { + NSLog(@"%@", wrtieFromBufferError); + } }]; [_engine prepare]; From 7bcebdac3ba43bd64580b956d6007459354cc94f Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 18 Jul 2017 09:56:31 +0900 Subject: [PATCH 15/26] Commit missing change --- ios/StreamingModule.m | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index 4406ec14..1e9c5402 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -93,7 +93,8 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl NSLog(@"audioBufferList1: %d", [buf audioBufferList]->mBuffers->mDataByteSize); NSLog(@"audioBufferList2: %d", [pcmInt16Buffer audioBufferList]->mBuffers->mDataByteSize); - // _audioDataReceived(pcmInt16Buffer); + _audioDataReceived(pcmInt16Buffer); + NSError *wrtieFromBufferError = nil; [file writeFromBuffer:pcmInt16Buffer error:&wrtieFromBufferError]; if (wrtieFromBufferError != nil) 
{ From dc992b76d420e1757ba36175fe00909ddc48899a Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 18 Jul 2017 10:39:29 +0900 Subject: [PATCH 16/26] Fix to support channels in iOS --- ios/AudioRecorderManager.m | 139 ++----------------------------------- ios/StreamingModule.m | 43 ++++-------- 2 files changed, 18 insertions(+), 164 deletions(-) diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index c31bd2e5..5d6b1e05 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -87,15 +87,6 @@ - (void)finishRecording:(BOOL)flag { }]; } -/* -- (void)audioRecorderDidFinishRecording:(AVAudioRecorder *)recorder successfully:(BOOL)flag { - [self.bridge.eventDispatcher sendAppEventWithName:AudioRecorderEventFinished body:@{ - @"status": flag ? @"OK" : @"ERROR", - @"audioFileURL": [_audioFileURL absoluteString] - }]; -} - */ - - (NSString *) applicationDocumentsDirectory { NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES); @@ -103,126 +94,6 @@ - (NSString *) applicationDocumentsDirectory return basePath; } -/* -RCT_EXPORT_METHOD(prepareRecordingAtPath:(NSString *)path sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled) -{ - _prevProgressUpdateTime = nil; - [self stopProgressTimer]; - - _audioFileURL = [NSURL fileURLWithPath:path]; - - // Default options - _audioQuality = [NSNumber numberWithInt:AVAudioQualityHigh]; - _audioEncoding = [NSNumber numberWithInt:kAudioFormatAppleIMA4]; - _audioChannels = [NSNumber numberWithInt:2]; - _audioSampleRate = [NSNumber numberWithFloat:44100.0]; - _meteringEnabled = NO; - - // Set audio quality from options - if (quality != nil) { - if ([quality isEqual: @"Low"]) { - _audioQuality =[NSNumber numberWithInt:AVAudioQualityLow]; - } else if ([quality isEqual: @"Medium"]) { - _audioQuality =[NSNumber numberWithInt:AVAudioQualityMedium]; - } else if ([quality isEqual: @"High"]) { - _audioQuality =[NSNumber numberWithInt:AVAudioQualityHigh]; - } - } - - // Set channels from options - if (channels != nil) { - _audioChannels = channels; - } - - // Set audio encoding from options - if (encoding != nil) { - if ([encoding isEqual: @"lpcm"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatLinearPCM]; - } else if ([encoding isEqual: @"ima4"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatAppleIMA4]; - } else if ([encoding isEqual: @"aac"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatMPEG4AAC]; - } else if ([encoding isEqual: @"MAC3"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatMACE3]; - } else if ([encoding isEqual: @"MAC6"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatMACE6]; - } else if ([encoding isEqual: @"ulaw"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatULaw]; - } else if ([encoding isEqual: @"alaw"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatALaw]; - } else if ([encoding isEqual: @"mp1"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatMPEGLayer1]; - } else if ([encoding isEqual: @"mp2"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatMPEGLayer2]; - } else if ([encoding isEqual: @"alac"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatAppleLossless]; - } else if ([encoding isEqual: @"amr"]) { - _audioEncoding =[NSNumber numberWithInt:kAudioFormatAMR]; - } - } - - // Set sample rate from options - _audioSampleRate = [NSNumber numberWithFloat:sampleRate]; - - 
NSDictionary *recordSettings = [NSDictionary dictionaryWithObjectsAndKeys: - _audioQuality, AVEncoderAudioQualityKey, - _audioEncoding, AVFormatIDKey, - _audioChannels, AVNumberOfChannelsKey, - _audioSampleRate, AVSampleRateKey, - nil]; - - // Enable metering from options - if (meteringEnabled != NO) { - _meteringEnabled = meteringEnabled; - } - - NSError *error = nil; - - _recordSession = [AVAudioSession sharedInstance]; - [_recordSession setCategory:AVAudioSessionCategoryMultiRoute error:nil]; - - _audioRecorder = [[AVAudioRecorder alloc] - initWithURL:_audioFileURL - settings:recordSettings - error:&error]; - - _audioRecorder.meteringEnabled = _meteringEnabled; - _audioRecorder.delegate = self; - - if (error) { - NSLog(@"error: %@", [error localizedDescription]); - // TODO: dispatch error over the bridge - } else { - [_audioRecorder prepareToRecord]; - } -} - -RCT_EXPORT_METHOD(startRecording) -{ - if (!_audioRecorder.recording) { - [self startProgressTimer]; - [_recordSession setActive:YES error:nil]; - [_audioRecorder record]; - - } -} - -RCT_EXPORT_METHOD(stopRecording) -{ - [_audioRecorder stop]; - [_recordSession setActive:NO error:nil]; - _prevProgressUpdateTime = nil; -} - -RCT_EXPORT_METHOD(pauseRecording) -{ - if (_audioRecorder.recording) { - [self stopProgressTimer]; - [_audioRecorder pause]; - } -} -*/ - RCT_EXPORT_METHOD(checkAuthorizationStatus:(RCTPromiseResolveBlock)resolve reject:(__unused RCTPromiseRejectBlock)reject) { AVAudioSessionRecordPermission permissionStatus = [[AVAudioSession sharedInstance] recordPermission]; @@ -328,14 +199,14 @@ - (NSString *) applicationDocumentsDirectory [streamingModule prepare:_audioFileURL settings:recordSettings handler:^(AVAudioPCMBuffer *buf){ - NSLog(@"%@", buf); NSMutableArray *body = [[NSMutableArray alloc] init]; - int16_t * const left = [buf int16ChannelData][0]; for(int i=0; imNumberBuffers); - NSLog(@"audioBufferList2: %d", [pcmInt16Buffer audioBufferList]->mNumberBuffers); - - NSLog(@"audioBufferList1: %d", [buf audioBufferList]->mBuffers->mDataByteSize); - NSLog(@"audioBufferList2: %d", [pcmInt16Buffer audioBufferList]->mBuffers->mDataByteSize); - _audioDataReceived(pcmInt16Buffer); NSError *wrtieFromBufferError = nil; From f7e873f98323ea4a369bd1d83b93ec2093c5c999 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 18 Jul 2017 11:54:37 +0900 Subject: [PATCH 17/26] Refactor android code --- .../rnim/rn/audio/AudioRecorderManager.java | 141 +++--------------- .../com/rnim/rn/audio/RecordWaveTask.java | 44 +----- 2 files changed, 27 insertions(+), 158 deletions(-) diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 96d2a260..621ab92d 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -1,7 +1,6 @@ package com.rnim.rn.audio; import android.Manifest; -import android.content.Context; import com.facebook.react.bridge.ReactApplicationContext; import com.facebook.react.bridge.ReactContextBaseJavaModule; @@ -14,7 +13,6 @@ import com.facebook.react.bridge.WritableMap; import java.io.File; -import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Timer; @@ -22,20 +20,14 @@ import android.content.pm.PackageManager; import android.media.AudioFormat; -import android.media.AudioRecord; import android.os.AsyncTask; import android.os.Environment; import android.media.MediaRecorder; -import 
android.media.AudioManager; -import android.support.v4.app.ActivityCompat; import android.support.v4.content.ContextCompat; import android.util.Log; -import android.widget.Toast; import com.facebook.react.modules.core.DeviceEventManagerModule; -import java.io.FileInputStream; - class AudioRecorderManager extends ReactContextBaseJavaModule { private static final String TAG = "ReactNativeAudio"; @@ -48,8 +40,6 @@ class AudioRecorderManager extends ReactContextBaseJavaModule { private static final String MusicDirectoryPath = "MusicDirectoryPath"; private static final String DownloadsDirectoryPath = "DownloadsDirectoryPath"; - private Context context; - private MediaRecorder recorder; private String currentOutputFile; private boolean isRecording = false; private Timer timer; @@ -58,14 +48,10 @@ class AudioRecorderManager extends ReactContextBaseJavaModule { // For AudioRecord Class private RecordWaveTask recordTask = null; - public AudioRecorderManager(ReactApplicationContext reactContext) { super(reactContext); - this.context = reactContext; if (recordTask == null) { - recordTask = new RecordWaveTask(context); - } else { - recordTask.setContext(context); + recordTask = new RecordWaveTask(); } } @@ -95,57 +81,24 @@ public void checkAuthorizationStatus(Promise promise) { promise.resolve(permissionGranted); } - @ReactMethod - public void prepareRecordingAtPath(String recordingPath, ReadableMap recordingSettings, Promise promise) { - if (isRecording){ - logAndRejectPromise(promise, "INVALID_STATE", "Please call stopRecording before starting recording"); - } - - recorder = new MediaRecorder(); - try { - recorder.setAudioSource(MediaRecorder.AudioSource.MIC); - int outputFormat = getOutputFormatFromString(recordingSettings.getString("OutputFormat")); - recorder.setOutputFormat(outputFormat); - int audioEncoder = getAudioEncoderFromString(recordingSettings.getString("AudioEncoding")); - recorder.setAudioEncoder(audioEncoder); - recorder.setAudioSamplingRate(recordingSettings.getInt("SampleRate")); - recorder.setAudioChannels(recordingSettings.getInt("Channels")); - recorder.setAudioEncodingBitRate(recordingSettings.getInt("AudioEncodingBitRate")); - recorder.setOutputFile(recordingPath); - } - catch(final Exception e) { - logAndRejectPromise(promise, "COULDNT_CONFIGURE_MEDIA_RECORDER" , "Make sure you've added RECORD_AUDIO permission to your AndroidManifest.xml file "+e.getMessage()); - return; - } - - currentOutputFile = recordingPath; - try { - recorder.prepare(); - promise.resolve(currentOutputFile); - } catch (final Exception e) { - logAndRejectPromise(promise, "COULDNT_PREPARE_RECORDING_AT_PATH "+recordingPath, e.getMessage()); - } - - } - private int getAudioEncoderFromString(String audioEncoder) { - switch (audioEncoder) { - case "aac": - return MediaRecorder.AudioEncoder.AAC; - case "aac_eld": - return MediaRecorder.AudioEncoder.AAC_ELD; - case "amr_nb": - return MediaRecorder.AudioEncoder.AMR_NB; - case "amr_wb": - return MediaRecorder.AudioEncoder.AMR_WB; - case "he_aac": - return MediaRecorder.AudioEncoder.HE_AAC; - case "vorbis": - return MediaRecorder.AudioEncoder.VORBIS; - default: - Log.d("INVALID_AUDIO_ENCODER", "USING MediaRecorder.AudioEncoder.DEFAULT instead of "+audioEncoder+": "+MediaRecorder.AudioEncoder.DEFAULT); - return MediaRecorder.AudioEncoder.DEFAULT; - } + switch (audioEncoder) { + case "aac": + return MediaRecorder.AudioEncoder.AAC; + case "aac_eld": + return MediaRecorder.AudioEncoder.AAC_ELD; + case "amr_nb": + return MediaRecorder.AudioEncoder.AMR_NB; + case 
"amr_wb": + return MediaRecorder.AudioEncoder.AMR_WB; + case "he_aac": + return MediaRecorder.AudioEncoder.HE_AAC; + case "vorbis": + return MediaRecorder.AudioEncoder.VORBIS; + default: + Log.d("INVALID_AUDIO_ENCODER", "USING MediaRecorder.AudioEncoder.DEFAULT instead of "+audioEncoder+": "+MediaRecorder.AudioEncoder.DEFAULT); + return MediaRecorder.AudioEncoder.DEFAULT; + } } private int getOutputFormatFromString(String outputFormat) { @@ -174,7 +127,7 @@ public void prepareStreamingAtPath(String recordingPath, ReadableMap recordingSe try { File wavFile = new File(recordingPath); - recordTask = new RecordWaveTask(context); + recordTask = new RecordWaveTask(); recordTask.setAudioSource(MediaRecorder.AudioSource.MIC); @@ -210,7 +163,6 @@ public void onDataReceived(byte[] buffer) { // int audioEncoder = getAudioEncoderFromString(recordingSettings.getString("AudioEncoding")); // recorder.setAudioEncoder(audioEncoder); // recorder.setAudioEncodingBitRate(recordingSettings.getInt("AudioEncodingBitRate")); - // recorder.setOutputFile(recordingPath); } catch(final Exception e) { logAndRejectPromise(promise, "COULDNT_CONFIGURE_MEDIA_RECORDER" , "Make sure you've added RECORD_AUDIO permission to your AndroidManifest.xml file "+e.getMessage()); @@ -228,16 +180,13 @@ public void startStreaming(Promise promise){ } switch (recordTask.getStatus()) { case RUNNING: - // Toast.makeText(context, "Task already running...", Toast.LENGTH_SHORT).show(); logAndRejectPromise(promise, "INVALID_STATE", "Please call stopStreaming before starting streaming"); return; case FINISHED: logAndRejectPromise(promise, "STREAMING_NOT_PREPARED", "Please call prepareStreamingAtPath before starting streaming"); break; case PENDING: - if (recordTask.isCancelled()) { - // recordTask = new RecordWaveTask(context); - } + // No Action } startTimer(); @@ -266,58 +215,14 @@ public void onCancelCompleted() { stopTimer(); } else { Log.d("RecordWaveTask", "Task not running."); - // Toast.makeText(context, "Task not running.", Toast.LENGTH_SHORT).show(); logAndRejectPromise(promise, "INVALID_STATE", "Please call startStreaming before stopping streaming"); } } @ReactMethod - public void startRecording(Promise promise){ - if (recorder == null){ - logAndRejectPromise(promise, "RECORDING_NOT_PREPARED", "Please call prepareRecordingAtPath before starting recording"); - return; - } - if (isRecording){ - logAndRejectPromise(promise, "INVALID_STATE", "Please call stopRecording before starting recording"); - return; - } - recorder.start(); - isRecording = true; - startTimer(); - promise.resolve(currentOutputFile); - } - - @ReactMethod - public void stopRecording(Promise promise){ - if (!isRecording){ - logAndRejectPromise(promise, "INVALID_STATE", "Please call startRecording before stopping recording"); - return; - } - - stopTimer(); - isRecording = false; - - try { - recorder.stop(); - recorder.release(); - } - catch (final RuntimeException e) { - // https://developer.android.com/reference/android/media/MediaRecorder.html#stop() - logAndRejectPromise(promise, "RUNTIME_EXCEPTION", "No valid audio data received. 
You may be using a device that can't record audio."); - return; - } - finally { - recorder = null; - } - - promise.resolve(currentOutputFile); - sendEvent("recordingFinished", null); - } - - @ReactMethod - public void pauseRecording(Promise promise){ + public void pauseStreaming(Promise promise){ // Added this function to have the same api for android and iOS, stops recording now - stopRecording(promise); + stopStreaming(promise); } private void startTimer(){ @@ -342,7 +247,7 @@ private void stopTimer(){ timer = null; } } - + private void sendEvent(String eventName, Object params) { getReactApplicationContext() .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class) diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index b86963a4..1bfe699e 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -1,13 +1,11 @@ package com.rnim.rn.audio; -import android.content.Context; import android.media.AudioFormat; import android.media.AudioRecord; import android.media.MediaRecorder; import android.os.AsyncTask; import android.os.SystemClock; import android.util.Log; -import android.widget.Toast; import java.io.File; import java.io.FileOutputStream; @@ -16,7 +14,6 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Locale; /** * Created by KDH on 2017. 5. 15.. @@ -30,17 +27,9 @@ public class RecordWaveTask extends AsyncTask { private int ENCODING = AudioFormat.ENCODING_PCM_16BIT; private int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; - private Context ctx; private File outputFile; - public RecordWaveTask(Context ctx) { - setContext(ctx); - } - - public void setContext(Context ctx) { - this.ctx = ctx; - } - + public RecordWaveTask() {} public void setAudioSource(int audioSource) { this.AUDIO_SOURCE = audioSource; } public void setSampleRate(int sampleRate) { this.SAMPLE_RATE = sampleRate; } @@ -119,8 +108,6 @@ protected Object[] doInBackground(File... unused) { run = false; } else if (read >= 0) { // Write out the entire read buffer - Log.d("RecordWaveTask", read + ""); - Log.d("RecordWaveTask", buffer.length + ""); wavOut.write(buffer, 0, read); total += read; if (this.streamListener != null) { @@ -160,18 +147,14 @@ protected Object[] doInBackground(File... 
unused) { try { // This is not put in the try/catch/finally above since it needs to run // after we close the FileOutputStream - Log.d("RecordWaveTask", "updateWavHeaderPrev"); this.updateWavHeader(this.outputFile); - Log.d("RecordWaveTask", "updateWavHeaderAfter"); } catch (IOException ex) { - Log.d("RecordWaveTask", "???"); Log.d("RecordWaveTask", ex.getMessage()); return new Object[] { ex }; } - Log.d("RecordWaveTask", "Bye"); - Log.d("RecordWaveTask", (endTime - startTime) + "" ); - Log.d("RecordWaveTask", this.outputFile.length() + "" ); + Log.d("RecordWaveTask", (endTime - startTime) + " sec" ); + Log.d("RecordWaveTask", this.outputFile.length() + " byte" ); return new Object[] { this.outputFile.length(), endTime - startTime }; } @@ -267,7 +250,6 @@ private static void writeWavHeader(OutputStream out, short channels, int sampleR * @throws IOException */ private static void updateWavHeader(File wav) throws IOException { - Log.d("RecordWaveTask", "updateWavHeader0"); byte[] sizes = ByteBuffer .allocate(8) .order(ByteOrder.LITTLE_ENDIAN) @@ -277,7 +259,7 @@ private static void updateWavHeader(File wav) throws IOException { .putInt((int) (wav.length() - 8)) // ChunkSize .putInt((int) (wav.length() - 44)) // Subchunk2Size .array(); - Log.d("RecordWaveTask", "updateWavHeader1"); + RandomAccessFile accessWave = null; //noinspection CaughtExceptionImmediatelyRethrown try { @@ -286,13 +268,9 @@ private static void updateWavHeader(File wav) throws IOException { accessWave.seek(4); accessWave.write(sizes, 0, 4); - Log.d("RecordWaveTask", "updateWavHeader2"); - // Subchunk2Size accessWave.seek(40); accessWave.write(sizes, 4, 4); - - Log.d("RecordWaveTask", "updateWavHeader3"); } catch (IOException ex) { // Rethrow but we still close accessWave in our finally throw ex; @@ -324,20 +302,6 @@ protected void onPostExecute(Object[] results) { Log.e(RecordWaveTask.class.getSimpleName(), throwable.getMessage(), throwable); } - // If we're attached to an activity - if (ctx != null) { - if (throwable == null) { - // Display final recording stats - double size = (long) results[0] / 1000000.00; - long time = (long) results[1] / 1000; - Toast.makeText(ctx, String.format(Locale.getDefault(), "%.2f MB / %d seconds", - size, time), Toast.LENGTH_LONG).show(); - } else { - // Error - Toast.makeText(ctx, throwable.getLocalizedMessage(), Toast.LENGTH_LONG).show(); - } - } - if (cancelCompleteListener != null) { cancelCompleteListener.onCancelCompleted(); } From a33a9d0873713160b923ed15402748d3257a8257 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 18 Jul 2017 13:39:01 +0900 Subject: [PATCH 18/26] Support buffer size --- AudioExample/AudioExample.js | 2 +- .../com/rnim/rn/audio/AudioRecorderManager.java | 4 +++- .../java/com/rnim/rn/audio/RecordWaveTask.java | 5 ++++- index.js | 16 +++++++--------- ios/AudioRecorderManager.m | 8 +++++++- ios/StreamingModule.h | 3 ++- ios/StreamingModule.m | 7 ++++--- 7 files changed, 28 insertions(+), 17 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 08be7107..e4805d45 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -25,7 +25,7 @@ class AudioExample extends Component { }; prepareRecordingPath(audioPath){ - AudioRecorder.prepareStreamingAtPath(this.state.audioPath, { + AudioRecorder.prepareStreamingAtPath(this.state.audioPath, 8192, { SampleRate: 22050, Channels: 1, // Following is not supported diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java 
b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 621ab92d..2d8ecea6 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -123,7 +123,7 @@ private int getOutputFormatFromString(String outputFormat) { } @ReactMethod - public void prepareStreamingAtPath(String recordingPath, ReadableMap recordingSettings, Promise promise) { + public void prepareStreamingAtPath(String recordingPath, int bufferSize, ReadableMap recordingSettings, Promise promise) { try { File wavFile = new File(recordingPath); @@ -144,6 +144,8 @@ public void prepareStreamingAtPath(String recordingPath, ReadableMap recordingSe recordTask.setChannelMask(channelMask); } + recordTask.setBufferSize(bufferSize); + recordTask.setOutputFile(wavFile); recordTask.setStreamListener(new RecordWaveTask.OnStreamListener() { diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index 1bfe699e..36d966c8 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -26,6 +26,8 @@ public class RecordWaveTask extends AsyncTask { private int SAMPLE_RATE = 44100; // Hz private int ENCODING = AudioFormat.ENCODING_PCM_16BIT; private int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; + private int BUFFER_SIZE = 8192; + // int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); private File outputFile; @@ -40,6 +42,8 @@ public RecordWaveTask() {} public void setOutputFile(File file) { this.outputFile = file; } + public void setBufferSize(int bufferSize) { this.BUFFER_SIZE = bufferSize; } + // Step 1 - This interface defines the type of messages I want to communicate to my owner public interface OnCancelCompleteListener { public void onCancelCompleted(); @@ -68,7 +72,6 @@ public void setStreamListener(OnStreamListener listener) { */ @Override protected Object[] doInBackground(File... 
unused) { - int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); AudioRecord audioRecord = null; FileOutputStream wavOut = null; long startTime = 0; diff --git a/index.js b/index.js index 51c0188c..93eaefc1 100644 --- a/index.js +++ b/index.js @@ -12,7 +12,7 @@ import ReactNative, { var AudioRecorderManager = NativeModules.AudioRecorderManager; var AudioRecorder = { - prepareStreamingAtPath: function(path, options) { + prepareStreamingAtPath: function(path, bufferSize=8192, options) { if (this.progressSubscription) this.progressSubscription.remove(); this.progressSubscription = NativeAppEventEmitter.addListener('recordingProgress', (data) => { @@ -25,7 +25,6 @@ var AudioRecorder = { if (this.finishedSubscription) this.finishedSubscription.remove(); this.finishedSubscription = NativeAppEventEmitter.addListener('recordingFinished', (data) => { - console.log('recordingFinished()'); if (this.onFinished) { this.onFinished(data); } @@ -35,7 +34,6 @@ var AudioRecorder = { if (this.dataReceivedSubscription) this.dataReceivedSubscription.remove(); this.dataReceivedSubscription = NativeAppEventEmitter.addListener('dataReceived', (data) => { - console.log(data); if (this.onDataReceived) { this.onDataReceived(data); } @@ -44,28 +42,28 @@ var AudioRecorder = { var defaultOptions = { SampleRate: 44100.0, - Channels: 2, + Channels: 1, AudioQuality: 'High', AudioEncoding: 'ima4', - OutputFormat: 'mpeg_4', MeteringEnabled: false, - AudioEncodingBitRate: 32000 + // OutputFormat: 'mpeg_4', + // AudioEncodingBitRate: 32000 }; var recordingOptions = {...defaultOptions, ...options}; if (Platform.OS === 'ios') { - console.log('prepareStreamingAtPath()'); AudioRecorderManager.prepareStreamingAtPath( path, + bufferSize, recordingOptions.SampleRate, recordingOptions.Channels, recordingOptions.AudioQuality, recordingOptions.AudioEncoding, - recordingOptions.MeteringEnabled + recordingOptions.MeteringEnabled, ); } else { - return AudioRecorderManager.prepareStreamingAtPath(path, recordingOptions); + return AudioRecorderManager.prepareStreamingAtPath(path, bufferSize, recordingOptions); } }, startStreaming: function() { diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 5d6b1e05..6f83244b 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -33,6 +33,7 @@ @implementation AudioRecorderManager { NSNumber *_audioSampleRate; AVAudioSession *_recordSession; BOOL _meteringEnabled; + int _bufferSize; } StreamingModule* streamingModule; @@ -125,7 +126,7 @@ - (NSString *) applicationDocumentsDirectory }]; } -RCT_EXPORT_METHOD(prepareStreamingAtPath:(NSString *)path sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled) +RCT_EXPORT_METHOD(prepareStreamingAtPath:(NSString *)path bufferSize:(int)bufferSize sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled) { NSLog(@"prepareStreaming"); _audioFileURL = [NSURL fileURLWithPath:path]; @@ -136,6 +137,7 @@ - (NSString *) applicationDocumentsDirectory _audioChannels = [NSNumber numberWithInt:1]; _audioSampleRate = [NSNumber numberWithFloat:44100.0]; _meteringEnabled = NO; + _bufferSize = 8192; // Set audio quality from options if (quality != nil) { @@ -183,6 +185,9 @@ - (NSString *) applicationDocumentsDirectory // Set sample rate from options _audioSampleRate = [NSNumber 
numberWithFloat:sampleRate]; + // Set buffer size from options + _bufferSize = bufferSize; + NSDictionary *recordSettings = [NSDictionary dictionaryWithObjectsAndKeys: //_audioQuality, AVEncoderAudioQualityKey, //_audioEncoding, AVFormatIDKey, @@ -197,6 +202,7 @@ - (NSString *) applicationDocumentsDirectory streamingModule = [[StreamingModule alloc] init]; [streamingModule prepare:_audioFileURL + bufferSize:_bufferSize settings:recordSettings handler:^(AVAudioPCMBuffer *buf){ NSMutableArray *body = [[NSMutableArray alloc] init]; diff --git a/ios/StreamingModule.h b/ios/StreamingModule.h index 00e40b0f..c3e38948 100644 --- a/ios/StreamingModule.h +++ b/ios/StreamingModule.h @@ -16,13 +16,14 @@ NSDictionary *_settings; AVAudioMixerNode *_downMixer; NSTimeInterval _startTime; + int _bufferSize; @public bool recording; NSTimeInterval currentTime; } -- (void)prepare:(NSURL*)recordingFileUrl settings:(NSDictionary*)settings handler:(void(^)(AVAudioPCMBuffer *))handler; +- (void)prepare:(NSURL*)recordingFileUrl bufferSize:(int)bufferSize settings:(NSDictionary*)settings handler:(void(^)(AVAudioPCMBuffer *))handler; - (void)start; - (void)pause; - (void)stop; diff --git a/ios/StreamingModule.m b/ios/StreamingModule.m index 8ebdb170..9a524175 100644 --- a/ios/StreamingModule.m +++ b/ios/StreamingModule.m @@ -10,10 +10,11 @@ @implementation StreamingModule -- (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handler:(void(^)(AVAudioPCMBuffer *))handler { +- (void)prepare:(NSURL *)recordingFileUrl bufferSize:(int)bufferSize settings:(NSDictionary*)settings handler:(void(^)(AVAudioPCMBuffer *))handler { _audioDataReceived = [handler copy]; _fileUrl = recordingFileUrl; _settings = settings; + _bufferSize = bufferSize; _engine = [[AVAudioEngine alloc] init]; @@ -55,7 +56,7 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl NSLog(@"InstallTapOnBus"); - [_downMixer installTapOnBus: 0 bufferSize: 8192 format: pcmFloat32Format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { + [_downMixer installTapOnBus: 0 bufferSize: _bufferSize format: pcmFloat32Format block: ^(AVAudioPCMBuffer *buf, AVAudioTime *when) { // ‘buf' contains audio captured from input node at time 'when' currentTime = when.sampleTime / when.sampleRate - _startTime; @@ -91,7 +92,7 @@ - (void)prepare:(NSURL *)recordingFileUrl settings:(NSDictionary*)settings handl - (void)start { if (_engine == nil) { if (_audioDataReceived != nil && _fileUrl != nil && _settings != nil) { - [self prepare:_fileUrl settings:_settings handler:_audioDataReceived]; + [self prepare:_fileUrl bufferSize:_bufferSize settings:_settings handler:_audioDataReceived]; } else { NSLog(@"Have to prepare before start"); return; From 8d5d60446903f81e0b5d613b79587aeea91884bf Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Wed, 2 Aug 2017 19:08:11 +0900 Subject: [PATCH 19/26] Fix audio buffer in Android --- .../rnim/rn/audio/AudioRecorderManager.java | 4 +-- .../com/rnim/rn/audio/RecordWaveTask.java | 36 ++++++++++++------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 2d8ecea6..a2c77588 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -150,10 +150,10 @@ public void prepareStreamingAtPath(String recordingPath, int bufferSize, Readabl 
recordTask.setStreamListener(new RecordWaveTask.OnStreamListener() { @Override - public void onDataReceived(byte[] buffer) { + public void onDataReceived(short[] buffer) { Log.d("onDataReceived", buffer.length + ""); WritableArray body = Arguments.createArray(); - for (byte value: buffer) { + for (short value: buffer) { body.pushInt((int) value); } sendEvent("dataReceived", body); diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index 36d966c8..a1aace39 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -26,7 +26,7 @@ public class RecordWaveTask extends AsyncTask { private int SAMPLE_RATE = 44100; // Hz private int ENCODING = AudioFormat.ENCODING_PCM_16BIT; private int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; - private int BUFFER_SIZE = 8192; + private int BUFFER_SIZE_IN_FRAME = 8192; // int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); private File outputFile; @@ -42,7 +42,7 @@ public RecordWaveTask() {} public void setOutputFile(File file) { this.outputFile = file; } - public void setBufferSize(int bufferSize) { this.BUFFER_SIZE = bufferSize; } + public void setBufferSize(int bufferSizeInFrame) { this.BUFFER_SIZE_IN_FRAME = bufferSizeInFrame; } // Step 1 - This interface defines the type of messages I want to communicate to my owner public interface OnCancelCompleteListener { @@ -55,7 +55,7 @@ public void setCancelCompleteListener(OnCancelCompleteListener listener) { } public interface OnStreamListener { - public void onDataReceived(byte[] buffer); + public void onDataReceived(short[] buffer); } private OnStreamListener streamListener = null; @@ -74,19 +74,21 @@ public void setStreamListener(OnStreamListener listener) { protected Object[] doInBackground(File... unused) { AudioRecord audioRecord = null; FileOutputStream wavOut = null; + long startTime = 0; long endTime = 0; try { // Open our two resources - audioRecord = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_MASK, ENCODING, BUFFER_SIZE); + int bufferSizeInBytes = BUFFER_SIZE_IN_FRAME * 2; + audioRecord = new AudioRecord(AUDIO_SOURCE, SAMPLE_RATE, CHANNEL_MASK, ENCODING, bufferSizeInBytes); wavOut = new FileOutputStream(this.outputFile); // Write out the wav file header writeWavHeader(wavOut, CHANNEL_MASK, SAMPLE_RATE, ENCODING); // Avoiding loop allocations - byte[] buffer = new byte[BUFFER_SIZE]; + short[] buffer = new short[BUFFER_SIZE_IN_FRAME]; boolean run = true; int read; long total = 0; @@ -95,14 +97,16 @@ protected Object[] doInBackground(File... unused) { startTime = SystemClock.elapsedRealtime(); audioRecord.startRecording(); while (run && !isCancelled()) { - read = audioRecord.read(buffer, 0, buffer.length); + read = audioRecord.read(buffer, 0, buffer.length); // Count for 16 bit PCM // WAVs cannot be > 4 GB due to the use of 32 bit unsigned integers. 
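For readers following the byte layout: the hunk above switches the read buffer from byte[] to short[], so each 16-bit sample now has to be serialized as two little-endian bytes before it reaches the WAV stream. A minimal standalone sketch of that conversion (class and method names here are illustrative, not part of the patch):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    final class PcmShortToBytes {
        /** Packs 16-bit PCM samples into little-endian bytes, the layout a WAV data chunk expects. */
        static byte[] toLittleEndianBytes(short[] samples, int validSamples) {
            ByteBuffer bb = ByteBuffer.allocate(validSamples * 2);  // two bytes per 16-bit sample
            bb.order(ByteOrder.LITTLE_ENDIAN);                      // PCM WAV data is little-endian
            bb.asShortBuffer().put(samples, 0, validSamples);       // the short view writes through to the backing array
            return bb.array();
        }
    }

In the hunk that follows, the same conversion is done inline with a ByteBuffer before wavOut.write(bytes, 0, read * 2), which is also why the running byte total now advances by read * 2 per read.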
if (total + read > 4294967295L) { // Write as many bytes as we can before hitting the max size - byte[] tmpBuffer = new byte[BUFFER_SIZE]; - for (int i = 0; i < read && total <= 4294967295L; i++, total++) { - wavOut.write(buffer[i]); + short[] tmpBuffer = new short[BUFFER_SIZE_IN_FRAME]; + for (int i = 0; i < read && total <= 4294967295L; i++, total+=2) { + ByteBuffer byteBuffer = ByteBuffer.allocate(2); + byteBuffer.putShort(buffer[i]); + wavOut.write(byteBuffer.array()); tmpBuffer[i] = buffer[i]; } if (this.streamListener != null) { @@ -110,11 +114,17 @@ protected Object[] doInBackground(File... unused) { } run = false; } else if (read >= 0) { - // Write out the entire read buffer - wavOut.write(buffer, 0, read); - total += read; + // Short array to byte array + ByteBuffer byteBuffer = ByteBuffer.allocate(buffer.length * 2); + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + byteBuffer.asShortBuffer().put(buffer); + byte[] bytes = byteBuffer.array(); + + wavOut.write(bytes, 0, read * 2); + + total += (read * 2); // 2 Byte = Short if (this.streamListener != null) { - Log.d("onDataReceived", "RecordWaveTask - " + buffer.length + ""); + Log.d("onDataReceived", "RecordWaveTask - " + read + ""); this.streamListener.onDataReceived(buffer.clone()); } } From 65585eb21117154b5712674a7477a601c72c604f Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Mon, 7 Aug 2017 21:32:04 +0900 Subject: [PATCH 20/26] Apply WIT VAD in Android --- AudioExample/AudioExample.js | 10 +- android/build.gradle | 5 + .../rnim/rn/audio/AudioRecorderManager.java | 11 + .../com/rnim/rn/audio/RecordWaveTask.java | 62 +++ android/src/main/jni/WITCvad.c | 357 ++++++++++++++++++ android/src/main/jni/WITCvad.h | 169 +++++++++ android/src/main/jni/WITVadSimple.c | 206 ++++++++++ android/src/main/jni/WITVadSimple.h | 73 ++++ android/src/main/jni/WITVadWrapper.c | 56 +++ index.js | 10 + 10 files changed, 956 insertions(+), 3 deletions(-) create mode 100644 android/src/main/jni/WITCvad.c create mode 100644 android/src/main/jni/WITCvad.h create mode 100644 android/src/main/jni/WITVadSimple.c create mode 100644 android/src/main/jni/WITVadSimple.h create mode 100644 android/src/main/jni/WITVadWrapper.c diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index e4805d45..6b9b2b84 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -25,8 +25,8 @@ class AudioExample extends Component { }; prepareRecordingPath(audioPath){ - AudioRecorder.prepareStreamingAtPath(this.state.audioPath, 8192, { - SampleRate: 22050, + AudioRecorder.prepareStreamingAtPath(this.state.audioPath, 1600, { + SampleRate: 16000, Channels: 1, // Following is not supported // AudioQuality: "Low", @@ -56,7 +56,11 @@ class AudioExample extends Component { }; AudioRecorder.onDataReceived = (data) => { - console.log(data); + // console.log(data); + } + + AudioRecorder.onVadReceived = (vadResult) => { + console.log(vadResult); } }); } diff --git a/android/build.gradle b/android/build.gradle index 22023145..56d21662 100644 --- a/android/build.gradle +++ b/android/build.gradle @@ -9,6 +9,10 @@ android { targetSdkVersion 23 versionCode 1 versionName "1.0" + ndk { + moduleName "witvad" + ldLibs "log" + } } buildTypes { release { @@ -21,4 +25,5 @@ dependencies { compile fileTree(include: ['*.jar'], dir: 'libs') compile 'com.android.support:appcompat-v7:23.1.0' compile 'com.facebook.react:react-native:+' + compile 'com.github.wendykierp:JTransforms:3.0' } diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java 
b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index a2c77588..1d4285c0 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -160,6 +160,17 @@ public void onDataReceived(short[] buffer) { } }); + recordTask.setVadListener(new RecordWaveTask.OnVadListener() { + + @Override + public void onVadReceived(int vadResult) { + Log.d("onVadReceived", vadResult + ""); + // WritableMap body = Arguments.createMap(); + // body.putInt("vadResult", vadResult); + sendEvent("vadReceived", vadResult); + } + }); + // int outputFormat = getOutputFormatFromString(recordingSettings.getString("OutputFormat")); // recorder.setOutputFormat(outputFormat); // int audioEncoder = getAudioEncoderFromString(recordingSettings.getString("AudioEncoding")); diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index a1aace39..3f5bca81 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -15,6 +15,9 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Arrays; +import org.jtransforms.fft.FloatFFT_1D; + /** * Created by KDH on 2017. 5. 15.. */ @@ -27,10 +30,21 @@ public class RecordWaveTask extends AsyncTask { private int ENCODING = AudioFormat.ENCODING_PCM_16BIT; private int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; private int BUFFER_SIZE_IN_FRAME = 8192; + private int vadSensitivity = 0; + private int vadTimeout = 7000; // int BUFFER_SIZE = 2 * AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_MASK, ENCODING); private File outputFile; + static { + System.loadLibrary("witvad"); + } + + public native int VadInit(int vadSensitivity, int vadTimeout); + public native int VadStillTalking(short[] samples, float[] fft_mags); + public native int GetVadSamplesPerFrame(); + public native void VadClean(); + public RecordWaveTask() {} public void setAudioSource(int audioSource) { this.AUDIO_SOURCE = audioSource; } @@ -44,6 +58,10 @@ public RecordWaveTask() {} public void setBufferSize(int bufferSizeInFrame) { this.BUFFER_SIZE_IN_FRAME = bufferSizeInFrame; } + public void setVadSensitivity(int vadSensitivity) { this.vadSensitivity = vadSensitivity; } + + public void setVadTimeout(int vadTimeout) { this.vadTimeout = vadTimeout; } + // Step 1 - This interface defines the type of messages I want to communicate to my owner public interface OnCancelCompleteListener { public void onCancelCompleted(); @@ -63,6 +81,15 @@ public void setStreamListener(OnStreamListener listener) { this.streamListener = listener; } + public interface OnVadListener { + public void onVadReceived(int vadResult); + } + private OnVadListener vadListener = null; + + public void setVadListener(OnVadListener listener) { + this.vadListener = listener; + } + /** * Opens up the given file, writes the header, and keeps filling it with raw PCM bytes from * AudioRecord until it reaches 4GB or is stopped by the user. It then goes back and updates @@ -92,6 +119,14 @@ protected Object[] doInBackground(File... 
unused) { boolean run = true; int read; long total = 0; + int vadResult; + + VadInit(vadSensitivity, vadTimeout); + + FloatFFT_1D fft = new FloatFFT_1D(GetVadSamplesPerFrame()); + float[] fft_mags = new float[GetVadSamplesPerFrame()/2]; + float[] fft_modules = new float[GetVadSamplesPerFrame()]; + short[] samples; // Let's go startTime = SystemClock.elapsedRealtime(); @@ -99,6 +134,32 @@ protected Object[] doInBackground(File... unused) { while (run && !isCancelled()) { read = audioRecord.read(buffer, 0, buffer.length); // Count for 16 bit PCM + int samplesAnalyzed = 0; + while(samplesAnalyzed + GetVadSamplesPerFrame() < read){ + samples = Arrays.copyOfRange(buffer, samplesAnalyzed, samplesAnalyzed +GetVadSamplesPerFrame()); + for(int i=0; i 4 GB due to the use of 32 bit unsigned integers. if (total + read > 4294967295L) { // Write as many bytes as we can before hitting the max size @@ -136,6 +197,7 @@ protected Object[] doInBackground(File... unused) { if (audioRecord != null) { try { if (audioRecord.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) { + VadClean(); audioRecord.stop(); Log.d("RecordWaveTask", "audioRecord.stop()"); endTime = SystemClock.elapsedRealtime(); diff --git a/android/src/main/jni/WITCvad.c b/android/src/main/jni/WITCvad.c new file mode 100644 index 00000000..91eabf30 --- /dev/null +++ b/android/src/main/jni/WITCvad.c @@ -0,0 +1,357 @@ +// +// WITCvad.m +// Wit +// +// Created by Anthony Kesich on 11/12/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved. +// + +#include "WITCvad.h" + + +/* + Adds value to the head of memory + */ +static void frame_memory_push(s_wv_detector_cvad_state *cvad_state, short int value); + +/* + Sums up the last N values of memory + */ +static int frame_memory_sum_last_n(s_wv_detector_cvad_state *cvad_state, int nb); + + +int wvs_cvad_detect_talking(s_wv_detector_cvad_state *cvad_state, short int *samples, float *fft_mags) +{ + double dfc; + double band_energy[DETECTOR_CVAD_N_ENERGY_BANDS]; + double sfm; + int fft_size = pow(2,floor(log2(cvad_state->samples_per_frame))); + short int counter; + int action = -1; + int zero_crossings; + + //only process cvad_state->samples_per_frame samples at a time + //frames_detector_cvad_fft(samples, fft_modules, cvad_state->samples_per_frame); + dfc = frames_detector_cvad_most_dominant_freq(cvad_state, fft_mags, fft_size, cvad_state->samples_per_frame); + sfm = frames_detector_cvad_spectral_flatness(fft_mags, fft_size); + zero_crossings = frames_detector_cvad_zero_crossings(samples, cvad_state->samples_per_frame); + frames_detector_cvad_multiband_energy(cvad_state, fft_mags, fft_size, band_energy, cvad_state->samples_per_frame); + + vw_detector_cvad_set_threshold(cvad_state); + counter = vw_detector_cvad_check_frame(cvad_state, band_energy, dfc, sfm, zero_crossings); + frame_memory_push(cvad_state, counter); + + if ((counter < 3 && cvad_state->talking == 0) || !cvad_state->thresh_initialized) { + cvad_state->silence_count++; + //only update reference levels if we don't detect speech + wv_detector_cvad_update_ref_levels(cvad_state, band_energy, dfc, sfm); + } + if (cvad_state->thresh_initialized) { + int start_sum = frame_memory_sum_last_n(cvad_state, DETECTOR_CVAD_N_FRAMES_CHECK_START); + int stop_sum_long = frame_memory_sum_last_n(cvad_state, DETECTOR_CVAD_N_FRAMES_CHECK_END_LONG); + int stop_sum_short = frame_memory_sum_last_n(cvad_state, DETECTOR_CVAD_N_FRAMES_CHECK_END_SHORT); + int speech_time = (cvad_state->frame_number-cvad_state->speech_start_frame) * 
cvad_state->samples_per_frame * 1000 / cvad_state->sample_freq; + + if(start_sum > cvad_state->max_start_sum){ + cvad_state->max_start_sum = start_sum; + } + if (!cvad_state->talking && start_sum >= cvad_state->start_sum_threshold ) { + cvad_state->talking = 1; + cvad_state->speech_start_frame = cvad_state->frame_number; + action = 1; + } + else if (cvad_state->talking && speech_time > DETECTOR_CVAD_MINIMUM_LENGTH + && ((counter < 3 + && stop_sum_long <= cvad_state->max_start_sum*cvad_state->end_sum_long_coeff + && stop_sum_short <= cvad_state->max_start_sum*cvad_state->end_sum_short_coeff) + || (cvad_state->max_speech_time > 0 + && speech_time >= cvad_state->max_speech_time))) { + cvad_state->talking = 0; + action = 0; + cvad_state->max_start_sum = 0; + } + } + + cvad_state->frame_number++; + + return action; +} + +s_wv_detector_cvad_state* wv_detector_cvad_init(int sample_rate, int sensitivity, int speech_timeout) +{ + s_wv_detector_cvad_state *cvad_state = malloc(sizeof(s_wv_detector_cvad_state)); + cvad_state->energy_thresh_coeff_lower = DETECTOR_CVAD_E_TH_COEFF_LOW_BAND; + cvad_state->energy_thresh_coeff_upper = DETECTOR_CVAD_E_TH_COEFF_UPPER_BANDS; + cvad_state->sfm_thresh= DETECTOR_CVAD_SFM_TH; + cvad_state->dfc_thresh= DETECTOR_CVAD_DFC_TH; + cvad_state->min_zero_crossings= DETECTOR_CVAD_MIN_ZERO_CROSSINGS; + cvad_state->max_zero_crossings= DETECTOR_CVAD_MAX_ZERO_CROSSINGS; + memset(cvad_state->energy_update_coeff, 0.20, DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + memset(cvad_state->energy_prev_variance, -1, DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + memset(cvad_state->energy_history, 0, DETECTOR_CVAD_ENERGY_MEMORY * DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + cvad_state->energy_history_index = 0; + cvad_state->dfc_update_coeff = 0.10; + cvad_state->sfm_update_coeff = 0.10; + cvad_state->frame_number = 0; + cvad_state->speech_start_frame = -1; + cvad_state->max_speech_time = speech_timeout; + cvad_state->thresh_initialized = 0; + cvad_state->silence_count = 0; + cvad_state->talking = 0; + memset(cvad_state->ref_energy, 0, DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + cvad_state->ref_dfc = 0; + cvad_state->ref_sfm = 0; + memset(cvad_state->dfc_history, 0, DETECTOR_CVAD_FRAMES_INIT * sizeof(double)); + cvad_state->sample_freq = sample_rate; + cvad_state->max_start_sum = 0; + cvad_state->samples_per_frame = pow(2,ceil(log2(cvad_state->sample_freq/150))); //around 100 frames per second, but must be a power of two + cvad_state->previous_state_index = 0; + memset(cvad_state->previous_state, 0, DETECTOR_CVAD_RESULT_MEMORY * sizeof(short int)); + + wv_detector_cvad_set_sensitivity(cvad_state, sensitivity); + + return cvad_state; +} + +void wv_detector_cvad_clean(s_wv_detector_cvad_state *cvad_state) +{ + free(cvad_state); +} + +void wv_detector_cvad_set_sensitivity(s_wv_detector_cvad_state *cvad_state, int sensitivity) +{ + float sensitivity_frac = fmax(0,fmin(100,sensitivity))/100.0; + cvad_state->n_frames_check_start=DETECTOR_CVAD_N_FRAMES_CHECK_START; + cvad_state->n_frames_check_end_short=DETECTOR_CVAD_N_FRAMES_CHECK_END_SHORT; + cvad_state->n_frames_check_end_long=DETECTOR_CVAD_N_FRAMES_CHECK_END_LONG; + + cvad_state->start_sum_threshold = DETECTOR_CVAD_COUNT_SUM_START_SENSITIVE*sensitivity_frac; + cvad_state->start_sum_threshold += DETECTOR_CVAD_COUNT_SUM_START*(1-sensitivity_frac); + + cvad_state->end_sum_short_coeff = DETECTOR_CVAD_COUNT_END_SHORT_FACTOR_SENSITIVE*sensitivity_frac; + cvad_state->end_sum_short_coeff += 
DETECTOR_CVAD_COUNT_END_SHORT_FACTOR*(1-sensitivity_frac); + + cvad_state->end_sum_long_coeff = DETECTOR_CVAD_COUNT_END_LONG_FACTOR_SENSITIVE*sensitivity_frac; + cvad_state->end_sum_long_coeff += DETECTOR_CVAD_COUNT_END_LONG_FACTOR*(1-sensitivity_frac); +} + +void wv_detector_cvad_update_ref_levels(s_wv_detector_cvad_state *cvad_state, + double *band_energy, + double dfc, + double sfm) +{ + int b=0; + if (!cvad_state->thresh_initialized) { + //if still initializing, accumulate values to average + for(b=0; bref_energy[b] += band_energy[b]; + } + + + cvad_state->ref_sfm += sfm; + + cvad_state->dfc_history[cvad_state->frame_number] = dfc > 0 ? log(dfc) : 0; + } + + //record energy history + for(b=0; benergy_history[b][cvad_state->energy_history_index] = band_energy[b]; + } + cvad_state->energy_history_index++; + cvad_state->energy_history_index%=DETECTOR_CVAD_ENERGY_MEMORY; + + if (cvad_state->frame_number >= DETECTOR_CVAD_FRAMES_INIT) { + if(!cvad_state->thresh_initialized) { + //if done initializing, divide by number of samples to get an average + cvad_state->thresh_initialized = 1; + for(b=0; bref_energy[b] /= cvad_state->frame_number; + } + + cvad_state->ref_sfm /= cvad_state->frame_number; + + double sum = 0; + double sq_sum = 0; + for(b=0; bref_dfc+=cvad_state->dfc_history[b]; + sum += cvad_state->dfc_history[b]; + sq_sum += pow(cvad_state->dfc_history[b],2); + } + cvad_state->ref_dfc /= cvad_state->frame_number; + cvad_state->ref_dfc_var = (sq_sum-sum*sum/cvad_state->frame_number)/(cvad_state->frame_number -1); + + } else if (cvad_state->talking == 0) { + //otherwise update thresholds based on adaptive rules if there's no speech + wv_detector_cvad_modify_update_coeffs(cvad_state); + for(b=0; bref_energy[b] *= (1-cvad_state->energy_update_coeff[b]); + cvad_state->ref_energy[b] += cvad_state->energy_update_coeff[b]*band_energy[b]; + } + + } + } + +} + +void vw_detector_cvad_set_threshold(s_wv_detector_cvad_state *cvad_state) +{ + //update thresholds to be a multiple of the reference level + int b; + cvad_state->th_energy[0] = cvad_state->ref_energy[0]*cvad_state->energy_thresh_coeff_lower; + for(b=1; bth_energy[b] = cvad_state->ref_energy[b]*cvad_state->energy_thresh_coeff_upper; + } + cvad_state->th_dfc = cvad_state->ref_dfc+cvad_state->dfc_thresh; + cvad_state->th_sfm = cvad_state->ref_sfm+cvad_state->sfm_thresh; +} + +void wv_detector_cvad_modify_update_coeffs(s_wv_detector_cvad_state *cvad_state){ + int b; + for(b=0; benergy_history[b][h]; + sq_sum+=pow(cvad_state->energy_history[b][h],2); + } + double variance = (sq_sum-sum*sum/DETECTOR_CVAD_ENERGY_MEMORY)/(DETECTOR_CVAD_ENERGY_MEMORY-1); + double ratio = variance/cvad_state->energy_prev_variance[b]; + if(ratio > 1.25){ + cvad_state->energy_update_coeff[b] = 0.25; + } else if(ratio > 1.10){ + cvad_state->energy_update_coeff[b] = 0.20; + } else if(ratio > 1.00){ + cvad_state->energy_update_coeff[b] = 0.15; + } else if(ratio > 0.00){ + cvad_state->energy_update_coeff[b] = 0.10; + } else { + //negative value indicates that this is the first pass of variance. 
Just set the coeff to 0.2 + cvad_state->energy_update_coeff[b] = 0.20; + } + cvad_state->energy_prev_variance[b] = variance; + } +} + +short int vw_detector_cvad_check_frame(s_wv_detector_cvad_state *cvad_state, double *band_energy, double dfc, double sfm, int zero_crossings) +{ + short int counter; + + counter = 0; + + int band_counter = 0; + if (band_energy[0] > cvad_state->th_energy[0]) { + counter += 2; + } + + int b; + for(b=1; b cvad_state->th_energy[b]){ + band_counter++; + } + } + if(band_counter >= 2){ + counter+=2; + } + + if (fabs((dfc > 0 ? log(dfc): 0) - cvad_state->ref_dfc) > cvad_state->ref_dfc_var) { + counter++; + } + if (sfm > cvad_state->th_sfm) { + counter++; + } + if(zero_crossings >= cvad_state->min_zero_crossings && zero_crossings <= cvad_state->max_zero_crossings){ + counter++; + } + + return counter; +} + + +double frames_detector_cvad_most_dominant_freq(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double nb_samples) +{ + double k = 0.0f; + double max = 0.0f; + double amplitude_minimum = 1.0f; + int i; + + for (i = 0; i < nb_modules; i++) { + if (fft_mags[i] > max && fft_mags[i] > amplitude_minimum) { + max = fft_mags[i]; + k = i; + } + } + + return k * (double)cvad_state->sample_freq / (double)nb_samples; +} + +void frames_detector_cvad_multiband_energy(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double *band_energy, int nb_samples){ + + int b = 0; + int k = 0; + + for(b = 0; bsample_freq/nb_samples < 1000*(b+1)){ + band_energy[b]+=fft_mags[k]; + k++; + } + } + +} + +double frames_detector_cvad_spectral_flatness(float *fft_mags, int nb) +{ + double geo_mean = 0.0f; + double arithm_mean = 0.0f; + double sfm = 0.0f; + int i; + + for (i = 0; i < nb; i++) { + if (fft_mags[i] != 0.0f) { + geo_mean += log(fft_mags[i]); + arithm_mean += fft_mags[i]; + } + } + geo_mean = exp(geo_mean / (double) nb); + arithm_mean = arithm_mean / (double) nb; + sfm = 10 * log10(geo_mean / arithm_mean); + sfm = fabs(sfm); + + return sfm; +} + +int frames_detector_cvad_zero_crossings(short int *samples, int nb){ + int num_zero_crossings = 0; + int i; + + for(i=1; iprevious_state[cvad_state->previous_state_index] = value; + cvad_state->previous_state_index++; + cvad_state->previous_state_index%=DETECTOR_CVAD_RESULT_MEMORY; +} + +static int frame_memory_sum_last_n(s_wv_detector_cvad_state *cvad_state, int nb) +{ + int i = 0; + int sum = 0; + + for (i = 0; i < nb; i++) { + int indx = (cvad_state->previous_state_index - (i+1) + DETECTOR_CVAD_RESULT_MEMORY) % DETECTOR_CVAD_RESULT_MEMORY; + sum += cvad_state->previous_state[indx]; + } + + return sum; +} + diff --git a/android/src/main/jni/WITCvad.h b/android/src/main/jni/WITCvad.h new file mode 100644 index 00000000..48795946 --- /dev/null +++ b/android/src/main/jni/WITCvad.h @@ -0,0 +1,169 @@ +// +// WITCvad.h +// Wit +// +// Created by Anthony Kesich on 11/12/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved. +// + +#ifndef Wit_WITCvad_h +#define Wit_WITCvad_h + + +#include +#include +#include +#include + + +/* + * This speech algorithm looks at multiple auditory compenents related to speech: + * - Energy divided into 1 KHz bands + * - Dominant Frequency Component + * - Spectral Flatness Measure + * - Zero-crossings + * + * If many features of speech are present for a period of time (~150 ms), speech is detected. 
+ * The end of speech is determined by most features of speech disappearing for an extended period of time (~1 sec) + */ + +#define DETECTOR_CVAD_FRAMES_INIT 40 /* number of frames to use to initialize values */ +#define DETECTOR_CVAD_E_TH_COEFF_LOW_BAND 2.5f /* Energy threshold coefficient */ +#define DETECTOR_CVAD_E_TH_COEFF_UPPER_BANDS 2.0f /* Energy threshold coefficient */ +#define DETECTOR_CVAD_SFM_TH 3.0f /* Spectral Flatness Measure threshold */ +#define DETECTOR_CVAD_DFC_TH 250.0f /* most Dominant Frequency Component threshold */ +#define DETECTOR_CVAD_MIN_ZERO_CROSSINGS 5 /* fewest zero crossings for speech */ +#define DETECTOR_CVAD_MAX_ZERO_CROSSINGS 15 /* maximum zero crossings for speech */ +#define DETECTOR_CVAD_RESULT_MEMORY 130 /* number of frame results to keep in memory */ +#define DETECTOR_CVAD_ENERGY_MEMORY 20 /* number of frame results to keep in memory */ +#define DETECTOR_CVAD_N_ENERGY_BANDS 5 /* number of 1 KHz energy bands to compute */ +#define DETECTOR_CVAD_MINIMUM_LENGTH 1000 /* minimum length of vad in ms */ + +//final speech detection variables +#define DETECTOR_CVAD_N_FRAMES_CHECK_START 15 +#define DETECTOR_CVAD_COUNT_SUM_START 4.5*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_COUNT_SUM_START_SENSITIVE 3.8*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_N_FRAMES_CHECK_END_SHORT 1.5*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_COUNT_END_SHORT_FACTOR 0.6 +#define DETECTOR_CVAD_COUNT_END_SHORT_FACTOR_SENSITIVE 0.3 +#define DETECTOR_CVAD_N_FRAMES_CHECK_END_LONG 6.5*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_COUNT_END_LONG_FACTOR 1.8 +#define DETECTOR_CVAD_COUNT_END_LONG_FACTOR_SENSITIVE 1.5 + +typedef struct { + double energy_thresh_coeff_lower; + double energy_thresh_coeff_upper; + double sfm_thresh; + double dfc_thresh; + double th_energy[DETECTOR_CVAD_N_ENERGY_BANDS]; + double th_sfm; + double th_dfc; + double ref_energy[DETECTOR_CVAD_N_ENERGY_BANDS]; + double ref_sfm; + double ref_dfc; + double ref_dfc_var; + double energy_update_coeff[DETECTOR_CVAD_N_ENERGY_BANDS]; + double energy_prev_variance[DETECTOR_CVAD_N_ENERGY_BANDS]; + double energy_history[DETECTOR_CVAD_N_ENERGY_BANDS][DETECTOR_CVAD_ENERGY_MEMORY]; + double sfm_update_coeff; + double dfc_history[DETECTOR_CVAD_FRAMES_INIT]; + double dfc_update_coeff; + float end_sum_long_coeff; + float end_sum_short_coeff; + int frame_number; + int speech_start_frame; + int max_speech_time; + int energy_history_index; + int min_zero_crossings; + int max_zero_crossings; + int thresh_initialized; + int silence_count; + int talking; + int sample_freq; + int samples_per_frame; + int max_start_sum; + int n_frames_check_start; + int n_frames_check_end_short; + int n_frames_check_end_long; + int start_sum_threshold; + int previous_state_index; + short int previous_state[DETECTOR_CVAD_RESULT_MEMORY]; +} s_wv_detector_cvad_state; + +/* + Main entry point to the detection algorithm. 
+ This returns a -1 if there is no change in state, a 1 if some started talking, and a 0 if speech ended + */ +int wvs_cvad_detect_talking(s_wv_detector_cvad_state *cvad_state, short int *samples, float *fft_mags); + + +/* + Initiate the cvad_state structure, which represents the state of + one instance of the algorithm + + sensitive mode: 0 if for a close-up mic, 1 if for a fixed, distant mic + */ +s_wv_detector_cvad_state* wv_detector_cvad_init(int sample_rate, int sensitivity, int speech_timeout); + +/* + Safely frees memory for a cvad_state + */ +void wv_detector_cvad_clean(s_wv_detector_cvad_state *cvad_state); + +/* + Set VAD sensitivity (0-100) + - Lower values are for strong voice signals like for a cellphone or personal mic + - Higher values are for use with a fixed-position mic or any application with voice burried in ambient noise + - Defaults to 0 + */ + +void wv_detector_cvad_set_sensitivity(s_wv_detector_cvad_state *cvad_state, int sensitivity); + +/* + Set the reference values of the energy, most dominant frequency componant and the spectral flatness measure. + The threshold value is then set based on the "background" reference levels + */ +void wv_detector_cvad_update_ref_levels(s_wv_detector_cvad_state *cvad_state, double *band_energy, double dfc, double sfm); + +/* + Set the threshhold on the cvad_state. + */ +void vw_detector_cvad_set_threshold(s_wv_detector_cvad_state *cvad_state); + +/* + Computes the variance of the energy over the past few windows and adapts the update ceoffs accordingly + */ +void wv_detector_cvad_modify_update_coeffs(s_wv_detector_cvad_state *cvad_state); + +/* + Compare the distance between the value and the minimum value of each component and return how many + component(s) reponded positiviely. + Each frame with more than 2 (out of 3) matching features are qualified as a speech frame. + example : energy - cvad_state->min_energy > cvad_state->th_energy + */ +short int vw_detector_cvad_check_frame(s_wv_detector_cvad_state *cvad_state, double *band_energy, double dfc, double sfm, int zero_crossings); + +/* + Return the frequency with the biggest amplitude (from a frame). + */ +double frames_detector_cvad_most_dominant_freq(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double nb_samples); + +/* + Computes the energy of the first DETECTOR_CVAD_N_ENERGY_BANDS 1 KHz bands + */ +void frames_detector_cvad_multiband_energy(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double *band_energy, int nb_samples); + +/* + Compute the spectral flatness of a frame. + It tells us if all the frequencies have a similar amplitude, which would means noise + or if there is some dominant frequencies, which could mean voice. + */ +double frames_detector_cvad_spectral_flatness(float *fft_mags, int nb); + +/* + Counts the number of times the signal crosses zero + Even soft vocalizations have a fairly regular number of zero crossings (~5-15 for 10ms) + */ +int frames_detector_cvad_zero_crossings(short int *samples, int nb); + +#endif diff --git a/android/src/main/jni/WITVadSimple.c b/android/src/main/jni/WITVadSimple.c new file mode 100644 index 00000000..a94d8896 --- /dev/null +++ b/android/src/main/jni/WITVadSimple.c @@ -0,0 +1,206 @@ +// +// WITVadSimple.c +// Wit +// +// Created by Aric Lasry on 8/6/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved. 
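The header above documents the per-frame inputs the detector consumes: FFT magnitudes for the energy-band, dominant-frequency and flatness measures, plus the raw samples for zero crossings. As a rough illustration of how the Java side could produce those inputs with the JTransforms dependency the Android patch adds, assuming one frame of 16-bit samples whose length is a power of two (as the detector requires); the class and method names are illustrative, not taken from the patch:

    import org.jtransforms.fft.FloatFFT_1D;

    final class VadFrameFeatures {
        /** Magnitudes of the first n/2 FFT bins of one PCM frame (JTransforms packed real FFT). */
        static float[] fftMagnitudes(short[] frame) {
            int n = frame.length;
            float[] fft = new float[n];
            for (int i = 0; i < n; i++) fft[i] = frame[i];
            new FloatFFT_1D(n).realForward(fft);              // in-place transform, packed Re/Im pairs
            float[] mags = new float[n / 2];
            mags[0] = Math.abs(fft[0]);                       // DC bin
            for (int k = 1; k < n / 2; k++) {
                mags[k] = (float) Math.hypot(fft[2 * k], fft[2 * k + 1]);
            }
            return mags;
        }

        /** Spectral flatness in dB: |10 * log10(geometric mean / arithmetic mean)| of the magnitudes. */
        static double spectralFlatness(float[] mags) {
            double logSum = 0, sum = 0;
            for (float m : mags) {
                if (m > 0f) { logSum += Math.log(m); sum += m; }
            }
            double geo = Math.exp(logSum / mags.length);
            double arith = sum / mags.length;
            return arith == 0 ? 0 : Math.abs(10 * Math.log10(geo / arith));
        }

        /** Zero crossings per frame; voiced speech tends to land in a narrow band (roughly 5-15 per 10 ms). */
        static int zeroCrossings(short[] frame) {
            int crossings = 0;
            for (int i = 1; i < frame.length; i++) {
                if ((frame[i - 1] >= 0) != (frame[i] >= 0)) crossings++;
            }
            return crossings;
        }
    }

The magnitude array has length n/2, matching the fft_mags buffer the JNI wrapper later copies half a frame of floats into; the flatness and zero-crossing helpers mirror the C formulas documented above but are a sketch, not a port.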
+// + + +#include "WITVadSimple.h" + +/** + * wvs_pcm16short2dbfs - converts short (16 bits) samples to decibel full scale + * @samples: array of pcm 16 bits samples + * @size: numbers of sample + * + * Return a new allocated buffer of double, which will need to be free later + */ +static double * wvs_pcm16short2dbfs(short int *samples, int size); + +static double frames_detector_esf_energy(double *samples, int nb_samples); +static void detector_esf_minimum(wvs_state *state, double energy, int n); +static int detector_esf_check_frame(wvs_state *state, double energy); +static void memory_push(int *memory, int length, int value); +static int frame_memory_lte(int *memory, int value, int nb); +static int frame_memory_gte(int *memory, int value, int nb); +static int wvs_check(wvs_state *state, double *samples, int nb_samples); + + +int wvs_still_talking(wvs_state *state, short int *samples, int nb_samples) +{ + double *dbfss; + double db; + int result; + int i_sample; + + dbfss = wvs_pcm16short2dbfs(samples, nb_samples); + for (i_sample = 0; i_sample < nb_samples; i_sample++) { + db = dbfss[i_sample]; + if (isinf(db)) { + continue; + } + if (state->current_nb_samples == state->samples_per_frame) { + result = wvs_check(state, state->samples, state->current_nb_samples); + if (result == 0 || result == 1) { + free(dbfss); + return result; + } + state->current_nb_samples = 0; + } + state->samples[state->current_nb_samples] = db; + state->current_nb_samples++; + } + free(dbfss); + + return -1; +} + +static int wvs_check(wvs_state *state, double *samples, int nb_samples) +{ + int counter; + double energy; + int action; + char debug_msg[128]; + + action = -1; + energy = frames_detector_esf_energy(samples, nb_samples); + + if (state->sequence <= state->init_frames) { + detector_esf_minimum(state, energy, state->sequence); + } + counter = detector_esf_check_frame(state, energy); + if (state->sequence >= state->init_frames && !counter && !state->talking) { + detector_esf_minimum(state, energy, state->sequence); + } + memory_push(state->previous_state, state->previous_state_maxlen, counter); + if (state->sequence < state->init_frames) { + state->sequence++; + return -1; + } + if (state->talking == 0 && frame_memory_gte(state->previous_state, 1, 10)) { + state->talking = 1; + __android_log_write(ANDROID_LOG_DEBUG, "WitVAD", "Speak start"); + action = 1; + } + else if (state->talking == 1 && frame_memory_lte(state->previous_state, 0, state->previous_state_maxlen)) { + state->talking = 0; + action = 0; + __android_log_write(ANDROID_LOG_DEBUG, "WitVAD", "Speak end"); + } + state->sequence++; + + return action; +} + +wvs_state *wvs_init(double threshold, int sample_rate) +{ + wvs_state *state; + + state = malloc(sizeof(*state)); + state->sequence = 0; + state->min_initialized = 0; + state->init_frames = 30; + state->energy_threshold = threshold; + state->previous_state_maxlen = 50; + state->previous_state = malloc(sizeof(*state->previous_state) * state->previous_state_maxlen); + state->talking = 0; + state->sample_rate = sample_rate; + state->samples_per_frame = state->sample_rate / 100; + state->samples = malloc(sizeof(*state->samples) * state->samples_per_frame); + state->current_nb_samples = 0; + state->min_energy = 0.0; + + return state; +} + +void wvs_clean(wvs_state *state) +{ + free(state->samples); + free(state->previous_state); + free(state); +} + +static double * wvs_pcm16short2dbfs(short int *samples, int size) +{ + double *dbfss; + double max_ref; + int i; + + max_ref = 32768; //pow(2.0, 16.0) / 2; 
signed 16 bits w/o the -1 + dbfss = malloc(sizeof(*dbfss) * size); + + for (i = 0; i < size; i++) { + dbfss[i] = 0 - 20 * log10(fabs(samples[i] / max_ref)); + } + + return dbfss; +} + +static double frames_detector_esf_energy(double *samples, int nb_samples) +{ + double energy = 0.0f; + int i; + + for (i = 0; i < nb_samples; i++) { + energy += samples[i]; + } + energy /= nb_samples; + + return energy; +} + +static void detector_esf_minimum(wvs_state *state, double energy, int n) +{ + n = (n > 10) ? 10 : n; //this correspond to 1/10 of a second + state->min_energy = (state->min_energy * n + energy) / (n + 1); + state->min_initialized = 1; +} + +static int detector_esf_check_frame(wvs_state *state, double energy) +{ + int counter; + + counter = 0; + char debug_msg[200]; + + if ((0 - (energy - state->min_energy)) >= state->energy_threshold) { + counter++; + } + + return counter; +} + +static void memory_push(int *memory, int length, int value) +{ + while (--length) { + memory[length] = memory[length - 1]; + } + memory[0] = value; +} + +static int frame_memory_gte(int *memory, int value, int nb) +{ + int i = 0; + + for (i = 0; i < nb; i++) { + if (memory[i] < value) { + return 0; + } + } + + return 1; +} + +static int frame_memory_lte(int *memory, int value, int nb) +{ + int i; + + for (i = 0; i < nb; i++) { + if (memory[i] > value) { + return 0; + } + } + + return 1; +} diff --git a/android/src/main/jni/WITVadSimple.h b/android/src/main/jni/WITVadSimple.h new file mode 100644 index 00000000..7c1a9802 --- /dev/null +++ b/android/src/main/jni/WITVadSimple.h @@ -0,0 +1,73 @@ +// +// WITVadSimple.h +// Wit +// +// Created by Aric Lasry on 8/6/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved. +// + +#ifndef Wit_WITVadSimple_h +#define Wit_WITVadSimple_h + +#include +#include +#include +#include +#include + + + +/** + * This voice activity detection is very simple. It computes the average of the + * audio powers from the beginning and the last second, and compare the distance + * between the two with a pre-defined threshold. + * + * The "audio powers" are average of audio chunks in DBFS. It could also be PCM samples... + */ + +/* + state of the voice activity detection algorithm. + */ +typedef struct { + /* frame number */ + int sequence; + + /* is the environment initialized? */ + int min_initialized; + + /* frame number needed for initialization */ + int init_frames; + + double energy_threshold; + + double min_energy; + + int *previous_state; + + int previous_state_maxlen; + + int talking; + + /* number of sample per second */ + int sample_rate; + + /* number of samples needed to calculate the feature(s) */ + int samples_per_frame; + + /* samples list to send to the checking function when enough are available */ + double *samples; + + int current_nb_samples; +} wvs_state; + +int wvs_still_talking(wvs_state *state, short int *samples, int nb_samples); + +wvs_state *wvs_init(double threshold, int sample_rate); + +/** + * wvs_clean - clean a wvs_state* structure + * @state: the structure to free. 
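WITVadSimple.h above describes the simpler fallback detector: convert samples to dBFS, keep a slowly updated background reference, and flag speech whenever the current frame rises above that reference by a preset threshold. A compact Java sketch of that idea under the same assumptions (16-bit samples, roughly 10 ms frames); it illustrates the description rather than porting the C, and all names are hypothetical:

    final class SimpleEnergyVad {
        private final double thresholdDb;      // how far above the background a frame must be to count as speech
        private double backgroundDb = Double.NaN;

        SimpleEnergyVad(double thresholdDb) { this.thresholdDb = thresholdDb; }

        /** Mean level of one 16-bit PCM frame in dBFS (0 = full scale, more negative = quieter). */
        static double frameDbfs(short[] frame) {
            double sum = 0;
            for (short s : frame) {
                double ratio = Math.max(Math.abs(s) / 32768.0, 1e-9);  // clamp to avoid log(0) on silence
                sum += 20 * Math.log10(ratio);
            }
            return sum / frame.length;
        }

        /** True while the current frame is louder than the learned background by the threshold. */
        boolean isSpeech(short[] frame) {
            double level = frameDbfs(frame);
            boolean speech = !Double.isNaN(backgroundDb) && level - backgroundDb >= thresholdDb;
            if (!speech) {
                // Update the background only on non-speech frames, much like the C detector updates its reference.
                backgroundDb = Double.isNaN(backgroundDb) ? level : 0.9 * backgroundDb + 0.1 * level;
            }
            return speech;
        }
    }

Note that the patch's JNI wrapper actually wires up the WITCvad detector rather than this simple one; the sketch is only meant to make the threshold-against-background idea concrete.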
+ */ +void wvs_clean(wvs_state *state); + +#endif diff --git a/android/src/main/jni/WITVadWrapper.c b/android/src/main/jni/WITVadWrapper.c new file mode 100644 index 00000000..717de8a4 --- /dev/null +++ b/android/src/main/jni/WITVadWrapper.c @@ -0,0 +1,56 @@ +#include "WITCvad.h" +#include + + +static s_wv_detector_cvad_state* wit_vad_g_struct = 0; + +int Java_com_rnim_rn_audio_RecordWaveTask_VadInit(JNIEnv *env, jobject obj, jint vadSensitivity, jint vadTimeout) +{ + int sample_rate = 16000; + vadSensitivity = (int)fmax(0,fmin(100,vadSensitivity)); //bounds-checking + wit_vad_g_struct = wv_detector_cvad_init(sample_rate, (int)vadSensitivity, (int)vadTimeout); + + return 0; +} + + +int Java_com_rnim_rn_audio_RecordWaveTask_VadStillTalking(JNIEnv *env, jobject obj, jshortArray java_arr, jfloatArray java_fft_arr) +{ + short int *samples; + float *fft_mags; + int i, sum = 0; + int result; + jshort *native_arr = (*env)->GetShortArrayElements(env, java_arr, NULL); + jfloat *native_fft_arr = (*env)->GetFloatArrayElements(env, java_fft_arr, NULL); + int arr_len = wit_vad_g_struct->samples_per_frame; + + samples = malloc(sizeof(*samples) * arr_len); + for (i = 0; i < arr_len; i++) { + samples[i] = native_arr[i]; + } + (*env)->ReleaseShortArrayElements(env, java_arr, native_arr, 0); + + fft_mags = malloc(sizeof(*fft_mags) * arr_len); + for (i = 0; i < arr_len/2; i++) { + fft_mags[i] = native_fft_arr[i]; + } + (*env)->ReleaseFloatArrayElements(env, java_fft_arr, native_fft_arr, 0); + + result = wvs_cvad_detect_talking(wit_vad_g_struct, samples, fft_mags); + free(samples); + free(fft_mags); + + return result; +} + +void Java_com_rnim_rn_audio_RecordWaveTask_VadClean() +{ + if (wit_vad_g_struct) { + wv_detector_cvad_clean(wit_vad_g_struct); + wit_vad_g_struct = 0; + } +} + +int Java_com_rnim_rn_audio_RecordWaveTask_GetVadSamplesPerFrame(){ + return wit_vad_g_struct->samples_per_frame; +} diff --git a/index.js b/index.js index 93eaefc1..14b07c4c 100644 --- a/index.js +++ b/index.js @@ -40,6 +40,15 @@ var AudioRecorder = { } ); + if (this.vadReceivedSubscription) this.vadReceivedSubscription.remove(); + this.vadReceivedSubscription = NativeAppEventEmitter.addListener('vadReceived', + (vadResult) => { + if (this.onVadReceived) { + this.onVadReceived(vadResult); + } + } + ); + var defaultOptions = { SampleRate: 44100.0, Channels: 1, @@ -81,6 +90,7 @@ var AudioRecorder = { if (this.progressSubscription) this.progressSubscription.remove(); if (this.finishedSubscription) this.finishedSubscription.remove(); if (this.dataReceivedSubscription) this.dataReceivedSubscription.remove(); + if (this.vadReceivedSubscription) this.vadReceivedSubscription.remove(); }, }; From 1848031dc2ed4a82a0b2e5f85f0d7750dfac26ff Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 8 Aug 2017 10:00:51 +0900 Subject: [PATCH 21/26] Fix iOS empty AVAudioSession --- ios/AudioRecorderManager.m | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 6f83244b..8d13f8d3 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -31,7 +31,6 @@ @implementation AudioRecorderManager { NSNumber *_audioEncoding; NSNumber *_audioChannels; NSNumber *_audioSampleRate; - AVAudioSession *_recordSession; BOOL _meteringEnabled; int _bufferSize; } @@ -223,7 +222,7 @@ - (NSString *) applicationDocumentsDirectory NSLog(@"startStreaming"); NSLog(@"%@", _audioFileURL); [self startProgressTimer]; - [_recordSession setActive:YES error:nil]; + [[AVAudioSession 
sharedInstance] setActive:YES error:nil]; [streamingModule start]; } @@ -231,7 +230,7 @@ - (NSString *) applicationDocumentsDirectory { NSLog(@"stopStreaming"); [streamingModule stop]; - [_recordSession setActive:NO error:nil]; + [[AVAudioSession sharedInstance] setActive:NO error:nil]; _prevProgressUpdateTime = nil; [self finishRecording: true]; } From b3b3bf30a8f95cdb13ba8a7315dca597257bd827 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 8 Aug 2017 14:53:14 +0900 Subject: [PATCH 22/26] Apply WIT VAD in iOS --- AudioExample/AudioExample.js | 2 +- ios/AudioRecorderManager.h | 5 +- ios/AudioRecorderManager.m | 30 +++ ios/RNAudio.xcodeproj/project.pbxproj | 12 + ios/WITCvad.h | 169 ++++++++++++ ios/WITCvad.m | 357 ++++++++++++++++++++++++++ ios/WITVad.h | 32 +++ ios/WITVad.m | 123 +++++++++ 8 files changed, 727 insertions(+), 3 deletions(-) create mode 100644 ios/WITCvad.h create mode 100644 ios/WITCvad.m create mode 100644 ios/WITVad.h create mode 100644 ios/WITVad.m diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 6b9b2b84..62f088c4 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -26,7 +26,7 @@ class AudioExample extends Component { prepareRecordingPath(audioPath){ AudioRecorder.prepareStreamingAtPath(this.state.audioPath, 1600, { - SampleRate: 16000, + SampleRate: 22050, Channels: 1, // Following is not supported // AudioQuality: "Low", diff --git a/ios/AudioRecorderManager.h b/ios/AudioRecorderManager.h index d117e923..743e8e0c 100644 --- a/ios/AudioRecorderManager.h +++ b/ios/AudioRecorderManager.h @@ -9,7 +9,8 @@ #import #import #import +#import "WITVad.h" -@interface AudioRecorderManager : NSObject +@interface AudioRecorderManager : NSObject -@end \ No newline at end of file +@end diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 8d13f8d3..0ac10f57 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -13,10 +13,12 @@ #import #import #import "StreamingModule.h" +#import "WITVad.h" NSString *const AudioRecorderEventProgress = @"recordingProgress"; NSString *const AudioRecorderEventFinished = @"recordingFinished"; NSString *const AudioRecorderEventDataReceived = @"dataReceived"; +NSString *const AudioRecorderEventVadReceived = @"vadReceived"; @implementation AudioRecorderManager { @@ -36,6 +38,7 @@ @implementation AudioRecorderManager { } StreamingModule* streamingModule; +WITVad *vad; @synthesize bridge = _bridge; @@ -199,6 +202,13 @@ - (NSString *) applicationDocumentsDirectory _meteringEnabled = meteringEnabled; } + if (vad == nil) { + vad = [[WITVad alloc] initWithAudioSampleRate:[_audioSampleRate doubleValue] + vadSensitivity:0 + vadTimeout:7000]; + vad.delegate = self; + } + streamingModule = [[StreamingModule alloc] init]; [streamingModule prepare:_audioFileURL bufferSize:_bufferSize @@ -212,6 +222,12 @@ - (NSString *) applicationDocumentsDirectory [body addObject: value]; } } + if (vad != nil) { + int16_t *const int16ChannelData =[buf int16ChannelData][0]; + int length = buf.frameCapacity * buf.format.streamDescription->mBytesPerFrame; + NSData *audio = [[NSData alloc] initWithBytes:int16ChannelData length:length]; + [vad gotAudioSamples:audio]; + } [self.bridge.eventDispatcher sendAppEventWithName:AudioRecorderEventDataReceived body:body]; } ]; @@ -232,6 +248,10 @@ - (NSString *) applicationDocumentsDirectory [streamingModule stop]; [[AVAudioSession sharedInstance] setActive:NO error:nil]; _prevProgressUpdateTime = nil; + if (vad) { + vad.delegate = nil; + vad = 
nil; + } [self finishRecording: true]; } @@ -259,4 +279,14 @@ - (NSDictionary *)constantsToExport }; } +-(void)vadStartedTalking { + NSLog(@"Started Talking"); + [self.bridge.eventDispatcher sendAppEventWithName:AudioRecorderEventVadReceived body:[NSNumber numberWithInt:1]]; +} + +-(void)vadStoppedTalking { + NSLog(@"Stopped Talking"); + [self.bridge.eventDispatcher sendAppEventWithName:AudioRecorderEventVadReceived body:[NSNumber numberWithInt:0]]; +} + @end diff --git a/ios/RNAudio.xcodeproj/project.pbxproj b/ios/RNAudio.xcodeproj/project.pbxproj index a1c6e8e4..d5178132 100644 --- a/ios/RNAudio.xcodeproj/project.pbxproj +++ b/ios/RNAudio.xcodeproj/project.pbxproj @@ -9,6 +9,8 @@ /* Begin PBXBuildFile section */ 38D7625B1EDD3F58007B8DE3 /* StreamingModule.m in Sources */ = {isa = PBXBuildFile; fileRef = 38D762591EDD3F58007B8DE3 /* StreamingModule.m */; }; 429D457A1CFC96E100CBD51A /* AudioRecorderManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */; }; + 7664CAD31F39482200FC59DE /* WITCvad.m in Sources */ = {isa = PBXBuildFile; fileRef = 7664CAD21F39482200FC59DE /* WITCvad.m */; }; + 7664CAD61F394C8100FC59DE /* WITVad.m in Sources */ = {isa = PBXBuildFile; fileRef = 7664CAD51F394C8100FC59DE /* WITVad.m */; }; 76A04C0C1EDD91B800516515 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 76A04C0B1EDD91B800516515 /* AVFoundation.framework */; }; /* End PBXBuildFile section */ @@ -30,6 +32,10 @@ 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = AudioRecorderManager.m; sourceTree = SOURCE_ROOT; }; 429D45771CFC96E100CBD51A /* AudioRecorderManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AudioRecorderManager.h; sourceTree = SOURCE_ROOT; }; 42F559BA1CFC90C400DC3F84 /* libRNAudio.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libRNAudio.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 7664CAD11F39482200FC59DE /* WITCvad.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WITCvad.h; sourceTree = SOURCE_ROOT; }; + 7664CAD21F39482200FC59DE /* WITCvad.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = WITCvad.m; sourceTree = SOURCE_ROOT; }; + 7664CAD41F394C8100FC59DE /* WITVad.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WITVad.h; sourceTree = SOURCE_ROOT; }; + 7664CAD51F394C8100FC59DE /* WITVad.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = WITVad.m; sourceTree = SOURCE_ROOT; }; 76A04C0B1EDD91B800516515 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; }; /* End PBXFileReference section */ @@ -65,6 +71,10 @@ 42F559BC1CFC90C400DC3F84 /* RNAudio */ = { isa = PBXGroup; children = ( + 7664CAD41F394C8100FC59DE /* WITVad.h */, + 7664CAD51F394C8100FC59DE /* WITVad.m */, + 7664CAD11F39482200FC59DE /* WITCvad.h */, + 7664CAD21F39482200FC59DE /* WITCvad.m */, 429D45761CFC96E100CBD51A /* AudioRecorderManager.m */, 429D45771CFC96E100CBD51A /* AudioRecorderManager.h */, 38D762591EDD3F58007B8DE3 /* StreamingModule.m */, @@ -138,6 +148,8 @@ buildActionMask = 2147483647; files = ( 38D7625B1EDD3F58007B8DE3 /* StreamingModule.m 
in Sources */, + 7664CAD31F39482200FC59DE /* WITCvad.m in Sources */, + 7664CAD61F394C8100FC59DE /* WITVad.m in Sources */, 429D457A1CFC96E100CBD51A /* AudioRecorderManager.m in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/ios/WITCvad.h b/ios/WITCvad.h new file mode 100644 index 00000000..48795946 --- /dev/null +++ b/ios/WITCvad.h @@ -0,0 +1,169 @@ +// +// WITCvad.h +// Wit +// +// Created by Anthony Kesich on 11/12/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved. +// + +#ifndef Wit_WITCvad_h +#define Wit_WITCvad_h + + +#include +#include +#include +#include + + +/* + * This speech algorithm looks at multiple auditory compenents related to speech: + * - Energy divided into 1 KHz bands + * - Dominant Frequency Component + * - Spectral Flatness Measure + * - Zero-crossings + * + * If many features of speech are present for a period of time (~150 ms), speech is detected. + * The end of speech is determined by most features of speech disappearing for an extended period of time (~1 sec) + */ + +#define DETECTOR_CVAD_FRAMES_INIT 40 /* number of frames to use to initialize values */ +#define DETECTOR_CVAD_E_TH_COEFF_LOW_BAND 2.5f /* Energy threshold coefficient */ +#define DETECTOR_CVAD_E_TH_COEFF_UPPER_BANDS 2.0f /* Energy threshold coefficient */ +#define DETECTOR_CVAD_SFM_TH 3.0f /* Spectral Flatness Measure threshold */ +#define DETECTOR_CVAD_DFC_TH 250.0f /* most Dominant Frequency Component threshold */ +#define DETECTOR_CVAD_MIN_ZERO_CROSSINGS 5 /* fewest zero crossings for speech */ +#define DETECTOR_CVAD_MAX_ZERO_CROSSINGS 15 /* maximum zero crossings for speech */ +#define DETECTOR_CVAD_RESULT_MEMORY 130 /* number of frame results to keep in memory */ +#define DETECTOR_CVAD_ENERGY_MEMORY 20 /* number of frame results to keep in memory */ +#define DETECTOR_CVAD_N_ENERGY_BANDS 5 /* number of 1 KHz energy bands to compute */ +#define DETECTOR_CVAD_MINIMUM_LENGTH 1000 /* minimum length of vad in ms */ + +//final speech detection variables +#define DETECTOR_CVAD_N_FRAMES_CHECK_START 15 +#define DETECTOR_CVAD_COUNT_SUM_START 4.5*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_COUNT_SUM_START_SENSITIVE 3.8*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_N_FRAMES_CHECK_END_SHORT 1.5*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_COUNT_END_SHORT_FACTOR 0.6 +#define DETECTOR_CVAD_COUNT_END_SHORT_FACTOR_SENSITIVE 0.3 +#define DETECTOR_CVAD_N_FRAMES_CHECK_END_LONG 6.5*DETECTOR_CVAD_N_FRAMES_CHECK_START +#define DETECTOR_CVAD_COUNT_END_LONG_FACTOR 1.8 +#define DETECTOR_CVAD_COUNT_END_LONG_FACTOR_SENSITIVE 1.5 + +typedef struct { + double energy_thresh_coeff_lower; + double energy_thresh_coeff_upper; + double sfm_thresh; + double dfc_thresh; + double th_energy[DETECTOR_CVAD_N_ENERGY_BANDS]; + double th_sfm; + double th_dfc; + double ref_energy[DETECTOR_CVAD_N_ENERGY_BANDS]; + double ref_sfm; + double ref_dfc; + double ref_dfc_var; + double energy_update_coeff[DETECTOR_CVAD_N_ENERGY_BANDS]; + double energy_prev_variance[DETECTOR_CVAD_N_ENERGY_BANDS]; + double energy_history[DETECTOR_CVAD_N_ENERGY_BANDS][DETECTOR_CVAD_ENERGY_MEMORY]; + double sfm_update_coeff; + double dfc_history[DETECTOR_CVAD_FRAMES_INIT]; + double dfc_update_coeff; + float end_sum_long_coeff; + float end_sum_short_coeff; + int frame_number; + int speech_start_frame; + int max_speech_time; + int energy_history_index; + int min_zero_crossings; + int max_zero_crossings; + int thresh_initialized; + int silence_count; + int talking; + int sample_freq; + int 
samples_per_frame; + int max_start_sum; + int n_frames_check_start; + int n_frames_check_end_short; + int n_frames_check_end_long; + int start_sum_threshold; + int previous_state_index; + short int previous_state[DETECTOR_CVAD_RESULT_MEMORY]; +} s_wv_detector_cvad_state; + +/* + Main entry point to the detection algorithm. + This returns a -1 if there is no change in state, a 1 if some started talking, and a 0 if speech ended + */ +int wvs_cvad_detect_talking(s_wv_detector_cvad_state *cvad_state, short int *samples, float *fft_mags); + + +/* + Initiate the cvad_state structure, which represents the state of + one instance of the algorithm + + sensitive mode: 0 if for a close-up mic, 1 if for a fixed, distant mic + */ +s_wv_detector_cvad_state* wv_detector_cvad_init(int sample_rate, int sensitivity, int speech_timeout); + +/* + Safely frees memory for a cvad_state + */ +void wv_detector_cvad_clean(s_wv_detector_cvad_state *cvad_state); + +/* + Set VAD sensitivity (0-100) + - Lower values are for strong voice signals like for a cellphone or personal mic + - Higher values are for use with a fixed-position mic or any application with voice burried in ambient noise + - Defaults to 0 + */ + +void wv_detector_cvad_set_sensitivity(s_wv_detector_cvad_state *cvad_state, int sensitivity); + +/* + Set the reference values of the energy, most dominant frequency componant and the spectral flatness measure. + The threshold value is then set based on the "background" reference levels + */ +void wv_detector_cvad_update_ref_levels(s_wv_detector_cvad_state *cvad_state, double *band_energy, double dfc, double sfm); + +/* + Set the threshhold on the cvad_state. + */ +void vw_detector_cvad_set_threshold(s_wv_detector_cvad_state *cvad_state); + +/* + Computes the variance of the energy over the past few windows and adapts the update ceoffs accordingly + */ +void wv_detector_cvad_modify_update_coeffs(s_wv_detector_cvad_state *cvad_state); + +/* + Compare the distance between the value and the minimum value of each component and return how many + component(s) reponded positiviely. + Each frame with more than 2 (out of 3) matching features are qualified as a speech frame. + example : energy - cvad_state->min_energy > cvad_state->th_energy + */ +short int vw_detector_cvad_check_frame(s_wv_detector_cvad_state *cvad_state, double *band_energy, double dfc, double sfm, int zero_crossings); + +/* + Return the frequency with the biggest amplitude (from a frame). + */ +double frames_detector_cvad_most_dominant_freq(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double nb_samples); + +/* + Computes the energy of the first DETECTOR_CVAD_N_ENERGY_BANDS 1 KHz bands + */ +void frames_detector_cvad_multiband_energy(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double *band_energy, int nb_samples); + +/* + Compute the spectral flatness of a frame. + It tells us if all the frequencies have a similar amplitude, which would means noise + or if there is some dominant frequencies, which could mean voice. 
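The dominant-frequency measure documented a few declarations above reduces to an argmax over the magnitude spectrum: the loudest bin index k maps to the frequency k * sampleRate / frameSize. A small Java sketch of that mapping, reusing the kind of magnitude array shown in the earlier feature sketch (names are illustrative; the minimum-amplitude gate mirrors the C implementation):

    final class DominantFrequency {
        /** Frequency (Hz) of the largest FFT magnitude above a small amplitude floor, or 0 if none qualifies. */
        static double dominantFrequencyHz(float[] mags, int sampleRate, int frameSize) {
            double minAmplitude = 1.0;   // ignore near-silent bins, as the C code does
            int best = -1;
            float max = 0f;
            for (int k = 0; k < mags.length; k++) {
                if (mags[k] > max && mags[k] > minAmplitude) {
                    max = mags[k];
                    best = k;
                }
            }
            return best < 0 ? 0.0 : best * (double) sampleRate / frameSize;
        }
    }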
+ */ +double frames_detector_cvad_spectral_flatness(float *fft_mags, int nb); + +/* + Counts the number of times the signal crosses zero + Even soft vocalizations have a fairly regular number of zero crossings (~5-15 for 10ms) + */ +int frames_detector_cvad_zero_crossings(short int *samples, int nb); + +#endif diff --git a/ios/WITCvad.m b/ios/WITCvad.m new file mode 100644 index 00000000..91eabf30 --- /dev/null +++ b/ios/WITCvad.m @@ -0,0 +1,357 @@ +// +// WITCvad.m +// Wit +// +// Created by Anthony Kesich on 11/12/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved. +// + +#include "WITCvad.h" + + +/* + Adds value to the head of memory + */ +static void frame_memory_push(s_wv_detector_cvad_state *cvad_state, short int value); + +/* + Sums up the last N values of memory + */ +static int frame_memory_sum_last_n(s_wv_detector_cvad_state *cvad_state, int nb); + + +int wvs_cvad_detect_talking(s_wv_detector_cvad_state *cvad_state, short int *samples, float *fft_mags) +{ + double dfc; + double band_energy[DETECTOR_CVAD_N_ENERGY_BANDS]; + double sfm; + int fft_size = pow(2,floor(log2(cvad_state->samples_per_frame))); + short int counter; + int action = -1; + int zero_crossings; + + //only process cvad_state->samples_per_frame samples at a time + //frames_detector_cvad_fft(samples, fft_modules, cvad_state->samples_per_frame); + dfc = frames_detector_cvad_most_dominant_freq(cvad_state, fft_mags, fft_size, cvad_state->samples_per_frame); + sfm = frames_detector_cvad_spectral_flatness(fft_mags, fft_size); + zero_crossings = frames_detector_cvad_zero_crossings(samples, cvad_state->samples_per_frame); + frames_detector_cvad_multiband_energy(cvad_state, fft_mags, fft_size, band_energy, cvad_state->samples_per_frame); + + vw_detector_cvad_set_threshold(cvad_state); + counter = vw_detector_cvad_check_frame(cvad_state, band_energy, dfc, sfm, zero_crossings); + frame_memory_push(cvad_state, counter); + + if ((counter < 3 && cvad_state->talking == 0) || !cvad_state->thresh_initialized) { + cvad_state->silence_count++; + //only update reference levels if we don't detect speech + wv_detector_cvad_update_ref_levels(cvad_state, band_energy, dfc, sfm); + } + if (cvad_state->thresh_initialized) { + int start_sum = frame_memory_sum_last_n(cvad_state, DETECTOR_CVAD_N_FRAMES_CHECK_START); + int stop_sum_long = frame_memory_sum_last_n(cvad_state, DETECTOR_CVAD_N_FRAMES_CHECK_END_LONG); + int stop_sum_short = frame_memory_sum_last_n(cvad_state, DETECTOR_CVAD_N_FRAMES_CHECK_END_SHORT); + int speech_time = (cvad_state->frame_number-cvad_state->speech_start_frame) * cvad_state->samples_per_frame * 1000 / cvad_state->sample_freq; + + if(start_sum > cvad_state->max_start_sum){ + cvad_state->max_start_sum = start_sum; + } + if (!cvad_state->talking && start_sum >= cvad_state->start_sum_threshold ) { + cvad_state->talking = 1; + cvad_state->speech_start_frame = cvad_state->frame_number; + action = 1; + } + else if (cvad_state->talking && speech_time > DETECTOR_CVAD_MINIMUM_LENGTH + && ((counter < 3 + && stop_sum_long <= cvad_state->max_start_sum*cvad_state->end_sum_long_coeff + && stop_sum_short <= cvad_state->max_start_sum*cvad_state->end_sum_short_coeff) + || (cvad_state->max_speech_time > 0 + && speech_time >= cvad_state->max_speech_time))) { + cvad_state->talking = 0; + action = 0; + cvad_state->max_start_sum = 0; + } + } + + cvad_state->frame_number++; + + return action; +} + +s_wv_detector_cvad_state* wv_detector_cvad_init(int sample_rate, int sensitivity, int speech_timeout) +{ + s_wv_detector_cvad_state 
*cvad_state = malloc(sizeof(s_wv_detector_cvad_state)); + cvad_state->energy_thresh_coeff_lower = DETECTOR_CVAD_E_TH_COEFF_LOW_BAND; + cvad_state->energy_thresh_coeff_upper = DETECTOR_CVAD_E_TH_COEFF_UPPER_BANDS; + cvad_state->sfm_thresh= DETECTOR_CVAD_SFM_TH; + cvad_state->dfc_thresh= DETECTOR_CVAD_DFC_TH; + cvad_state->min_zero_crossings= DETECTOR_CVAD_MIN_ZERO_CROSSINGS; + cvad_state->max_zero_crossings= DETECTOR_CVAD_MAX_ZERO_CROSSINGS; + memset(cvad_state->energy_update_coeff, 0.20, DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + memset(cvad_state->energy_prev_variance, -1, DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + memset(cvad_state->energy_history, 0, DETECTOR_CVAD_ENERGY_MEMORY * DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + cvad_state->energy_history_index = 0; + cvad_state->dfc_update_coeff = 0.10; + cvad_state->sfm_update_coeff = 0.10; + cvad_state->frame_number = 0; + cvad_state->speech_start_frame = -1; + cvad_state->max_speech_time = speech_timeout; + cvad_state->thresh_initialized = 0; + cvad_state->silence_count = 0; + cvad_state->talking = 0; + memset(cvad_state->ref_energy, 0, DETECTOR_CVAD_N_ENERGY_BANDS * sizeof(double)); + cvad_state->ref_dfc = 0; + cvad_state->ref_sfm = 0; + memset(cvad_state->dfc_history, 0, DETECTOR_CVAD_FRAMES_INIT * sizeof(double)); + cvad_state->sample_freq = sample_rate; + cvad_state->max_start_sum = 0; + cvad_state->samples_per_frame = pow(2,ceil(log2(cvad_state->sample_freq/150))); //around 100 frames per second, but must be a power of two + cvad_state->previous_state_index = 0; + memset(cvad_state->previous_state, 0, DETECTOR_CVAD_RESULT_MEMORY * sizeof(short int)); + + wv_detector_cvad_set_sensitivity(cvad_state, sensitivity); + + return cvad_state; +} + +void wv_detector_cvad_clean(s_wv_detector_cvad_state *cvad_state) +{ + free(cvad_state); +} + +void wv_detector_cvad_set_sensitivity(s_wv_detector_cvad_state *cvad_state, int sensitivity) +{ + float sensitivity_frac = fmax(0,fmin(100,sensitivity))/100.0; + cvad_state->n_frames_check_start=DETECTOR_CVAD_N_FRAMES_CHECK_START; + cvad_state->n_frames_check_end_short=DETECTOR_CVAD_N_FRAMES_CHECK_END_SHORT; + cvad_state->n_frames_check_end_long=DETECTOR_CVAD_N_FRAMES_CHECK_END_LONG; + + cvad_state->start_sum_threshold = DETECTOR_CVAD_COUNT_SUM_START_SENSITIVE*sensitivity_frac; + cvad_state->start_sum_threshold += DETECTOR_CVAD_COUNT_SUM_START*(1-sensitivity_frac); + + cvad_state->end_sum_short_coeff = DETECTOR_CVAD_COUNT_END_SHORT_FACTOR_SENSITIVE*sensitivity_frac; + cvad_state->end_sum_short_coeff += DETECTOR_CVAD_COUNT_END_SHORT_FACTOR*(1-sensitivity_frac); + + cvad_state->end_sum_long_coeff = DETECTOR_CVAD_COUNT_END_LONG_FACTOR_SENSITIVE*sensitivity_frac; + cvad_state->end_sum_long_coeff += DETECTOR_CVAD_COUNT_END_LONG_FACTOR*(1-sensitivity_frac); +} + +void wv_detector_cvad_update_ref_levels(s_wv_detector_cvad_state *cvad_state, + double *band_energy, + double dfc, + double sfm) +{ + int b=0; + if (!cvad_state->thresh_initialized) { + //if still initializing, accumulate values to average + for(b=0; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++) { + cvad_state->ref_energy[b] += band_energy[b]; + } + + + cvad_state->ref_sfm += sfm; + + cvad_state->dfc_history[cvad_state->frame_number] = dfc > 0 ?
log(dfc) : 0; + } + + //record energy history + for(b=0; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++) { + cvad_state->energy_history[b][cvad_state->energy_history_index] = band_energy[b]; + } + cvad_state->energy_history_index++; + cvad_state->energy_history_index%=DETECTOR_CVAD_ENERGY_MEMORY; + + if (cvad_state->frame_number >= DETECTOR_CVAD_FRAMES_INIT) { + if(!cvad_state->thresh_initialized) { + //if done initializing, divide by number of samples to get an average + cvad_state->thresh_initialized = 1; + for(b=0; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++) { + cvad_state->ref_energy[b] /= cvad_state->frame_number; + } + + cvad_state->ref_sfm /= cvad_state->frame_number; + + double sum = 0; + double sq_sum = 0; + for(b=0; b < DETECTOR_CVAD_FRAMES_INIT; b++) { + cvad_state->ref_dfc+=cvad_state->dfc_history[b]; + sum += cvad_state->dfc_history[b]; + sq_sum += pow(cvad_state->dfc_history[b],2); + } + cvad_state->ref_dfc /= cvad_state->frame_number; + cvad_state->ref_dfc_var = (sq_sum-sum*sum/cvad_state->frame_number)/(cvad_state->frame_number -1); + + } else if (cvad_state->talking == 0) { + //otherwise update thresholds based on adaptive rules if there's no speech + wv_detector_cvad_modify_update_coeffs(cvad_state); + for(b=0; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++) { + cvad_state->ref_energy[b] *= (1-cvad_state->energy_update_coeff[b]); + cvad_state->ref_energy[b] += cvad_state->energy_update_coeff[b]*band_energy[b]; + } + + } + } + +} + +void vw_detector_cvad_set_threshold(s_wv_detector_cvad_state *cvad_state) +{ + //update thresholds to be a multiple of the reference level + int b; + cvad_state->th_energy[0] = cvad_state->ref_energy[0]*cvad_state->energy_thresh_coeff_lower; + for(b=1; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++){ + cvad_state->th_energy[b] = cvad_state->ref_energy[b]*cvad_state->energy_thresh_coeff_upper; + } + cvad_state->th_dfc = cvad_state->ref_dfc+cvad_state->dfc_thresh; + cvad_state->th_sfm = cvad_state->ref_sfm+cvad_state->sfm_thresh; +} + +void wv_detector_cvad_modify_update_coeffs(s_wv_detector_cvad_state *cvad_state){ + int b; + for(b=0; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++){ + double sum = 0; + double sq_sum = 0; + int h; + for(h=0; h < DETECTOR_CVAD_ENERGY_MEMORY; h++){ + sum+=cvad_state->energy_history[b][h]; + sq_sum+=pow(cvad_state->energy_history[b][h],2); + } + double variance = (sq_sum-sum*sum/DETECTOR_CVAD_ENERGY_MEMORY)/(DETECTOR_CVAD_ENERGY_MEMORY-1); + double ratio = variance/cvad_state->energy_prev_variance[b]; + if(ratio > 1.25){ + cvad_state->energy_update_coeff[b] = 0.25; + } else if(ratio > 1.10){ + cvad_state->energy_update_coeff[b] = 0.20; + } else if(ratio > 1.00){ + cvad_state->energy_update_coeff[b] = 0.15; + } else if(ratio > 0.00){ + cvad_state->energy_update_coeff[b] = 0.10; + } else { + //negative value indicates that this is the first pass of variance. Just set the coeff to 0.2 + cvad_state->energy_update_coeff[b] = 0.20; + } + cvad_state->energy_prev_variance[b] = variance; + } +} + +short int vw_detector_cvad_check_frame(s_wv_detector_cvad_state *cvad_state, double *band_energy, double dfc, double sfm, int zero_crossings) +{ + short int counter; + + counter = 0; + + int band_counter = 0; + if (band_energy[0] > cvad_state->th_energy[0]) { + counter += 2; + } + + int b; + for(b=1; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++){ + if(band_energy[b] > cvad_state->th_energy[b]){ + band_counter++; + } + } + if(band_counter >= 2){ + counter+=2; + } + + if (fabs((dfc > 0 ?
log(dfc): 0) - cvad_state->ref_dfc) > cvad_state->ref_dfc_var) { + counter++; + } + if (sfm > cvad_state->th_sfm) { + counter++; + } + if(zero_crossings >= cvad_state->min_zero_crossings && zero_crossings <= cvad_state->max_zero_crossings){ + counter++; + } + + return counter; +} + + +double frames_detector_cvad_most_dominant_freq(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double nb_samples) +{ + double k = 0.0f; + double max = 0.0f; + double amplitude_minimum = 1.0f; + int i; + + for (i = 0; i < nb_modules; i++) { + if (fft_mags[i] > max && fft_mags[i] > amplitude_minimum) { + max = fft_mags[i]; + k = i; + } + } + + return k * (double)cvad_state->sample_freq / (double)nb_samples; +} + +void frames_detector_cvad_multiband_energy(s_wv_detector_cvad_state *cvad_state, float *fft_mags, int nb_modules, double *band_energy, int nb_samples){ + + int b = 0; + int k = 0; + + for(b = 0; b < DETECTOR_CVAD_N_ENERGY_BANDS; b++){ + band_energy[b] = 0; + while(k < nb_modules && k*cvad_state->sample_freq/nb_samples < 1000*(b+1)){ + band_energy[b]+=fft_mags[k]; + k++; + } + } + +} + +double frames_detector_cvad_spectral_flatness(float *fft_mags, int nb) +{ + double geo_mean = 0.0f; + double arithm_mean = 0.0f; + double sfm = 0.0f; + int i; + + for (i = 0; i < nb; i++) { + if (fft_mags[i] != 0.0f) { + geo_mean += log(fft_mags[i]); + arithm_mean += fft_mags[i]; + } + } + geo_mean = exp(geo_mean / (double) nb); + arithm_mean = arithm_mean / (double) nb; + sfm = 10 * log10(geo_mean / arithm_mean); + sfm = fabs(sfm); + + return sfm; +} + +int frames_detector_cvad_zero_crossings(short int *samples, int nb){ + int num_zero_crossings = 0; + int i; + + for(i=1; i < nb; i++){ + if(samples[i-1]*samples[i] < 0){ + num_zero_crossings++; + } + } + + return num_zero_crossings; +} + +static void frame_memory_push(s_wv_detector_cvad_state *cvad_state, short int value) +{ + cvad_state->previous_state[cvad_state->previous_state_index] = value; + cvad_state->previous_state_index++; + cvad_state->previous_state_index%=DETECTOR_CVAD_RESULT_MEMORY; +} + +static int frame_memory_sum_last_n(s_wv_detector_cvad_state *cvad_state, int nb) +{ + int i = 0; + int sum = 0; + + for (i = 0; i < nb; i++) { + int indx = (cvad_state->previous_state_index - (i+1) + DETECTOR_CVAD_RESULT_MEMORY) % DETECTOR_CVAD_RESULT_MEMORY; + sum += cvad_state->previous_state[indx]; + } + + return sum; +} + diff --git a/ios/WITVad.h b/ios/WITVad.h new file mode 100644 index 00000000..63c85dbe --- /dev/null +++ b/ios/WITVad.h @@ -0,0 +1,32 @@ +// +// WITVad.h +// Wit +// +// Created by Aric Lasry on 8/6/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved. +// + +#import <Foundation/Foundation.h> +#import <Accelerate/Accelerate.h> +#import "WITCvad.h" + +@protocol WITVadDelegate; + +@interface WITVad : NSObject + +@property (nonatomic, weak) id<WITVadDelegate> delegate; + +@property (nonatomic, assign) BOOL stoppedUsingVad; + +- (instancetype)initWithAudioSampleRate:(double)audioSampleRate vadSensitivity:(int)_vadSensitivity vadTimeout:(int)_vadTimeout; +- (void)gotAudioSamples:(NSData *)samples; + +@end + + +@protocol WITVadDelegate + +-(void) vadStartedTalking; +-(void) vadStoppedTalking; + +@end diff --git a/ios/WITVad.m b/ios/WITVad.m new file mode 100644 index 00000000..d5e1ff15 --- /dev/null +++ b/ios/WITVad.m @@ -0,0 +1,123 @@ +// +// WITVad.m +// Wit +// +// Created by Aric Lasry on 8/6/14. +// Copyright (c) 2014 Willy Blandin. All rights reserved.
+// + +#import "WITVad.h" + +@implementation WITVad { + s_wv_detector_cvad_state *vad_state; + FFTSetup fft_setup; +} + +- (void)gotAudioSamples:(NSData *)samples { + UInt32 size = (UInt32)[samples length]; + short *bytes = (short*)[samples bytes]; + + for(int sample_offset=0; sample_offset+self->vad_state->samples_per_frame < size/2; sample_offset+=self->vad_state->samples_per_frame){ + + int nonZero=0; + + //check to make sure buffer actually has audio data + for(int i=0; i < self->vad_state->samples_per_frame; i++){ + if(bytes[sample_offset+i] != 0){ + nonZero=1; + break; + } + } + + //skip frame if it has nothing + if(!nonZero) continue; + + float *fft_mags = [self get_fft:(bytes+sample_offset)]; + + int detected_speech = wvs_cvad_detect_talking(self->vad_state, bytes+sample_offset, fft_mags); + + free(fft_mags); + + if ( detected_speech == 1){ + //someone just started talking + NSLog(@"start talking..."); + // debug(@"Starting......................") + dispatch_async(dispatch_get_main_queue(), ^{ + [self.delegate vadStartedTalking]; + }); + } else if ( detected_speech == 0) { + //someone just stopped talking + NSLog(@"stop talking..."); + // debug(@"Stopping......................"); + self.stoppedUsingVad = YES; + dispatch_async(dispatch_get_main_queue(), ^{ + [self.delegate vadStoppedTalking]; + }); + break; + } + } + +} + +- (instancetype)initWithAudioSampleRate:(double)audioSampleRate vadSensitivity:(int)_vadSensitivity vadTimeout:(int)_vadTimeout { + // debug(@"WITVad init"); + self = [super init]; + if (!self) { + return nil; + } + int vadSensitivity = (int)fmin(100,fmax(0,_vadSensitivity)); //must be between 0 and 100 + int vadTimeout = (int)_vadTimeout; + + self->vad_state = wv_detector_cvad_init(audioSampleRate,vadSensitivity,vadTimeout); + self.stoppedUsingVad = NO; + + //get the next power of 2 that'll fit our data + int logN = log2(self->vad_state->samples_per_frame); //samples_per_frame will be a power of 2 + //store the FFT setup for many later uses + self->fft_setup = vDSP_create_fftsetup(logN, kFFTRadix2); + + return self; +} + +- (void)dealloc { + // debug(@"Clean WITVad"); + wv_detector_cvad_clean(self->vad_state); +} + +- (float*)get_fft:(short *)samples { + int N = self->vad_state->samples_per_frame; //guaranteed to be a power of 2 + + //dynamically allocate an array for our results since we don't want to mutate the input samples + float *fft_mags = malloc(N/2 * sizeof(float)); + float *fsamples = malloc(N * sizeof(float)); + + for(int i=0; i < N; i++){ + if(i < self->vad_state->samples_per_frame){ + fsamples[i] = samples[i]; + } else { + fsamples[i] = 0; + } + } + + DSPSplitComplex tempSplitComplex; + tempSplitComplex.realp = malloc(N/2 * sizeof(float)); + tempSplitComplex.imagp = malloc(N/2 * sizeof(float)); + + //pack the real data into a split form for accelerate + vDSP_ctoz((DSPComplex*)fsamples, 2, &tempSplitComplex, 1, N/2); + + //do the FFT + vDSP_fft_zrip(self->fft_setup, &tempSplitComplex, 1, (int)log2(N), kFFTDirection_Forward); + + //get the magnitudes + vDSP_zvabs(&tempSplitComplex, 1, fft_mags, 1, N/2); + + //clear up memory + free(fsamples); + free(tempSplitComplex.realp); + free(tempSplitComplex.imagp); + + return fft_mags; +} + +@end From e2a27527250446cd192c28c8428380d37ff1844e Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 8 Aug 2017 15:02:23 +0900 Subject: [PATCH 23/26] Support sample rate when initializing VAD in Android --- android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java | 4 ++-- android/src/main/jni/WITVadWrapper.c | 3 +-- 2 files changed, 3 insertions(+), 4
deletions(-) diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index 3f5bca81..f781bc4b 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -40,7 +40,7 @@ public class RecordWaveTask extends AsyncTask { System.loadLibrary("witvad"); } - public native int VadInit(int vadSensitivity, int vadTimeout); + public native int VadInit(int sampleRate, int vadSensitivity, int vadTimeout); public native int VadStillTalking(short[] samples, float[] fft_mags); public native int GetVadSamplesPerFrame(); public native void VadClean(); @@ -121,7 +121,7 @@ protected Object[] doInBackground(File... unused) { long total = 0; int vadResult; - VadInit(vadSensitivity, vadTimeout); + VadInit(SAMPLE_RATE, vadSensitivity, vadTimeout); FloatFFT_1D fft = new FloatFFT_1D(GetVadSamplesPerFrame()); float[] fft_mags = new float[GetVadSamplesPerFrame()/2]; diff --git a/android/src/main/jni/WITVadWrapper.c b/android/src/main/jni/WITVadWrapper.c index 717de8a4..197b1bb2 100644 --- a/android/src/main/jni/WITVadWrapper.c +++ b/android/src/main/jni/WITVadWrapper.c @@ -4,9 +4,8 @@ static s_wv_detector_cvad_state* wit_vad_g_struct = 0; -int Java_com_rnim_rn_audio_RecordWaveTask_VadInit(JNIEnv *env, jobject obj, jint vadSensitivity, jint vadTimeout) +int Java_com_rnim_rn_audio_RecordWaveTask_VadInit(JNIEnv *env, jobject obj, jint sample_rate, jint vadSensitivity, jint vadTimeout) { - int sample_rate = 16000; vadSensitivity = (int)fmax(0,fmin(100,vadSensitivity)); //bounds-checking wit_vad_g_struct = wv_detector_cvad_init(sample_rate, (int)vadSensitivity, (int)vadTimeout); From b3aace933d9b49c78a27663e32fad415c1c32782 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Tue, 8 Aug 2017 15:36:44 +0900 Subject: [PATCH 24/26] Support sensitivity and timeout parameter for react-native module --- AudioExample/AudioExample.js | 3 +++ .../com/rnim/rn/audio/AudioRecorderManager.java | 12 +++++++++++- index.js | 13 +++++++++++-- ios/AudioRecorderManager.m | 15 +++++++++++---- ios/WITVad.h | 2 +- ios/WITVad.m | 2 +- 6 files changed, 38 insertions(+), 9 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 62f088c4..6791709d 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -32,6 +32,9 @@ class AudioExample extends Component { // AudioQuality: "Low", // AudioEncoding: "aac", // AudioEncodingBitRate: 32000, + }, { + Sensitivity: 0, + Timeout: 7000, }); } diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 1d4285c0..0e0113ba 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -123,7 +123,7 @@ private int getOutputFormatFromString(String outputFormat) { } @ReactMethod - public void prepareStreamingAtPath(String recordingPath, int bufferSize, ReadableMap recordingSettings, Promise promise) { + public void prepareStreamingAtPath(String recordingPath, int bufferSize, ReadableMap recordingSettings, ReadableMap vadSettings, Promise promise) { try { File wavFile = new File(recordingPath); @@ -144,6 +144,16 @@ public void prepareStreamingAtPath(String recordingPath, int bufferSize, Readabl recordTask.setChannelMask(channelMask); } + if (vadSettings.hasKey("Sensitivity")) { + int vadSensitivity = 
vadSettings.getInt("Sensitivity"); + recordTask.setVadSensitivity(vadSensitivity); + } + + if (vadSettings.hasKey("Timeout")) { + int vadTimeout = vadSettings.getInt("Timeout"); + recordTask.setVadTimeout(vadTimeout); + } + recordTask.setBufferSize(bufferSize); recordTask.setOutputFile(wavFile); diff --git a/index.js b/index.js index 14b07c4c..42bb0260 100644 --- a/index.js +++ b/index.js @@ -12,7 +12,7 @@ import ReactNative, { var AudioRecorderManager = NativeModules.AudioRecorderManager; var AudioRecorder = { - prepareStreamingAtPath: function(path, bufferSize=8192, options) { + prepareStreamingAtPath: function(path, bufferSize=8192, options, vadOptions) { if (this.progressSubscription) this.progressSubscription.remove(); this.progressSubscription = NativeAppEventEmitter.addListener('recordingProgress', (data) => { @@ -61,6 +61,13 @@ var AudioRecorder = { var recordingOptions = {...defaultOptions, ...options}; + var defaultVadOptions = { + Sensitivity: 0, + Timeout: 7000, + } + + var vadOptions = {...defaultVadOptions, ...vadOptions}; + if (Platform.OS === 'ios') { AudioRecorderManager.prepareStreamingAtPath( path, @@ -70,9 +77,11 @@ var AudioRecorder = { recordingOptions.AudioQuality, recordingOptions.AudioEncoding, recordingOptions.MeteringEnabled, + vadOptions.Sensitivity, + vadOptions.Timeout, ); } else { - return AudioRecorderManager.prepareStreamingAtPath(path, bufferSize, recordingOptions); + return AudioRecorderManager.prepareStreamingAtPath(path, bufferSize, recordingOptions, vadOptions); } }, startStreaming: function() { diff --git a/ios/AudioRecorderManager.m b/ios/AudioRecorderManager.m index 0ac10f57..355cb192 100644 --- a/ios/AudioRecorderManager.m +++ b/ios/AudioRecorderManager.m @@ -35,6 +35,8 @@ @implementation AudioRecorderManager { NSNumber *_audioSampleRate; BOOL _meteringEnabled; int _bufferSize; + int _vadSensitivity; + int _vadTimeout; } StreamingModule* streamingModule; @@ -128,7 +130,7 @@ - (NSString *) applicationDocumentsDirectory }]; } -RCT_EXPORT_METHOD(prepareStreamingAtPath:(NSString *)path bufferSize:(int)bufferSize sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled) +RCT_EXPORT_METHOD(prepareStreamingAtPath:(NSString *)path bufferSize:(int)bufferSize sampleRate:(float)sampleRate channels:(nonnull NSNumber *)channels quality:(NSString *)quality encoding:(NSString *)encoding meteringEnabled:(BOOL)meteringEnabled vadSensitivity:(int)vadSensitivity vadTimeout:(int)vadTimeout) { NSLog(@"prepareStreaming"); _audioFileURL = [NSURL fileURLWithPath:path]; @@ -140,6 +142,8 @@ - (NSString *) applicationDocumentsDirectory _audioSampleRate = [NSNumber numberWithFloat:44100.0]; _meteringEnabled = NO; _bufferSize = 8192; + _vadSensitivity = 0; + _vadTimeout = 7000; // Set audio quality from options if (quality != nil) { @@ -202,10 +206,13 @@ - (NSString *) applicationDocumentsDirectory _meteringEnabled = meteringEnabled; } + _vadSensitivity = vadSensitivity; + _vadTimeout = vadTimeout; + if (vad == nil) { - vad = [[WITVad alloc] initWithAudioSampleRate:[_audioSampleRate doubleValue] - vadSensitivity:0 - vadTimeout:7000]; + vad = [[WITVad alloc] initWithAudioSampleRate:[_audioSampleRate intValue] + vadSensitivity:_vadSensitivity + vadTimeout:_vadTimeout]; vad.delegate = self; } diff --git a/ios/WITVad.h b/ios/WITVad.h index 63c85dbe..c0c90610 100644 --- a/ios/WITVad.h +++ b/ios/WITVad.h @@ -18,7 +18,7 @@ @property (nonatomic, assign) BOOL stoppedUsingVad; -- 
(instancetype)initWithAudioSampleRate:(double)audioSampleRate vadSensitivity:(int)_vadSensitivity vadTimeout:(int)_vadTimeout; +- (instancetype)initWithAudioSampleRate:(int)audioSampleRate vadSensitivity:(int)_vadSensitivity vadTimeout:(int)_vadTimeout; - (void)gotAudioSamples:(NSData *)samples; @end diff --git a/ios/WITVad.m b/ios/WITVad.m index d5e1ff15..d587bea2 100644 --- a/ios/WITVad.m +++ b/ios/WITVad.m @@ -59,7 +59,7 @@ - (void)gotAudioSamples:(NSData *)samples { } -- (instancetype)initWithAudioSampleRate:(double)audioSampleRate vadSensitivity:(int)_vadSensitivity vadTimeout:(int)_vadTimeout { +- (instancetype)initWithAudioSampleRate:(int)audioSampleRate vadSensitivity:(int)_vadSensitivity vadTimeout:(int)_vadTimeout { // debug(@"WITVad init"); self = [super init]; if (!self) { From 8d2e6773ff52fdf626162e47e8f44cfd55cfde00 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Thu, 10 Aug 2017 17:12:17 +0900 Subject: [PATCH 25/26] Support to set audioSource in Android --- AudioExample/AudioExample.js | 1 + .../com/rnim/rn/audio/AudioRecorderManager.java | 17 ++++++++++++++++- .../java/com/rnim/rn/audio/RecordWaveTask.java | 2 +- index.js | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/AudioExample/AudioExample.js b/AudioExample/AudioExample.js index 6791709d..a5419e87 100644 --- a/AudioExample/AudioExample.js +++ b/AudioExample/AudioExample.js @@ -28,6 +28,7 @@ class AudioExample extends Component { AudioRecorder.prepareStreamingAtPath(this.state.audioPath, 1600, { SampleRate: 22050, Channels: 1, + AudioSource: 'MIC', // Following is not supported // AudioQuality: "Low", // AudioEncoding: "aac", diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 0e0113ba..785aa50c 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -129,7 +129,22 @@ public void prepareStreamingAtPath(String recordingPath, int bufferSize, Readabl File wavFile = new File(recordingPath); recordTask = new RecordWaveTask(); - recordTask.setAudioSource(MediaRecorder.AudioSource.MIC); + if (recordingSettings.hasKey("AudioSource")) { + switch(recordingSettings.getString("AudioSource")) { + case "DEFAULT": + recordTask.setAudioSource(MediaRecorder.AudioSource.DEFAULT); + break; + case "MIC": + recordTask.setAudioSource(MediaRecorder.AudioSource.MIC); + break; + case "VOICE_RECOGNITION": + recordTask.setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION); + break; + default: + recordTask.setAudioSource(MediaRecorder.AudioSource.DEFAULT); + break; + } + } if (recordingSettings.hasKey("SampleRate")) { recordTask.setSampleRate(recordingSettings.getInt("SampleRate")); diff --git a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java index f781bc4b..c961481e 100644 --- a/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java +++ b/android/src/main/java/com/rnim/rn/audio/RecordWaveTask.java @@ -25,7 +25,7 @@ public class RecordWaveTask extends AsyncTask { // Default value - private int AUDIO_SOURCE = MediaRecorder.AudioSource.MIC; + private int AUDIO_SOURCE = MediaRecorder.AudioSource.DEFAULT; private int SAMPLE_RATE = 44100; // Hz private int ENCODING = AudioFormat.ENCODING_PCM_16BIT; private int CHANNEL_MASK = AudioFormat.CHANNEL_IN_MONO; diff --git a/index.js b/index.js index 42bb0260..b3cfe295 100644 --- a/index.js +++ b/index.js @@ 
-55,6 +55,7 @@ var AudioRecorder = { AudioQuality: 'High', AudioEncoding: 'ima4', MeteringEnabled: false, + AudioSource: 'DEFAULT', // OutputFormat: 'mpeg_4', // AudioEncodingBitRate: 32000 }; From a39ab5406437a604e871709c8704c229147528c3 Mon Sep 17 00:00:00 2001 From: Daehoon Kim Date: Wed, 13 Sep 2017 18:15:36 +0900 Subject: [PATCH 26/26] Check null of recordTask --- .../src/main/java/com/rnim/rn/audio/AudioRecorderManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java index 785aa50c..5b266a75 100644 --- a/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java +++ b/android/src/main/java/com/rnim/rn/audio/AudioRecorderManager.java @@ -237,7 +237,7 @@ public void startStreaming(Promise promise){ @ReactMethod public void stopStreaming(final Promise promise){ Log.d("RecordWaveTask", "stopStreaming"); - if (!recordTask.isCancelled() && recordTask.getStatus() == AsyncTask.Status.RUNNING) { + if (recordTask != null && !recordTask.isCancelled() && recordTask.getStatus() == AsyncTask.Status.RUNNING) { Log.d("RecordWaveTask", "stopStreaming2"); isRecording = false; recordTask.setCancelCompleteListener(new RecordWaveTask.OnCancelCompleteListener() {