fluendo · rgonzalezfluendo · Oct 24, 2024
diff --git a/README.md b/README.md
@@ -5,4 +5,5 @@ Open-source GStreamer plugins provided by Fluendo
 ## Plugins
 
  - [hype](hype/README.md): HYbrid Parallel Encoder.
- - [fluttml](plugins/ttml/README.md): Parses and renders TTML files.
+ - [fluttml](plugins/ttml/README.md): Parses and renders TTML files.
+ - [whisper](plugins/whisper/README.md): Transcribes audio to text using the whisper.cpp library.
diff --git a/meson_options.txt b/meson_options.txt
@@ -5,5 +5,7 @@ option('injectbin', type : 'feature', value : 'auto', description : 'Build GStre
 option('ttml_build_ttmlparse', type : 'feature', value : 'enabled', description : 'gst-fluendo-ttml: build the ttmlparse element')
 option('ttml_build_ttmlrender', type : 'feature', value : 'enabled', description : 'gst-fluendo-ttml: build the ttmlrender element')
 
+option('whisper', type : 'feature', value : 'auto')
+
 option('examples', type : 'feature', value : 'auto', description : 'Build examples')
 option('tests', type : 'feature', value : 'auto', description : 'Build tests')
diff --git a/plugins/meson.build b/plugins/meson.build
@@ -8,6 +8,7 @@ plugins = {
 # Plugin Name             Supported OS                            Description
   'ttml':                 { 'os': ['linux', 'windows', 'darwin'],    'desc': 'GStreamer Fluendo TTML Element' },
   'injectbin':            { 'os': ['linux', 'windows', 'darwin'],    'desc': 'GStreamer Fluendo dynamic pipeline rebuild element' },
+  'whisper':              { 'os': ['linux',                    ],    'desc': 'GStreamer Fluendo whisper transcriber' },
 }
 
 # Meson builds OSX libraries with '.dylib' extension. However, the name_suffix

diff --git a/plugins/whisper/README.md b/plugins/whisper/README.md
@@ -0,0 +1,38 @@
+# Whisper
+
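+GStreamer plugin that transcribes audio to text using [whisper.cpp](https://github.com/ggerganov/whisper.cpp), a C/C++ port of OpenAI's Whisper speech recognition model.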
+
+# Build
+```
+meson setup builddir -Dwhisper=enabled --auto-features=disabled
+ninja -C builddir
+```
+
+# Download model
+```
+./subprojects/whispercpp/models/download-ggml-model.sh base
+```
+
+# Usage
+Dump the transcribed data (`silent=FALSE`):
+```
+gst-launch-1.0 --gst-plugin-path builddir/plugins/whisper/ \
+    filesrc location=subprojects/whispercpp/samples/jfk.wav ! \
+    decodebin ! \
+    audioconvert ! \
+    audio/x-raw,format=F32LE ! \
+    whisper silent=FALSE model-path=./subprojects/whispercpp/models/ggml-base.bin
+```
+
+Render the transcription as captions:
+```
+gst-launch-1.0 --gst-plugin-path builddir/plugins/whisper/ \
+    filesrc location=subprojects/whispercpp/samples/jfk.wav ! \
+    decodebin ! \
+    audioconvert ! \
+    audio/x-raw,format=F32LE ! \
+    whisper model-path=./subprojects/whispercpp/models/ggml-base.bin ! \
+    textrender ! \
+    videoconvert ! \
+    autovideosink
+```