diff --git a/.gitignore b/.gitignore index f3e690a..f831b4c 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,5 @@ wheelbuild/ wheelhouse/ __pycache__ *.whl + +*.h264 diff --git a/CMakeLists.txt b/CMakeLists.txt index 1bbb19f..b2b1169 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,9 +7,11 @@ set(VTKSTREAMING_USE_LIBVPX ON) set(modules VTKStreaming::Core VTKStreaming::Encode + VTKStreaming::Decode VTKStreaming::NvEncode VTKStreaming::libvpx VTKStreaming::VpxEncode + VTKStreaming::VpxDecode VTKStreaming::OpenGL2 VTKStreaming::WEBM ) diff --git a/README.md b/README.md index b81d1ee..6e1d804 100644 --- a/README.md +++ b/README.md @@ -89,78 +89,11 @@ written to `wheelhouse/` and can be installed directly: pip install wheelhouse/vtk_streaming-*.whl ``` -## Example - -```py -from vtkmodules.vtkCommonCore import vtkUnsignedCharArray -from vtkmodules.vtkRenderingCore import vtkRenderer, vtkRenderWindow, vtkRenderWindowInteractor -from vtkmodules.util.numpy_support import vtk_to_numpy -from vtkmodules.util.misc import calldata_type -from vtkmodules.util.vtkConstants import VTK_OBJECT - -from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder -from vtk_streaming.vtkStreamingNvEncode import vtkNvEncoderGL -from vtk_streaming.vtkStreamingVpxEncode import vtkVpxEncoder -from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame -from vtk_streaming.vtkStreamingCore import VTKVC_H264, VTKVC_H265, VTKVC_VP9, VTKPF_IYUV, vtkCompressedVideoPacket - -ren = vtkRenderer() -ren.SetBackground(0.1, 0.2, 0.4) -win = vtkRenderWindow() -win.AddRenderer(ren) -width, height = 641, 953 # size of video frames will likely be aligned to some value such as %4 or %8 -win.SetSize(width, height) -iren = vtkRenderWindowInteractor() -iren.SetRenderWindow(win) -iren.Initialize() -iren.Render() - -# Encoder takes the window to get the OpenGL context from it -encoder: vtkVideoEncoder = None -if vtkNvEncoderGL.CheckAvailability(): - encoder = vtkNvEncoderGL() - 
encoder.SetCodec(VTKVC_H264) - print("Using H264 through NVENC") -else: - encoder = vtkVpxEncoder() - encoder.SetCodec(VTKVC_VP9) - print("Using VP9 through libvpx") -encoder.SetGraphicsContext(win) -encoder.SetWidth(width) # to handle in resize event! -encoder.SetHeight(height) -encoder.SetInputPixelFormat(VTKPF_IYUV) - -# This is used to copy the framebuffer of the window to a NVENC shared-texture -picture = vtkOpenGLVideoFrame() -picture.SetContext(win) -picture.SetWidth(width) # to handle in resize event! -picture.SetHeight(height) -picture.SetPixelFormat(VTKPF_IYUV) -picture.AllocateDataStore() - -# You will receive video packets through this callback -@calldata_type(VTK_OBJECT) -def receive_data(_obj: vtkVideoEncoder, _ev: int, data: vtkCompressedVideoPacket): - frame_data: vtkUnsignedCharArray = data.GetData() - raw_bytes = vtk_to_numpy(frame_data).tobytes() # do something with it -encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_data) - -# We have logic to insert in rendering loop, -# Depending on the application and used GUI etc, this might differ. -while True: - # Render current frame - iren.ProcessEvents() - iren.Render() - # Capture last framebuffer - picture.Capture(win) - # Encode using choosen backend, this may invoke EncodedVideoChunkEvent any number of times - encoder.Encode(picture) - -# In a real application this should be called on exit -# this example is simply killed by user with ctrl+c -enc.Drain() -enc.Shutdown() -``` +## Examples + +1. [examples/simple_encoder_decoder.py](./examples/simple_encoder_decoder.py) - Live VP9 encode/decode round-trip with two render windows side by side. +2. [examples/resize_encoder_decoder.py](./examples/resize_encoder_decoder.py) - VP9 encode/decode round-trip that survives window resizes. +3. [examples/simple_nvenc_record.py](./examples/simple_nvenc_record.py) - Record a render window for later playback using NVENC. Playing back the recorded .h264 file requires `ffplay`. 
## Getting help diff --git a/Streaming/Encode/vtkVideoEncoder.cxx b/Streaming/Encode/vtkVideoEncoder.cxx index 0e60f38..d3aa70b 100644 --- a/Streaming/Encode/vtkVideoEncoder.cxx +++ b/Streaming/Encode/vtkVideoEncoder.cxx @@ -92,20 +92,6 @@ vtkRenderWindow* vtkVideoEncoder::GetGraphicsContext() const return this->GraphicsContext; } -//------------------------------------------------------------------------------ -void vtkVideoEncoder::SetWidth(int width) -{ - vtkLogScopeF(TRACE, "%s, w=%d", __func__, width); - this->Width = width; -} - -//------------------------------------------------------------------------------ -void vtkVideoEncoder::SetHeight(int height) -{ - vtkLogScopeF(TRACE, "%s, h=%d", __func__, height); - this->Height = height; -} - //------------------------------------------------------------------------------ void vtkVideoEncoder::SetBitRateControlMode(int mode) { diff --git a/Streaming/Encode/vtkVideoEncoder.h b/Streaming/Encode/vtkVideoEncoder.h index 591d76c..c6bf10e 100644 --- a/Streaming/Encode/vtkVideoEncoder.h +++ b/Streaming/Encode/vtkVideoEncoder.h @@ -161,9 +161,9 @@ class VTKSTREAMINGENCODE_EXPORT vtkVideoEncoder : public vtkObject /** * Set/Get width and height of encoding context. 
*/ - void SetWidth(int width); + vtkSetMacro(Width, int); vtkGetMacro(Width, int); - void SetHeight(int height); + vtkSetMacro(Height, int); vtkGetMacro(Height, int); ///@} diff --git a/Streaming/NvEncode/vtkNvEncoderGL.cxx b/Streaming/NvEncode/vtkNvEncoderGL.cxx index a552991..d248f69 100644 --- a/Streaming/NvEncode/vtkNvEncoderGL.cxx +++ b/Streaming/NvEncode/vtkNvEncoderGL.cxx @@ -160,6 +160,7 @@ bool vtkNvEncoderGL::InitializeInternal() unsigned int cudaDeviceCount = 0; VTK_NV_CUDA_DRIVER_API_CHECKED_INVOKE( cuGLGetDevices_v2(&cudaDeviceCount, devices, 4, CU_GL_DEVICE_LIST_ALL)); + char devName[100]; if (!cudaDeviceCount) { vtkLogF(ERROR, "OpenGL rendering is not on a CUDA device."); @@ -167,21 +168,18 @@ bool vtkNvEncoderGL::InitializeInternal() } else { - vtkLogF(INFO, "Found %u devices capable of CUDA-OpenGL interop.", cudaDeviceCount); + status = cufns->cuDeviceGetName(devName, sizeof(devName), devices[0]); + auto& ctx = this->CUDAInstance->Context; + ctx = nullptr; + status = cufns->cuCtxCreate_v2(&ctx, 0, devices[0]); + unsigned int version = 0; + cufns->cuCtxGetApiVersion(ctx, &version); + unsigned int major = version / 1000; + unsigned int minor = version - major * 1000; + vtkLogF(TRACE, + "NVENC GPU #%d ('%s') CUDA ctx %d.%d in use. %d gpu (s) capable of cuda-gl interop.", 0, + devName, major, minor, cudaDeviceCount); } - char devName[100]; - status = cufns->cuDeviceGetName(devName, sizeof(devName), devices[0]); - vtkLogF(INFO, "NvEncode: GPU %d in use - %s", 0, devName); - - auto& ctx = this->CUDAInstance->Context; - ctx = nullptr; - status = cufns->cuCtxCreate_v2(&ctx, 0, devices[0]); - - unsigned int version = 0; - cufns->cuCtxGetApiVersion(ctx, &version); - unsigned int major = version / 1000; - unsigned int minor = version - major * 1000; - vtkLogF(INFO, "CUDA context in use - %d.%d", major, minor); } // 2. Initializes NVENC with CUDA device. 
diff --git a/Streaming/NvEncode/vtkNvEncoderInternals.cxx b/Streaming/NvEncode/vtkNvEncoderInternals.cxx index f11b20d..cad785d 100644 --- a/Streaming/NvEncode/vtkNvEncoderInternals.cxx +++ b/Streaming/NvEncode/vtkNvEncoderInternals.cxx @@ -248,7 +248,6 @@ bool vtkNvEncoderInternals::LoadNvEncodeAPI() } else { - vtkLog(INFO, << "loaded NvEncodeAPI."); return true; } } diff --git a/examples/resize_encoder_decoder.py b/examples/resize_encoder_decoder.py new file mode 100644 index 0000000..b743165 --- /dev/null +++ b/examples/resize_encoder_decoder.py @@ -0,0 +1,196 @@ +"""VP9 encode/decode round-trip that survives window resizes. + +Same side-by-side layout as examples/simple_encoder_decoder.py: the left +window renders a spinning cylinder, every finished render (vtkCommand:: +EndEvent) is captured and VP9-encoded, and each packet is decoded and +displayed in the right window. + +The addition is resize handling, and it is almost free: whenever the scene +window's size changes, the capture frame is rebuilt at the new size — the +next Encode then makes vtkVideoEncoder tear down and rebuild its context +from the new frame dimensions, and the decoder follows the packet dimensions +on its own. Only the output window needs an explicit SetSize to match. + +Drag-resize the left window, or just wait: a timer cycles it through a few +sizes automatically. 
+""" + +from datetime import datetime + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +import vtkmodules.vtkInteractionStyle # noqa: F401 (register interactor styles) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.vtkConstants import VTK_OBJECT +from vtkmodules.vtkCommonCore import vtkCommand +from vtkmodules.vtkFiltersSources import vtkCylinderSource +from vtkmodules.vtkRenderingCore import ( + vtkActor, + vtkPolyDataMapper, + vtkRenderer, + vtkRenderWindow, + vtkRenderWindowInteractor, + vtkTextActor, +) + +from vtk_streaming.vtkStreamingCore import ( + VTKPF_IYUV, + VTKVC_VP9, + vtkCompressedVideoPacket, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingDecode import vtkVideoDecoder +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame +from vtk_streaming.vtkStreamingVpxDecode import vtkVpxDecoder +from vtk_streaming.vtkStreamingVpxEncode import vtkVpxEncoder + +# The automatic size cycle; codecs prefer sizes aligned to %4 or %8. +SIZES = [(640, 480), (800, 600), (480, 360)] +TICKS_PER_SIZE = 120 # timer ticks between automatic resizes (~4 s at 33 ms) + +width, height = SIZES[0] + +# Left window: the scene that gets encoded. +cylinder = vtkCylinderSource() +mapper = vtkPolyDataMapper() +mapper.SetInputConnection(cylinder.GetOutputPort()) +actor = vtkActor() +actor.SetMapper(mapper) +actor.GetProperty().SetColor(255 / 255, 99 / 255, 71 / 255) # tomato +actor.RotateX(30.0) +actor.RotateY(-45.0) +renderer = vtkRenderer() +renderer.AddActor(actor) +renderer.SetBackground(0.1, 0.2, 0.4) + +def current_time_text() -> str: + now = datetime.now() + return f"{now:%H:%M:%S}.{now.microsecond // 1000:03d}" + + +# Time readout (HH:MM:SS.ms), anchored to the top right corner of the scene +# window. Normalized viewport coordinates keep it in the corner across resizes. 
+frame_text = vtkTextActor() +frame_text.SetInput(current_time_text()) +frame_text.GetTextProperty().SetFontSize(18) +frame_text.GetTextProperty().SetJustificationToRight() +frame_text.GetTextProperty().SetVerticalJustificationToTop() +frame_text.GetPositionCoordinate().SetCoordinateSystemToNormalizedViewport() +frame_text.GetPositionCoordinate().SetValue(0.98, 0.98) +renderer.AddViewProp(frame_text) + +scene_window = vtkRenderWindow() +scene_window.SetWindowName("Input scene (VP9 encode)") +scene_window.AddRenderer(renderer) +scene_window.SetSize(width, height) +scene_window.SetPosition(50, 50) + +interactor = vtkRenderWindowInteractor() +interactor.SetRenderWindow(scene_window) +interactor.Initialize() +scene_window.Render() + +# Right window: displays whatever comes out of the decoder. +decoded_window = vtkRenderWindow() +decoded_window.SetWindowName("Decoded output (VP9 decode)") +decoded_window.SetSize(width, height) +decoded_window.SetPosition(50 + max(w for w, _ in SIZES) + 20, 50) +decoded_window.Render() # initialize its OpenGL context before first use + +decoder = vtkVpxDecoder() +decoder.SetGraphicsContext(decoded_window) + + +@calldata_type(VTK_OBJECT) +def display_frame(_decoder: vtkVideoDecoder, _event: int, frame: vtkOpenGLVideoFrame): + # frame.Render presents into the window (it calls window->Frame() itself). + frame.Render(decoded_window) + + +decoder.AddObserver(vtkVideoDecoder.DecodedVideoFrameEvent, display_frame) + +encoder = vtkVpxEncoder() +encoder.SetGraphicsContext(scene_window) +encoder.SetCodec(VTKVC_VP9) +encoder.SetWidth(width) +encoder.SetHeight(height) +encoder.SetInputPixelFormat(VTKPF_IYUV) + + +@calldata_type(VTK_OBJECT) +def receive_packet( + _encoder: vtkVideoEncoder, _event: int, packet: vtkCompressedVideoPacket +): + # In a real application this bitstream would travel over the network; + # here it goes straight to the decoder. Each packet carries its display + # dimensions, which is how the decoder picks up a resize. 
+ decoder.Decode(packet) + + +encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + + +def make_picture(window, picture_width, picture_height): + picture = vtkOpenGLVideoFrame() + picture.SetContext(window) + picture.SetWidth(picture_width) + picture.SetHeight(picture_height) + picture.SetPixelFormat(VTKPF_IYUV) + picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) + picture.AllocateDataStore() + return picture + + +picture = make_picture(scene_window, width, height) + + +def update_time_text(_window: vtkRenderWindow, _event: int): + frame_text.SetInput(current_time_text()) + + +def encode_frame(window: vtkRenderWindow, _event: int): + global picture + size = tuple(window.GetSize()) + if size != (picture.GetWidth(), picture.GetHeight()): + # Rebuild the capture frame; the first Encode at the new dimensions + # makes the encoder rebuild its context, and the decoder follows the + # packet dimensions. Only the output window needs explicit resizing. + print(f"resized to {size[0]}x{size[1]}") + picture = make_picture(window, *size) + decoded_window.SetSize(*size) + picture.Capture(window) + encoder.Encode(picture) # fires EncodedVideoChunkEvent per packet + # Decoding rendered into the other window; hand the context back. + window.MakeCurrent() + + +# StartEvent fires at the start of every vtkRenderWindow::Render, so the +# timestamp is current in the frame about to be drawn and encoded; +# EndEvent fires at the end. 
+scene_window.AddObserver(vtkCommand.StartEvent, update_time_text) +scene_window.AddObserver(vtkCommand.EndEvent, encode_frame) + +tick = 0 + + +def spin_and_cycle_size(_interactor: vtkRenderWindowInteractor, _event: int): + global tick + tick += 1 + renderer.GetActiveCamera().Azimuth(1.0) + if tick % TICKS_PER_SIZE == 0: + scene_window.SetSize(*SIZES[(tick // TICKS_PER_SIZE) % len(SIZES)]) + scene_window.Render() + + +interactor.AddObserver(vtkCommand.TimerEvent, spin_and_cycle_size) +interactor.CreateRepeatingTimer(33) + +print("Drag-resize the left window, or wait for the automatic size cycle.") +print("Press 'q' or 'e' in the left window to quit.") +interactor.Start() + +encoder.Drain() +encoder.Shutdown() +# Note: do not call decoder.Drain(); vtkVpxDecoder::SendEOS forwards a null +# packet that DecodeInternal dereferences. Shutdown is safe. +decoder.Shutdown() diff --git a/examples/simple_encoder_decoder.py b/examples/simple_encoder_decoder.py new file mode 100644 index 0000000..caf1625 --- /dev/null +++ b/examples/simple_encoder_decoder.py @@ -0,0 +1,172 @@ +"""Live VP9 encode/decode round-trip with two render windows side by side. + +The left window shows an interactive cylinder scene. Every time it finishes a +render (vtkCommand::EndEvent), its framebuffer is captured into a +vtkOpenGLVideoFrame and pushed through vtkVpxEncoder; each encoded packet is +handed straight to a vtkVpxDecoder, whose decoded frames are displayed in the +right window. A repeating timer spins the camera so the stream is alive even +without interaction. + +Window resizing is not handled here, so keep the windows at their initial +size! If you resize the window on the left, you will see a smaller/cropped +image on the right. + +See examples/resize_encoder_decoder.py for a round-trip that survives +resizes. 
+""" + +from datetime import datetime + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +import vtkmodules.vtkInteractionStyle # noqa: F401 (register interactor styles) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.vtkConstants import VTK_OBJECT +from vtkmodules.vtkCommonCore import vtkCommand +from vtkmodules.vtkFiltersSources import vtkCylinderSource +from vtkmodules.vtkRenderingCore import ( + vtkActor, + vtkPolyDataMapper, + vtkRenderer, + vtkRenderWindow, + vtkRenderWindowInteractor, + vtkTextActor, +) + +from vtk_streaming.vtkStreamingCore import ( + VTKPF_IYUV, + VTKVC_VP9, + vtkCompressedVideoPacket, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingDecode import vtkVideoDecoder +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame +from vtk_streaming.vtkStreamingVpxDecode import vtkVpxDecoder +from vtk_streaming.vtkStreamingVpxEncode import vtkVpxEncoder + +width, height = 640, 480 # codecs prefer sizes aligned to %4 or %8 + +# Left window: the scene that gets encoded. +cylinder = vtkCylinderSource() +mapper = vtkPolyDataMapper() +mapper.SetInputConnection(cylinder.GetOutputPort()) +actor = vtkActor() +actor.SetMapper(mapper) +actor.GetProperty().SetColor(255 / 255, 99 / 255, 71 / 255) # tomato +actor.RotateX(30.0) +actor.RotateY(-45.0) +renderer = vtkRenderer() +renderer.AddActor(actor) +renderer.SetBackground(0.1, 0.2, 0.4) + +def current_time_text() -> str: + now = datetime.now() + return f"{now:%H:%M:%S}.{now.microsecond // 1000:03d}" + + +# Time readout (HH:MM:SS.ms), anchored to the top right corner of the scene +# window. 
+frame_text = vtkTextActor() +frame_text.SetInput(current_time_text()) +frame_text.GetTextProperty().SetFontSize(18) +frame_text.GetTextProperty().SetJustificationToRight() +frame_text.GetTextProperty().SetVerticalJustificationToTop() +frame_text.GetPositionCoordinate().SetCoordinateSystemToNormalizedViewport() +frame_text.GetPositionCoordinate().SetValue(0.98, 0.98) +renderer.AddViewProp(frame_text) + +scene_window = vtkRenderWindow() +scene_window.SetWindowName("Input scene (VP9 encode)") +scene_window.AddRenderer(renderer) +scene_window.SetSize(width, height) +scene_window.SetPosition(50, 50) + +interactor = vtkRenderWindowInteractor() +interactor.SetRenderWindow(scene_window) +interactor.Initialize() +scene_window.Render() + +# Right window: displays whatever comes out of the decoder. +decoded_window = vtkRenderWindow() +decoded_window.SetWindowName("Decoded output (VP9 decode)") +decoded_window.SetSize(width, height) +decoded_window.SetPosition(50 + width + 20, 50) +decoded_window.Render() # initialize its OpenGL context before first use + +decoder = vtkVpxDecoder() +decoder.SetGraphicsContext(decoded_window) + + +@calldata_type(VTK_OBJECT) +def display_frame(_decoder: vtkVideoDecoder, _event: int, frame: vtkOpenGLVideoFrame): + # frame.Render presents into the window (it calls window->Frame() itself). + frame.Render(decoded_window) + + +decoder.AddObserver(vtkVideoDecoder.DecodedVideoFrameEvent, display_frame) + +encoder = vtkVpxEncoder() +encoder.SetGraphicsContext(scene_window) +encoder.SetCodec(VTKVC_VP9) +encoder.SetWidth(width) +encoder.SetHeight(height) +encoder.SetInputPixelFormat(VTKPF_IYUV) + + +@calldata_type(VTK_OBJECT) +def receive_packet( + _encoder: vtkVideoEncoder, _event: int, packet: vtkCompressedVideoPacket +): + # In a real application this bitstream would travel over the network; + # here it goes straight to the decoder. 
+ decoder.Decode(packet) + + +encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + + +# The capture frame; its size must match the scene window's framebuffer. +picture = vtkOpenGLVideoFrame() +picture.SetContext(scene_window) +picture.SetWidth(width) +picture.SetHeight(height) +picture.SetPixelFormat(VTKPF_IYUV) +picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) +picture.AllocateDataStore() + + +def update_time_text(_window: vtkRenderWindow, _event: int): + frame_text.SetInput(current_time_text()) + + +def encode_frame(window: vtkRenderWindow, _event: int): + picture.Capture(window) + encoder.Encode(picture) # fires EncodedVideoChunkEvent per packet + # Decoding rendered into the other window; hand the context back. + window.MakeCurrent() + + +# StartEvent fires at the start of every vtkRenderWindow::Render, so the +# timestamp is current in the frame about to be drawn and encoded; +# EndEvent fires at the end. +scene_window.AddObserver(vtkCommand.StartEvent, update_time_text) +scene_window.AddObserver(vtkCommand.EndEvent, encode_frame) + + +def spin(_interactor: vtkRenderWindowInteractor, _event: int): + renderer.GetActiveCamera().Azimuth(1.0) + scene_window.Render() + + +interactor.AddObserver(vtkCommand.TimerEvent, spin) +interactor.CreateRepeatingTimer(33) + +print("Interact with the left window; the right window shows the decoded stream.") +print("Press 'q' or 'e' in the left window to quit.") +interactor.Start() + +encoder.Drain() +encoder.Shutdown() +# Note: do not call decoder.Drain(); vtkVpxDecoder::SendEOS forwards a null +# packet that DecodeInternal dereferences. Shutdown is safe. +decoder.Shutdown() diff --git a/examples/simple_nvenc_record.py b/examples/simple_nvenc_record.py new file mode 100644 index 0000000..6e9b682 --- /dev/null +++ b/examples/simple_nvenc_record.py @@ -0,0 +1,136 @@ +"""Record a render window to an H.264 file using the NVENC encoder. 
+ +This uses an NVENC encoder to record the render window to a .h264 file. + +Tip: after quitting, play back the recording with `ffplay recording.h264` (packets are appended to the file, so delete any stale recording first). +""" + +from datetime import datetime + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +import vtkmodules.vtkInteractionStyle # noqa: F401 (register interactor styles) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.vtkConstants import VTK_OBJECT +from vtkmodules.vtkCommonCore import vtkCommand +from vtkmodules.vtkFiltersSources import vtkCylinderSource +from vtkmodules.vtkRenderingCore import ( + vtkActor, + vtkPolyDataMapper, + vtkRenderer, + vtkRenderWindow, + vtkRenderWindowInteractor, + vtkTextActor, +) + +from vtk_streaming.vtkStreamingCore import ( + VTKPF_IYUV, + VTKVC_H264, + vtkCompressedVideoPacket, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame +from vtk_streaming.vtkStreamingNvEncode import vtkNvEncoderGL + +width, height = 640, 480 # codecs prefer sizes aligned to %4 or %8 + +# Scene window: the scene that gets encoded. +cylinder = vtkCylinderSource() +mapper = vtkPolyDataMapper() +mapper.SetInputConnection(cylinder.GetOutputPort()) +actor = vtkActor() +actor.SetMapper(mapper) +actor.GetProperty().SetColor(255 / 255, 99 / 255, 71 / 255) # tomato +actor.RotateX(30.0) +actor.RotateY(-45.0) +renderer = vtkRenderer() +renderer.AddActor(actor) +renderer.SetBackground(0.1, 0.2, 0.4) + +def current_time_text() -> str: + now = datetime.now() + return f"{now:%H:%M:%S}.{now.microsecond // 1000:03d}" + + +# Time readout (HH:MM:SS.ms), anchored to the top right corner of the scene +# window. 
+frame_text = vtkTextActor() +frame_text.SetInput(current_time_text()) +frame_text.GetTextProperty().SetFontSize(18) +frame_text.GetTextProperty().SetJustificationToRight() +frame_text.GetTextProperty().SetVerticalJustificationToTop() +frame_text.GetPositionCoordinate().SetCoordinateSystemToNormalizedViewport() +frame_text.GetPositionCoordinate().SetValue(0.98, 0.98) +renderer.AddViewProp(frame_text) + +scene_window = vtkRenderWindow() +scene_window.SetWindowName("Input scene (VP9 encode)") +scene_window.AddRenderer(renderer) +scene_window.SetSize(width, height) +scene_window.SetPosition(50, 50) + +interactor = vtkRenderWindowInteractor() +interactor.SetRenderWindow(scene_window) +interactor.Initialize() +scene_window.Render() + +encoder = vtkNvEncoderGL() +encoder.SetGraphicsContext(scene_window) +encoder.SetCodec(VTKVC_H264) +encoder.SetWidth(width) +encoder.SetHeight(height) +encoder.SetInputPixelFormat(VTKPF_IYUV) + +@calldata_type(VTK_OBJECT) +def receive_packet( + _encoder: vtkVideoEncoder, _event: int, packet: vtkCompressedVideoPacket +): + with open("./recording.h264", mode='+ab') as f: + f.write(bytes(memoryview(packet.GetData()))) + + +encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + + +# The capture frame; its size must match the scene window's framebuffer. +picture = vtkOpenGLVideoFrame() +picture.SetContext(scene_window) +picture.SetWidth(width) +picture.SetHeight(height) +picture.SetPixelFormat(VTKPF_IYUV) +picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) +picture.AllocateDataStore() + + +def update_time_text(_window: vtkRenderWindow, _event: int): + frame_text.SetInput(current_time_text()) + + +def encode_frame(window: vtkRenderWindow, _event: int): + picture.Capture(window) + encoder.Encode(picture) # fires EncodedVideoChunkEvent per packet + # No decoder window here; make the scene window's context current again. 
+ window.MakeCurrent() + + +# StartEvent fires at the start of every vtkRenderWindow::Render, so the +# timestamp is current in the frame about to be drawn and encoded; +# EndEvent fires at the end. +scene_window.AddObserver(vtkCommand.StartEvent, update_time_text) +scene_window.AddObserver(vtkCommand.EndEvent, encode_frame) + + +def spin(_interactor: vtkRenderWindowInteractor, _event: int): + renderer.GetActiveCamera().Azimuth(1.0) + scene_window.Render() + + +interactor.AddObserver(vtkCommand.TimerEvent, spin) +interactor.CreateRepeatingTimer(33) + +print("Interact with the left window; the right window shows the decoded stream.") +print("Press 'q' or 'e' in the left window to quit.") +interactor.Start() + +encoder.Drain() +encoder.Shutdown() diff --git a/pyproject.toml b/pyproject.toml index c07a0ac..7a08073 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ download = "https://pypi.org/project/slicer-layer-dm/#files" tracker = "https://github.com/Kitware/VTKStreaming/issues" [project.optional-dependencies] -test = ["pytest", "virtualenv"] +test = ["pytest", "virtualenv", "numpy"] # Shared cibuildwheel config; CI (build-wheels.yml) only adds the # per-matrix CIBW_BUILD selection. Locally, build the same wheel with e.g. @@ -58,7 +58,7 @@ test = ["pytest", "virtualenv"] environment = { PIP_EXTRA_INDEX_URL = "https://vtk.org/files/wheel-sdks" } # No vtk-sdk wheels exist for musllinux. skip = "*-musllinux_*" -test-requires = ["pytest", "virtualenv"] +test-requires = ["pytest", "virtualenv", "numpy"] test-command = "python -m pytest {project} --verbose" [tool.cibuildwheel.linux] diff --git a/tests/test_encoder_resize.py b/tests/test_encoder_resize.py new file mode 100644 index 0000000..64ec669 --- /dev/null +++ b/tests/test_encoder_resize.py @@ -0,0 +1,175 @@ +"""Verify that encoding survives a mid-stream resize. 
+ +vtkVideoEncoder::Encode tears down and reinitializes the encoding context +whenever the incoming frame's dimensions change, and vtkVideoDecoder +reinitializes from each packet's DisplayWidth/DisplayHeight. Moving color +bars are pushed through three sizes (grow then shrink). Both tests check +that every packet is stamped with the display dimensions of the size it +was encoded at. The VP9 test additionally round-trips every packet through +a vtkVpxDecoder and image-compares the input and decoded windows at each +size. There is no NVDEC decoder module, so the NVENC test instead checks +that every resize restarts the H.264 stream: the first packet at each size +must carry fresh SPS/PPS and an IDR slice. + +Resize handling lives in the encoder/decoder base classes, so one pixel +format (IYUV) is enough; the per-format upload paths are covered by the +push-receive tests. +""" + +import pytest + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.numpy_support import vtk_to_numpy +from vtkmodules.util.vtkConstants import VTK_OBJECT +from vtkmodules.vtkTestingRendering import vtkTesting + +from vtk_streaming.vtkStreamingCore import ( + VTKPF_IYUV, + VTKVC_H264, + VTKVC_VP9, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder +from vtk_streaming.vtkStreamingNvEncode import vtkNvEncoderGL +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame +from vtk_streaming.vtkStreamingVpxEncode import vtkVpxEncoder + +from tests import video_test_utils + +pytestmark = video_test_utils.requires_rendering() + +SIZES = [(320, 240), (480, 360), (160, 120)] +FRAMES_PER_SIZE = 10 +THRESHOLD = 0.05 + + +def _make_picture(window, width, height, pixel_format): + picture = vtkOpenGLVideoFrame() + picture.SetContext(window) + picture.SetWidth(width) + picture.SetHeight(height) + picture.SetPixelFormat(pixel_format) + 
picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) + picture.ComputeDefaultStrides() + picture.AllocateDataStore() + return picture + + +def _encode_bars_segment(encoder, picture, window, width, height): + est_size = vtkRawVideoFrame.GetEstimatedSize(width, height, VTKPF_IYUV) + for shift in range(FRAMES_PER_SIZE): + frame_bytes = video_test_utils.iyuv_frame_bytes( + video_test_utils.generate_rgba32_color_bars(width, height, shift) + ) + assert len(frame_bytes) == est_size # plane offsets rely on this + video_test_utils.upload_iyuv_frame(picture, frame_bytes, width, height) + picture.Render(window) + encoder.Encode(picture) + + +def test_vpx_encoder_decoder_resize(tmp_path): + pytest.importorskip( + "vtk_streaming.vtkStreamingVpxDecode", + reason="installed vtk_streaming wheel was built without the VpxDecode module", + ) + + input_window = video_test_utils.make_offscreen_window(*SIZES[0]) + decoded_window = video_test_utils.make_offscreen_window(*SIZES[0]) + decoder, decoded_frame_sizes = video_test_utils.make_decode_render_loop( + decoded_window + ) + + packet_dims = [] + + @calldata_type(VTK_OBJECT) + def receive_packet(_encoder, _event, packet): + packet_dims.append((packet.GetDisplayWidth(), packet.GetDisplayHeight())) + decoder.Decode(packet) + + encoder = vtkVpxEncoder() + encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + encoder.SetGraphicsContext(input_window) + encoder.SetCodec(VTKVC_VP9) + encoder.SetInputPixelFormat(VTKPF_IYUV) + + for width, height in SIZES: + input_window.SetSize(width, height) + input_window.Render() + decoded_window.SetSize(width, height) + decoded_window.Render() + # A fresh frame per size; the first Encode at the new dimensions makes + # the encoder rebuild its context. + picture = _make_picture(input_window, width, height, VTKPF_IYUV) + _encode_bars_segment(encoder, picture, input_window, width, height) + + # Both windows show this size's last frame; compare before moving on. 
+ scratch = tmp_path / f"{width}x{height}" + scratch.mkdir() + result = video_test_utils.regression_compare_windows( + input_window, decoded_window, THRESHOLD, scratch + ) + assert result == vtkTesting.PASSED, f"image mismatch at {width}x{height}" + encoder.Drain() + + expected = [size for size in SIZES for _ in range(FRAMES_PER_SIZE)] + assert packet_dims == expected + assert decoded_frame_sizes == expected + + encoder.Shutdown() + decoder.Shutdown() + + +def _nal_unit_types(packet): + """Yield the nal_unit_type of every NAL unit in an Annex B packet. + + Emulation prevention guarantees the 3-byte start code pattern (which is + also the tail of the 4-byte form) never occurs inside a NAL payload, so a + plain scan finds exactly the real NAL boundaries. + """ + index = packet.find(b"\x00\x00\x01") + while index != -1: + yield packet[index + 3] & 0x1F + index = packet.find(b"\x00\x00\x01", index + 3) + + +@video_test_utils.requires_nvenc() +def test_nv_encoder_resize(): + input_window = video_test_utils.make_offscreen_window(*SIZES[0]) + + packets = [] + packet_dims = [] + + @calldata_type(VTK_OBJECT) + def receive_packet(_encoder, _event, packet): + packets.append(vtk_to_numpy(packet.GetData()).tobytes()) + packet_dims.append((packet.GetDisplayWidth(), packet.GetDisplayHeight())) + + encoder = vtkNvEncoderGL() + encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + encoder.SetGraphicsContext(input_window) + encoder.SetCodec(VTKVC_H264) + encoder.SetInputPixelFormat(VTKPF_IYUV) + + for width, height in SIZES: + input_window.SetSize(width, height) + input_window.Render() + picture = _make_picture(input_window, width, height, VTKPF_IYUV) + _encode_bars_segment(encoder, picture, input_window, width, height) + encoder.Drain() + + assert len(packets) == len(SIZES) * FRAMES_PER_SIZE + assert all(any(packet) for packet in packets) + expected = [size for size in SIZES for _ in range(FRAMES_PER_SIZE)] + assert packet_dims == expected + # Every resize 
tears the NVENC session down, so each size must open a new + # H.264 stream: SPS (7) and PPS (8) parameter sets plus an IDR slice (5). + for segment in range(len(SIZES)): + first_packet = packets[segment * FRAMES_PER_SIZE] + nal_types = set(_nal_unit_types(first_packet)) + assert {7, 8, 5} <= nal_types, ( + f"first packet at {SIZES[segment]} lacks SPS/PPS/IDR; " + f"NAL types found: {sorted(nal_types)}" + ) + + encoder.Shutdown() diff --git a/tests/test_nv_encode_decode_render_window_capture.py b/tests/test_nv_encode_decode_render_window_capture.py new file mode 100644 index 0000000..0124089 --- /dev/null +++ b/tests/test_nv_encode_decode_render_window_capture.py @@ -0,0 +1,105 @@ +"""Exercise display encoding with the NVENC H.264 encoder. + +A scene with a cylinder is captured from the +render window in IYUV or NV12 and encoded. The encoder +output must be a valid H.264 stream for this test to pass. +""" + +import pytest + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.numpy_support import vtk_to_numpy +from vtkmodules.util.vtkConstants import VTK_OBJECT +from vtkmodules.vtkFiltersSources import vtkCylinderSource +from vtkmodules.vtkRenderingCore import vtkActor, vtkPolyDataMapper, vtkRenderer + +from vtk_streaming.vtkStreamingCore import ( # noqa: E402 + VTKPF_IYUV, + VTKPF_NV12, + VTKVC_H264, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder # noqa: E402 +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame # noqa: E402 +from vtk_streaming.vtkStreamingNvEncode import vtkNvEncoderGL # noqa: E402 + +from tests import video_test_utils # noqa: E402 + +pytestmark = [ + video_test_utils.requires_rendering(), + video_test_utils.requires_nvenc(), +] + +WIDTH = 641 +HEIGHT = 953 +NUM_FRAMES = 30 +# The captured scene goes through RGB->YUV 4:2:0 capture, VP9 quantization and +# YUV->RGB display, so allow more slack
than the file-based round-trip. +THRESHOLD = 0.1 + + +def _make_cylinder_scene(): + cylinder = vtkCylinderSource() + mapper = vtkPolyDataMapper() + mapper.SetInputConnection(cylinder.GetOutputPort()) + actor = vtkActor() + actor.SetMapper(mapper) + actor.GetProperty().SetColor(255 / 255, 99 / 255, 71 / 255) # tomato + actor.RotateX(30.0) + actor.RotateY(-45.0) + renderer = vtkRenderer() + renderer.AddActor(actor) + renderer.SetBackground(26 / 255, 51 / 255, 102 / 255) + return renderer + + +@pytest.mark.parametrize( + "pixel_format", [VTKPF_IYUV, VTKPF_NV12], ids=["iyuv", "nv12"] +) +def test_nv_encode_decode_render_window_capture(pixel_format, tmp_path): + scene_window = video_test_utils.make_offscreen_window(WIDTH, HEIGHT) + renderer = _make_cylinder_scene() + scene_window.AddRenderer(renderer) + scene_window.Render() + + packets = [] + + @calldata_type(VTK_OBJECT) + def receive_packet(_encoder, _event, packet): + packets.append(vtk_to_numpy(packet.GetData()).tobytes()) + + encoder = vtkNvEncoderGL() + encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + encoder.SetGraphicsContext(scene_window) + encoder.SetCodec(VTKVC_H264) + encoder.SetWidth(WIDTH) + encoder.SetHeight(HEIGHT) + encoder.SetInputPixelFormat(pixel_format) + + picture = vtkOpenGLVideoFrame() + picture.SetContext(scene_window) + picture.SetWidth(WIDTH) + picture.SetHeight(HEIGHT) + picture.SetPixelFormat(pixel_format) + picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) + picture.AllocateDataStore() + + for _frame_id in range(NUM_FRAMES): + renderer.GetActiveCamera().Azimuth(2.0) + scene_window.Render() + picture.Capture(scene_window) + encoder.Encode(picture) + + # drain needs an OpenGL context so it can release the resources. + encoder.Drain() + + assert len(packets) == NUM_FRAMES + assert all(len(packet) > 10 for packet in packets[1:]) + # A valid H.264 Annex B stream opens with a start code, and every packet + # must carry real payload rather than zero filler. 
+ # Both of these are valid stream openings. + assert packets[0][:4] == b"\x00\x00\x00\x01" or packets[0][:3] == b"\x00\x00\x01" + assert all(any(packet) for packet in packets) + + encoder.Shutdown() diff --git a/tests/test_nv_encode_simple.py b/tests/test_nv_encode_simple.py new file mode 100644 index 0000000..486e356 --- /dev/null +++ b/tests/test_nv_encode_simple.py @@ -0,0 +1,107 @@ +"""Exercise the NVENC H.264 encoder with IYUV, NV12 and RGBA32 raw frame inputs. + +moving color bars are generated in memory and uploaded straight to the +vtkOpenGLVideoFrame. There is no NVDEC decoder module, so instead of an image +round-trip the test checks that the emitted bitstream is a plausible H.264 +Annex B stream and not all zeros. +""" + +import pytest + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.numpy_support import vtk_to_numpy +from vtkmodules.util.vtkConstants import VTK_OBJECT + +from vtk_streaming.vtkStreamingCore import ( + VTKPF_IYUV, + VTKPF_NV12, + VTKPF_RGBA32, + VTKVC_H264, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder +from vtk_streaming.vtkStreamingNvEncode import vtkNvEncoderGL +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame + +from tests import video_test_utils + +pytestmark = [ + video_test_utils.requires_rendering(), + video_test_utils.requires_nvenc(), +] + +WIDTH = 320 +HEIGHT = 240 +NUM_FRAMES = 30 + +_FORMATS = { + "iyuv": ( + VTKPF_IYUV, + video_test_utils.iyuv_frame_bytes, + video_test_utils.upload_iyuv_frame, + ), + "nv12": ( + VTKPF_NV12, + video_test_utils.nv12_frame_bytes, + video_test_utils.upload_nv12_frame, + ), + "rgba32": ( + VTKPF_RGBA32, + video_test_utils.rgba32_frame_bytes, + video_test_utils.upload_rgba32_frame, + ), +} + + +@pytest.mark.parametrize("format_name", sorted(_FORMATS)) +def test_nv_encode_simple(format_name): + pixel_format, pack_frame, upload = 
_FORMATS[format_name] + + input_window = video_test_utils.make_offscreen_window(WIDTH, HEIGHT) + + packets = [] + + @calldata_type(VTK_OBJECT) + def receive_packet(_encoder, _event, packet): + packets.append(vtk_to_numpy(packet.GetData()).tobytes()) + + encoder = vtkNvEncoderGL() + encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + encoder.SetGraphicsContext(input_window) + encoder.SetWidth(WIDTH) + encoder.SetHeight(HEIGHT) + encoder.SetCodec(VTKVC_H264) + encoder.SetInputPixelFormat(pixel_format) + + picture = vtkOpenGLVideoFrame() + picture.SetContext(input_window) + picture.SetWidth(WIDTH) + picture.SetHeight(HEIGHT) + picture.SetPixelFormat(pixel_format) + picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) + picture.ComputeDefaultStrides() + picture.AllocateDataStore() + + est_size = vtkRawVideoFrame.GetEstimatedSize(WIDTH, HEIGHT, pixel_format) + + for shift in range(NUM_FRAMES): + frame_bytes = pack_frame( + video_test_utils.generate_rgba32_color_bars(WIDTH, HEIGHT, shift) + ) + assert len(frame_bytes) == est_size # plane offsets rely on this + upload(picture, frame_bytes, WIDTH, HEIGHT) + picture.Render(input_window) + encoder.Encode(picture) + # drain out remaining packets + encoder.Drain() + + assert len(packets) == NUM_FRAMES + assert all(len(packet) > 10 for packet in packets[1:]) + # A valid H.264 Annex B stream opens with a start code, and every packet + # must carry real payload rather than zero filler. + # Both of these are valid stream openings. + assert packets[0][:4] == b"\x00\x00\x00\x01" or packets[0][:3] == b"\x00\x00\x01" + assert all(any(packet) for packet in packets) + + encoder.Shutdown() diff --git a/tests/test_vpx_encode_decode_render_window_capture.py b/tests/test_vpx_encode_decode_render_window_capture.py new file mode 100644 index 0000000..334d749 --- /dev/null +++ b/tests/test_vpx_encode_decode_render_window_capture.py @@ -0,0 +1,119 @@ +"""Exercise display encoding with the libvpx VP9 encoder. 
+ +A scene with a cylinder is captured from the +render window in IYUV or NV12 and encoded. Round-trip: every encoded +packet is fed to a vtkVpxDecoder whose decoded frames are rendered into a +second render window, and the two windows are compared with +vtkTesting.RegressionTest. +""" + +import pytest + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.vtkConstants import VTK_OBJECT +from vtkmodules.vtkFiltersSources import vtkCylinderSource +from vtkmodules.vtkRenderingCore import vtkActor, vtkPolyDataMapper, vtkRenderer +from vtkmodules.vtkTestingRendering import vtkTesting + +pytest.importorskip( + "vtk_streaming.vtkStreamingVpxDecode", + reason="installed vtk_streaming wheel was built without the VpxDecode module", +) + +from vtk_streaming.vtkStreamingCore import ( # noqa: E402 + VTKPF_IYUV, + VTKPF_NV12, + VTKVC_VP9, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder # noqa: E402 +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame # noqa: E402 +from vtk_streaming.vtkStreamingVpxEncode import vtkVpxEncoder # noqa: E402 + +from tests import video_test_utils # noqa: E402 + +pytestmark = video_test_utils.requires_rendering() + +WIDTH = 641 +HEIGHT = 953 +NUM_FRAMES = 30 +# The captured scene goes through RGB->YUV 4:2:0 capture, VP9 quantization and +# YUV->RGB display, so allow more slack than the file-based round-trip.
+THRESHOLD = 0.1 + + +def _make_cylinder_scene(): + cylinder = vtkCylinderSource() + mapper = vtkPolyDataMapper() + mapper.SetInputConnection(cylinder.GetOutputPort()) + actor = vtkActor() + actor.SetMapper(mapper) + actor.GetProperty().SetColor(255 / 255, 99 / 255, 71 / 255) # tomato + actor.RotateX(30.0) + actor.RotateY(-45.0) + renderer = vtkRenderer() + renderer.AddActor(actor) + renderer.SetBackground(26 / 255, 51 / 255, 102 / 255) + return renderer + + +@pytest.mark.parametrize( + "pixel_format", [VTKPF_IYUV, VTKPF_NV12], ids=["iyuv", "nv12"] +) +def test_vpx_encode_decode_render_window_capture(pixel_format, tmp_path): + scene_window = video_test_utils.make_offscreen_window(WIDTH, HEIGHT) + renderer = _make_cylinder_scene() + scene_window.AddRenderer(renderer) + scene_window.Render() + + decoded_window = video_test_utils.make_offscreen_window(WIDTH, HEIGHT) + decoder, decoded_frame_sizes = video_test_utils.make_decode_render_loop( + decoded_window + ) + + packet_sizes = [] + + @calldata_type(VTK_OBJECT) + def receive_packet(_encoder, _event, packet): + packet_sizes.append(packet.GetSize()) + decoder.Decode(packet) + + encoder = vtkVpxEncoder() + encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + encoder.SetGraphicsContext(scene_window) + encoder.SetCodec(VTKVC_VP9) + encoder.SetWidth(WIDTH) + encoder.SetHeight(HEIGHT) + encoder.SetInputPixelFormat(pixel_format) + + picture = vtkOpenGLVideoFrame() + picture.SetContext(scene_window) + picture.SetWidth(WIDTH) + picture.SetHeight(HEIGHT) + picture.SetPixelFormat(pixel_format) + picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) + picture.AllocateDataStore() + + for _frame_id in range(NUM_FRAMES): + renderer.GetActiveCamera().Azimuth(2.0) + scene_window.Render() + picture.Capture(scene_window) + encoder.Encode(picture) + + # drain needs an OpenGL context so it can release the resources. 
+ encoder.Drain() + + assert len(packet_sizes) == NUM_FRAMES + assert all(size > 10 for size in packet_sizes[1:]) + assert decoded_frame_sizes == [(WIDTH, HEIGHT)] * len(packet_sizes) + + # The scene window shows the last rendered frame, the decoded window the + # last round-tripped frame; they must match up to codec loss. + result = video_test_utils.regression_compare_windows( + scene_window, decoded_window, THRESHOLD, tmp_path + ) + assert result == vtkTesting.PASSED + + encoder.Shutdown() + decoder.Shutdown() diff --git a/tests/test_vpx_encode_decode_simple.py b/tests/test_vpx_encode_decode_simple.py new file mode 100644 index 0000000..d3aaa44 --- /dev/null +++ b/tests/test_vpx_encode_decode_simple.py @@ -0,0 +1,116 @@ +"""Exercise the libvpx VP9 encoder with IYUV and NV12 raw frame inputs. + +Moving color bars are generated in memory and uploaded straight to the vtkOpenGLVideoFrame. +Round-trip: every encoded packet is fed to a vtkVpxDecoder whose decoded frames are +rendered into a second render window, and the two windows are compared with +vtkTesting.RegressionTest.
+""" + +import pytest + +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +from vtkmodules.util.misc import calldata_type +from vtkmodules.util.vtkConstants import VTK_OBJECT +from vtkmodules.vtkTestingRendering import vtkTesting + +pytest.importorskip( + "vtk_streaming.vtkStreamingVpxDecode", + reason="installed vtk_streaming wheel was built without the VpxDecode module", +) + +from vtk_streaming.vtkStreamingCore import ( # noqa: E402 + VTKPF_IYUV, + VTKPF_NV12, + VTKVC_VP9, + vtkRawVideoFrame, +) +from vtk_streaming.vtkStreamingEncode import vtkVideoEncoder # noqa: E402 +from vtk_streaming.vtkStreamingOpenGL2 import vtkOpenGLVideoFrame # noqa: E402 +from vtk_streaming.vtkStreamingVpxEncode import vtkVpxEncoder # noqa: E402 + +from tests import video_test_utils # noqa: E402 + +pytestmark = video_test_utils.requires_rendering() + +WIDTH = 320 +HEIGHT = 240 +NUM_FRAMES = 30 +# Both windows show the same YUV data modulo VP9 quantization, so the tight +# default threshold is enough; bump towards 0.1 if encoding pixelates more. 
+THRESHOLD = 0.05 + + +_FORMATS = { + "iyuv": ( + VTKPF_IYUV, + video_test_utils.iyuv_frame_bytes, + video_test_utils.upload_iyuv_frame, + ), + "nv12": ( + VTKPF_NV12, + video_test_utils.nv12_frame_bytes, + video_test_utils.upload_nv12_frame, + ), +} + + +@pytest.mark.parametrize("format_name", sorted(_FORMATS)) +def test_vpx_encode_decode_simple(format_name, tmp_path): + pixel_format, pack_frame, upload = _FORMATS[format_name] + + input_window = video_test_utils.make_offscreen_window(WIDTH, HEIGHT) + decoded_window = video_test_utils.make_offscreen_window(WIDTH, HEIGHT) + decoder, decoded_frame_sizes = video_test_utils.make_decode_render_loop( + decoded_window + ) + + packet_sizes = [] + + @calldata_type(VTK_OBJECT) + def receive_packet(_encoder, _event, packet): + packet_sizes.append(packet.GetSize()) + decoder.Decode(packet) + + encoder = vtkVpxEncoder() + encoder.AddObserver(vtkVideoEncoder.EncodedVideoChunkEvent, receive_packet) + encoder.SetGraphicsContext(input_window) + encoder.SetWidth(WIDTH) + encoder.SetHeight(HEIGHT) + encoder.SetCodec(VTKVC_VP9) + encoder.SetInputPixelFormat(pixel_format) + + picture = vtkOpenGLVideoFrame() + picture.SetContext(input_window) + picture.SetWidth(WIDTH) + picture.SetHeight(HEIGHT) + picture.SetPixelFormat(pixel_format) + picture.SetSliceOrderType(vtkRawVideoFrame.TopDown) + picture.ComputeDefaultStrides() + picture.AllocateDataStore() + + est_size = vtkRawVideoFrame.GetEstimatedSize(WIDTH, HEIGHT, pixel_format) + + for shift in range(NUM_FRAMES): + frame_bytes = pack_frame( + video_test_utils.generate_rgba32_color_bars(WIDTH, HEIGHT, shift) + ) + assert len(frame_bytes) == est_size # plane offsets below rely on this + upload(picture, frame_bytes, WIDTH, HEIGHT) + picture.Render(input_window) + encoder.Encode(picture) + # drain out remaining packets + encoder.Drain() + + assert len(packet_sizes) == NUM_FRAMES + assert all(size > 10 for size in packet_sizes[1:]) + assert decoded_frame_sizes == [(WIDTH, HEIGHT)] * 
len(packet_sizes) + + # The input window shows the last uploaded frame, the decoded window the + # last round-tripped frame; they must match up to codec loss. + result = video_test_utils.regression_compare_windows( + input_window, decoded_window, THRESHOLD, tmp_path + ) + assert result == vtkTesting.PASSED + + encoder.Shutdown() + decoder.Shutdown() diff --git a/tests/video_test_utils.py b/tests/video_test_utils.py new file mode 100644 index 0000000..893f15b --- /dev/null +++ b/tests/video_test_utils.py @@ -0,0 +1,293 @@ +"""Shared helpers for the VPX encoder/decoder round-trip tests. + +These tests render with OpenGL and exercise the real libvpx codec, so they +only run where a working OpenGL context can be created; guard every test +module that uses them with ``requires_rendering()`` so headless CI skips them +(see "Testing constraints" in CLAUDE.md). +""" + +import functools +import subprocess +import sys + +import numpy as np +import pytest + +from vtkmodules.util.numpy_support import numpy_to_vtk + +# Bar colors from vtkStreamingTestUtility::GenerateRGBA32ColorBars: +# white, yellow, cyan, green, magenta, red, blue, black. +_BAR_COLORS = np.array( + [ + [255, 255, 255, 255], + [255, 255, 0, 255], + [0, 255, 255, 255], + [0, 255, 0, 255], + [255, 0, 255, 255], + [255, 0, 0, 255], + [0, 0, 255, 255], + [0, 0, 0, 255], + ], + dtype=np.uint8, +) + + +def generate_rgba32_color_bars(width, height, shift=0): + """numpy port of vtkStreamingTestUtility::GenerateRGBA32ColorBars. + + Returns a (height, width, 4) uint8 array of vertical color bars; the top + and bottom halves scroll in opposite directions as ``shift`` increases. 
+ """ + ndivs = (width + 7) >> 3 + column_bar = np.arange(width) // ndivs + top_bar = (column_bar + (shift % 8)) % 8 + bottom_bar = (7 - column_bar + (shift % 8)) % 8 + frame = np.empty((height, width, 4), dtype=np.uint8) + frame[: height >> 1] = _BAR_COLORS[top_bar] + frame[height >> 1 :] = _BAR_COLORS[bottom_bar] + return frame + + +def rgba_to_yuv420_planes(rgba): + """Convert an RGBA frame to limited-range BT.709 4:2:0 Y, U, V planes. + + The matrix is the inverse of the YCbCr-to-RGB matrix hard-coded in + Streaming/OpenGL2/glsl/vtkIYUVRenderFS.glsl so a render of the generated + planes reproduces the original colors. + """ + rgb = rgba[..., :3].astype(np.float64) / 255.0 + r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] + y = 0.2126 * r + 0.7152 * g + 0.0722 * b + cb = (b - y) / 1.8556 + cr = (r - y) / 1.5748 + + def subsample(plane): + rows, cols = plane.shape + return plane.reshape(rows // 2, 2, cols // 2, 2).mean(axis=(1, 3)) + + def quantize(plane, scale, offset): + return np.clip(np.round(offset + scale * plane), 0, 255).astype(np.uint8) + + return ( + quantize(y, 219.0, 16.0), + quantize(subsample(cb), 224.0, 128.0), + quantize(subsample(cr), 224.0, 128.0), + ) + + +def iyuv_frame_bytes(rgba): + """Pack an RGBA frame as one packed I420 (a.k.a. 
IYUV) frame.""" + y, u, v = rgba_to_yuv420_planes(rgba) + return y.tobytes() + u.tobytes() + v.tobytes() + + +def nv12_frame_bytes(rgba): + """Pack an RGBA frame as one packed NV12 frame (Y plane + interleaved UV).""" + y, u, v = rgba_to_yuv420_planes(rgba) + uv = np.empty((u.shape[0], u.shape[1] * 2), dtype=np.uint8) + uv[:, 0::2] = u + uv[:, 1::2] = v + return y.tobytes() + uv.tobytes() + + +def rgba32_frame_bytes(rgba): + """Pack an RGBA frame as one packed RGBA32 frame.""" + return rgba.tobytes() + + +def as_vtk_uchar_array(buf): + """Copy a bytes-like object into a vtkUnsignedCharArray.""" + return numpy_to_vtk(np.frombuffer(buf, dtype=np.uint8), deep=True) + + +def upload_iyuv_frame(picture, frame_bytes, width, height): + """Copy one packed I420 frame into a vtkOpenGLVideoFrame, plane by plane.""" + luma_size = width * height + chroma_rowsize = width >> 1 + chroma_numrows = (height + 1) >> 1 + chroma_size = chroma_rowsize * chroma_numrows + picture.CopyPlanarData( + as_vtk_uchar_array(frame_bytes[:luma_size]), width, height, 0 + ) + picture.CopyPlanarData( + as_vtk_uchar_array(frame_bytes[luma_size : luma_size + chroma_size]), + chroma_rowsize, + chroma_numrows, + 1, + ) + picture.CopyPlanarData( + as_vtk_uchar_array(frame_bytes[luma_size + chroma_size :]), + chroma_rowsize, + chroma_numrows, + 2, + ) + + +def upload_nv12_frame(picture, frame_bytes, width, height): + """Copy one packed NV12 frame into a vtkOpenGLVideoFrame, plane by plane.""" + luma_size = width * height + picture.CopyPlanarData( + as_vtk_uchar_array(frame_bytes[:luma_size]), width, height, 0 + ) + picture.CopyPlanarData( + as_vtk_uchar_array(frame_bytes[luma_size:]), width, (height + 1) >> 1, 1 + ) + + +def upload_rgba32_frame(picture, frame_bytes, width, height): + """Copy one packed RGBA32 frame into a vtkOpenGLVideoFrame.""" + picture.CopyData(as_vtk_uchar_array(frame_bytes), width * 4, height) + + +_RENDER_PROBE = """ +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the 
OpenGL factory) +from vtkmodules.vtkRenderingCore import vtkRenderWindow +win = vtkRenderWindow() +win.SetOffScreenRendering(True) +win.SetSize(32, 32) +win.Render() +""" + + +@functools.lru_cache(maxsize=1) +def rendering_available(): + """Whether an OpenGL render window can be created on this machine. + + Probed in a subprocess because a failed context creation can abort the + whole process rather than raise. + """ + try: + probe = subprocess.run( + [sys.executable, "-c", _RENDER_PROBE], capture_output=True, timeout=120 + ) + except (OSError, subprocess.TimeoutExpired): + return False + return probe.returncode == 0 + + +def requires_rendering(): + return pytest.mark.skipif( + not rendering_available(), + reason="cannot create an OpenGL render window (headless environment)", + ) + + +# vtkNvEncoderGL::CheckAvailability() calls cuGLGetDevices, which needs a +# current OpenGL context on the CUDA device; without one it reports NVENC as +# unavailable even on NVIDIA hardware. +_NVENC_PROBE = """ +import vtkmodules.vtkRenderingOpenGL2 # noqa: F401 (register the OpenGL factory) +from vtkmodules.vtkRenderingCore import vtkRenderWindow +win = vtkRenderWindow() +win.SetOffScreenRendering(True) +win.SetSize(32, 32) +win.Render() +from vtk_streaming.vtkStreamingNvEncode import vtkNvEncoderGL +raise SystemExit(0 if vtkNvEncoderGL.CheckAvailability() else 1) +""" + + +@functools.lru_cache(maxsize=1) +def nvenc_available(): + """Whether NVENC encoding is available, probed under a live GL context.""" + try: + probe = subprocess.run( + [sys.executable, "-c", _NVENC_PROBE], capture_output=True, timeout=120 + ) + except (OSError, subprocess.TimeoutExpired): + return False + return probe.returncode == 0 + + +def requires_nvenc(): + return pytest.mark.skipif( + not nvenc_available(), + reason="NVENC is not available (no NVIDIA driver, or OpenGL is not on " + "the NVIDIA GPU)", + ) + + +def make_offscreen_window(width, height): + """Create and initialize an offscreen render window. 
+ + Offscreen keeps the framebuffer exactly width x height (no window-manager + resizing) so the regression image comparison is stable. + """ + from vtkmodules.vtkRenderingCore import vtkRenderWindow + + window = vtkRenderWindow() + window.SetOffScreenRendering(True) + window.SetSize(width, height) + window.Render() + return window + + +def make_decode_render_loop(window): + """Create a vtkVpxDecoder that renders every decoded frame into ``window``. + + Returns (decoder, decoded_frame_sizes) where decoded_frame_sizes collects + a (width, height) tuple per decoded frame. Feed the decoder by calling + ``decoder.Decode(packet)`` with packets observed from an encoder. + + Do not call ``decoder.Drain()``: vtkVpxDecoder::SendEOS() forwards a null + packet that DecodeInternal dereferences. ``Shutdown()`` is safe. + """ + from vtkmodules.util.misc import calldata_type + from vtkmodules.util.vtkConstants import VTK_OBJECT + + from vtk_streaming.vtkStreamingDecode import vtkVideoDecoder + from vtk_streaming.vtkStreamingVpxDecode import vtkVpxDecoder + + decoder = vtkVpxDecoder() + decoder.SetGraphicsContext(window) + decoded_frame_sizes = [] + + @calldata_type(VTK_OBJECT) + def render_decoded_frame(_decoder, _event, frame): + frame.Render(window) + decoded_frame_sizes.append((frame.GetWidth(), frame.GetHeight())) + + decoder.AddObserver(vtkVideoDecoder.DecodedVideoFrameEvent, render_decoded_frame) + return decoder, decoded_frame_sizes + + +def regression_compare_windows(baseline_window, test_window, threshold, scratch_dir): + """Compare the current contents of two render windows with vtkTesting. + + The baseline window's framebuffer is written out as the valid image, then + ``vtkTesting.RegressionTest`` grades the test window's framebuffer against + it. Returns vtkTesting.PASSED when the images match within ``threshold``; + difference images land in ``scratch_dir`` on failure. 
+ """ + import os + + from vtkmodules.vtkIOImage import vtkPNGWriter + from vtkmodules.vtkRenderingCore import vtkWindowToImageFilter + from vtkmodules.vtkTestingRendering import vtkTesting + + # Pin the comparison method: the SSIM-based TIGHT_VALID/LOOSE_VALID + # methods cannot tell codec blur from a genuinely different image here + # (measured 0.113 for a VP9 round-trip vs 0.133 for a different camera + # angle), while the legacy metric separates them by orders of magnitude + # (0 vs ~1000). Exported VTK_TESTING_IMAGE_COMPARE_METHOD still wins. + os.environ.setdefault("VTK_TESTING_IMAGE_COMPARE_METHOD", "LEGACY_VALID") + + baseline_png = str(scratch_dir / "baseline.png") + grab_baseline = vtkWindowToImageFilter() + grab_baseline.SetInput(baseline_window) + grab_baseline.ShouldRerenderOff() + writer = vtkPNGWriter() + writer.SetFileName(baseline_png) + writer.SetInputConnection(grab_baseline.GetOutputPort()) + writer.Write() + + grab_test = vtkWindowToImageFilter() + grab_test.SetInput(test_window) + grab_test.ShouldRerenderOff() + + testing = vtkTesting() + testing.AddArgument("-T") + testing.AddArgument(str(scratch_dir)) + testing.AddArgument("-V") + testing.AddArgument(baseline_png) + return testing.RegressionTest(grab_test, threshold)