diff --git a/src/cmake/testing.cmake b/src/cmake/testing.cmake
index 05e01fa58b..6bc93e924e 100644
--- a/src/cmake/testing.cmake
+++ b/src/cmake/testing.cmake
@@ -280,7 +280,7 @@ macro (osl_add_all_tests)
                 cellnoise closure closure-array closure-layered closure-parameters closure-zero closure-conditional
                 color color2 color4 color-reg colorspace comparison
                 complement-reg compile-buffer compassign-bool compassign-reg
-                component-range
+                component-range compstats
                 control-flow-reg connect-components
                 const-array-params const-array-fill
                 debugnan debug-uninit
diff --git a/src/liboslexec/oslexec_pvt.h b/src/liboslexec/oslexec_pvt.h
index 9d7acc6175..acc99087e8 100644
--- a/src/liboslexec/oslexec_pvt.h
+++ b/src/liboslexec/oslexec_pvt.h
@@ -889,6 +889,7 @@ class ShadingSystemImpl {
 
     // Options
     int m_statslevel;             ///< Statistics level
+    int m_stat_rank_groups = 5;   ///< How many groups to list in ranked stats
     bool m_lazylayers;            ///< Evaluate layers on demand?
     bool m_lazyglobals;           ///< Run lazily even if globals write?
     bool m_lazyunconnected;       ///< Run lazily even if not connected?
@@ -1841,6 +1842,15 @@ class ShaderGroup {
 
     long long int executions() const { return m_executions; }
 
+    int stat_active_layers() const { return m_stat_active_layers; }
+    void stat_active_layers(int n) { m_stat_active_layers = n; }
+    int stat_network_depth() const { return m_stat_network_depth; }
+    void stat_network_depth(int n) { m_stat_network_depth = n; }
+    int stat_texture_ops() const { return m_stat_texture_ops; }
+    void stat_texture_ops(int n) { m_stat_texture_ops = n; }
+    int stat_noise_ops() const { return m_stat_noise_ops; }
+    void stat_noise_ops(int n) { m_stat_noise_ops = n; }
+
     void start_running()
     {
 #ifndef NDEBUG
@@ -2057,6 +2067,11 @@ class ShaderGroup {
     bool m_unknown_attributes_needed;
     atomic_ll m_executions { 0 };  ///< Number of times the group executed
     atomic_ll m_stat_total_shading_time_ticks { 0 };  // Shading time (ticks)
+    // Post-optimization compile stats (written once in RuntimeOptimizer::run)
+    int m_stat_active_layers = 0;  // Non-unused layers after dead-layer elim
+    int m_stat_network_depth = 0;  // Max layer-to-layer connection chain length
+    int m_stat_texture_ops   = 0;  // Texture op count across active layers
+    int m_stat_noise_ops     = 0;  // Noise op count across active layers
 
     std::string m_optix_cache_key;
 
diff --git a/src/liboslexec/runtimeoptimize.cpp b/src/liboslexec/runtimeoptimize.cpp
index 4880ab1f63..6fd347164d 100644
--- a/src/liboslexec/runtimeoptimize.cpp
+++ b/src/liboslexec/runtimeoptimize.cpp
@@ -3474,6 +3474,11 @@ RuntimeOptimizer::run()
         max_network_depth = std::max(max_network_depth, layer_depth[layer]);
     }
 
+    group().stat_active_layers(active_layers);
+    group().stat_network_depth(max_network_depth);
+    group().stat_texture_ops(n_texture_ops);
+    group().stat_noise_ops(n_noise_ops);
+
     m_stat_specialization_time = rop_timer();
     {
         // adjust memory stats
diff --git a/src/liboslexec/shadingsys.cpp b/src/liboslexec/shadingsys.cpp
index 671b396fcb..098b7e8f9a 100644
--- a/src/liboslexec/shadingsys.cpp
+++ b/src/liboslexec/shadingsys.cpp
@@ -1605,6 +1605,7 @@ ShadingSystemImpl::attribute(string_view name, TypeDesc type, const void* val)
 
     lock_guard guard(m_mutex);  // Thread safety
     ATTR_SET("statistics:level", int, m_statslevel);
+    ATTR_SET("stat:rank_groups", int, m_stat_rank_groups);
     ATTR_SET("debug", int, m_debug);
     ATTR_SET("lazylayers", int, m_lazylayers);
     ATTR_SET("lazyglobals", int, m_lazyglobals);
@@ -1797,6 +1798,7 @@ ShadingSystemImpl::getattribute(string_view name, TypeDesc type, void* val)
     ATTR_DECODE_STRING("searchpath:shader", m_searchpath);
     ATTR_DECODE_STRING("searchpath:library", m_library_searchpath);
     ATTR_DECODE("statistics:level", int, m_statslevel);
+    ATTR_DECODE("stat:rank_groups", int, m_stat_rank_groups);
     ATTR_DECODE("lazylayers", int, m_lazylayers);
     ATTR_DECODE("lazyglobals", int, m_lazyglobals);
     ATTR_DECODE("lazyunconnected", int, m_lazyunconnected);
@@ -2168,6 +2170,22 @@ ShadingSystemImpl::getattribute(ShaderGroup* group, string_view name,
         *(int*)val = group->m_exec_repeat;
         return true;
     }
+    if (name == "stat:compiled_active_layers" && type == TypeInt) {
+        *(int*)val = group->stat_active_layers();
+        return true;
+    }
+    if (name == "stat:compiled_network_depth" && type == TypeInt) {
+        *(int*)val = group->stat_network_depth();
+        return true;
+    }
+    if (name == "stat:compiled_texture_ops" && type == TypeInt) {
+        *(int*)val = group->stat_texture_ops();
+        return true;
+    }
+    if (name == "stat:compiled_noise_ops" && type == TypeInt) {
+        *(int*)val = group->stat_noise_ops();
+        return true;
+    }
     if (name == "ptx_compiled_version" && type.basetype == TypeDesc::PTR) {
         bool exists = !group->m_llvm_ptx_compiled_version.empty();
         *(std::string*)val = exists ? group->m_llvm_ptx_compiled_version : "";
@@ -2714,6 +2732,76 @@ ShadingSystemImpl::getstats(int level) const
         }
     }
 
+    // Ranked shader groups by compile-time complexity metrics
+    if (m_stat_groups_compiled > 0) {
+        // Collect a snapshot of all still-live compiled (optimized) groups.
+        std::vector<ShaderGroupRef> groups;
+        {
+            spin_lock lock(m_all_shader_groups_mutex);
+            for (auto&& w : m_all_shader_groups)
+                if (ShaderGroupRef g = w.lock())
+                    if (g->optimized())
+                        groups.push_back(g);
+        }
+        using StatVal = std::pair<int, ustring>;
+        print(out, " Shader compilation stats, post-optimized:\n");
+        // Print min/max/median of one metric over all compiled groups, then
+        // the top "stat:rank_groups" groups with a nonzero value, sorted by
+        // descending value (ties broken by ascending group name).
+        auto emit_ranked_groups =
+            [&](string_view label, string_view unit,
+                std::function<int(const ShaderGroup&)> getter) {
+                if (groups.empty())
+                    return;
+                // Gather values from all compiled groups for aggregate stats.
+                std::vector<int> vals;
+                vals.reserve(groups.size());
+                for (auto&& g : groups)
+                    vals.push_back(getter(*g));
+                std::sort(vals.begin(), vals.end());
+                int vmin    = vals.front();
+                int vmax    = vals.back();
+                int vmedian = vals[vals.size() / 2];
+                print(out, " {}: min={} max={} median={}\n", label, vmin,
+                      vmax, vmedian);
+                // Ranked list: exclude groups with value 0.
+                std::vector<StatVal> ranked;
+                for (auto&& g : groups) {
+                    int v = getter(*g);
+                    if (v > 0)
+                        ranked.emplace_back(v, g->name());
+                }
+                // A non-positive "stat:rank_groups" means "no ranked list".
+                // Bail out here so a negative value never reaches resize(),
+                // where it would convert to an enormous size_t.
+                if (ranked.empty() || m_stat_rank_groups <= 0)
+                    return;
+                std::sort(ranked.begin(), ranked.end(),
+                          [](const StatVal& a, const StatVal& b) {
+                              return a.first != b.first ? a.first > b.first
+                                                        : a.second < b.second;
+                          });
+                if ((int)ranked.size() > m_stat_rank_groups)
+                    ranked.resize(m_stat_rank_groups);
+                print(out, " Top shader groups:\n");
+                for (auto&& [v, name] : ranked)
+                    print(out, " {:>6} {} \"{}\"\n", v, unit,
+                          name.size() ? name.c_str() : "");
+            };
+        emit_ranked_groups("Active layers", "layers", [](const ShaderGroup& g) {
+            return g.stat_active_layers();
+        });
+        emit_ranked_groups("Network depth", "depth", [](const ShaderGroup& g) {
+            return g.stat_network_depth();
+        });
+        emit_ranked_groups("Texture ops", "ops", [](const ShaderGroup& g) {
+            return g.stat_texture_ops();
+        });
+        emit_ranked_groups("Noise ops", "ops", [](const ShaderGroup& g) {
+            return g.stat_noise_ops();
+        });
+    }
+
     return out.str();
 }
 
diff --git a/src/testshade/testshade.cpp b/src/testshade/testshade.cpp
index 9c1f999da8..20836ffffa 100644
--- a/src/testshade/testshade.cpp
+++ b/src/testshade/testshade.cpp
@@ -81,6 +81,7 @@ static bool debug_uninit = false;
 static bool use_group_outputs    = false;
 static bool do_oslquery          = false;
 static bool print_groupdata      = false;
+static bool print_group_stats    = false;
 static bool inbuffer             = false;
 static bool use_shade_image      = false;
 static bool userdata_isconnected = false;
@@ -828,6 +829,8 @@ getargs(int argc, const char* argv[])
       .help("Test OSLQuery at runtime");
     ap.arg("--print-groupdata", &print_groupdata)
       .help("Print groupdata size to stdout");
+    ap.arg("--print-group-stats", &print_group_stats)
+      .help("Print per-group compile stats (active_layers, network_depth, texture_ops, noise_ops) to stdout");
     ap.arg("--inbuffer", &inbuffer)
       .help("Compile osl source from and to jbuffer");
     ap.arg("--no-output-placement")
@@ -2335,6 +2338,23 @@ test_shade(int argc, const char* argv[])
         std::cout << "Groupdata size: " << groupdata_size << "\n";
     }
 
+    if (print_group_stats && !batched) {
+        int active_layers = 0, network_depth = 0, texture_ops = 0,
+            noise_ops = 0;
+        shadingsys->getattribute(shadergroup.get(),
+                                 "stat:compiled_active_layers", active_layers);
+        shadingsys->getattribute(shadergroup.get(),
+                                 "stat:compiled_network_depth", network_depth);
+        shadingsys->getattribute(shadergroup.get(), "stat:compiled_texture_ops",
+                                 texture_ops);
+        shadingsys->getattribute(shadergroup.get(), "stat:compiled_noise_ops",
+                                 noise_ops);
+        OSL::print("stat:compiled_active_layers={}\n", active_layers);
+        OSL::print("stat:compiled_network_depth={}\n", network_depth);
+        OSL::print("stat:compiled_texture_ops={}\n", texture_ops);
+        OSL::print("stat:compiled_noise_ops={}\n", noise_ops);
+    }
+
     // Give the renderer a chance to do initial cleanup while everything is still alive
     rend->clear();
 
diff --git a/testsuite/compstats/layer_a.osl b/testsuite/compstats/layer_a.osl
new file mode 100644
index 0000000000..2c7872ed7b
--- /dev/null
+++ b/testsuite/compstats/layer_a.osl
@@ -0,0 +1,8 @@
+// Copyright Contributors to the Open Shading Language project.
+// SPDX-License-Identifier: BSD-3-Clause
+// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
+
+shader layer_a(output color Cout = 0)
+{
+    Cout = texture("test.tx", u, v);
+}
diff --git a/testsuite/compstats/layer_b.osl b/testsuite/compstats/layer_b.osl
new file mode 100644
index 0000000000..55f12820e6
--- /dev/null
+++ b/testsuite/compstats/layer_b.osl
@@ -0,0 +1,8 @@
+// Copyright Contributors to the Open Shading Language project.
+// SPDX-License-Identifier: BSD-3-Clause
+// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
+
+shader layer_b(color Cin = 0, output color Cout = 0)
+{
+    Cout = Cin + noise("perlin", P) + noise("perlin", P * 2);
+}
diff --git a/testsuite/compstats/ref/out.txt b/testsuite/compstats/ref/out.txt
new file mode 100644
index 0000000000..dee1c62fd9
--- /dev/null
+++ b/testsuite/compstats/ref/out.txt
@@ -0,0 +1,17 @@
+ Shader compilation stats, post-optimized:
+ Active layers: min=2 max=2 median=2
+ Top shader groups:
+      2 layers "complex"
+ Network depth: min=2 max=2 median=2
+ Top shader groups:
+      2 depth "complex"
+ Texture ops: min=1 max=1 median=1
+ Top shader groups:
+      1 ops "complex"
+ Noise ops: min=2 max=2 median=2
+ Top shader groups:
+      2 ops "complex"
+stat:compiled_active_layers=2
+stat:compiled_network_depth=2
+stat:compiled_texture_ops=1
+stat:compiled_noise_ops=2
diff --git a/testsuite/compstats/run.py b/testsuite/compstats/run.py
new file mode 100644
index 0000000000..e9078d511e
--- /dev/null
+++ b/testsuite/compstats/run.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+# Copyright Contributors to the Open Shading Language project.
+# SPDX-License-Identifier: BSD-3-Clause
+# https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
+
+# Test that per-group compile stats are recorded and reported in getstats().
+#
+# "complex" group: layer_a (1 texture op) -> layer_b (2 noise ops)
+# Expected: active_layers=2, network_depth=2, texture_ops=1, noise_ops=2
+#
+# With statistics:level=1, getstats() should emit min/max/median and a
+# ranked list for each metric.
+
+command = testshade(
+    "--options statistics:level=1"
+    " --groupname complex"
+    " --shader layer_a la"
+    " --shader layer_b lb"
+    " --connect la Cout lb Cin"
+    " -o Cout null"
+)
+
+command += testshade(
+    "--print-group-stats"
+    " --groupname complex"
+    " --shader layer_a la"
+    " --shader layer_b lb"
+    " --connect la Cout lb Cin"
+    " -o Cout null"
+)
+
+# Filter to only the new per-group ranked stats lines and getattribute
+# stat key output; everything else is machine- or build-specific.
+# Note: runtest uses re.match() (anchored at line start), so prefix with .*
+filter_re = r".*(Shader compilation stats|Active layers|Network depth|Texture ops|Noise ops|Top shader groups|stat:)"
diff --git a/testsuite/compstats/simple.osl b/testsuite/compstats/simple.osl
new file mode 100644
index 0000000000..063b7c1e76
--- /dev/null
+++ b/testsuite/compstats/simple.osl
@@ -0,0 +1,8 @@
+// Copyright Contributors to the Open Shading Language project.
+// SPDX-License-Identifier: BSD-3-Clause
+// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
+
+shader simple(output color Cout = 0)
+{
+    Cout = color(u, v, 0);
+}
diff --git a/testsuite/render-background/run.py b/testsuite/render-background/run.py
index b3122b811c..956373a36f 100755
--- a/testsuite/render-background/run.py
+++ b/testsuite/render-background/run.py
@@ -11,4 +11,4 @@ idiff_program = "idiff"
 
 outputs = [ "out.exr" ]
 
-command = testrender("-r 320 240 -aa 4 scene.xml out.exr")
+command = testrender("-r 320 240 -aa 4 --options statistics:level=1 scene.xml out.exr")