diff --git a/README-cn.md b/README-cn.md index 5404fda..01ae8c6 100644 --- a/README-cn.md +++ b/README-cn.md @@ -85,7 +85,7 @@ cmake --build build-macos --target all #### 3. 运行示例应用程序 ```bash -./build/bin/system_test ../obj +./build/bin/system_test ./obj ``` --- diff --git a/README.md b/README.md index 95981fd..fab00dc 100755 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ cmake --build build-macos --target all #### 3. Run the Example Application ```bash -./build/bin/system_test ../obj +./build/bin/system_test ./obj ``` --- diff --git a/src/include/face.hpp b/src/include/face.hpp index 28a5b30..49f0754 100644 --- a/src/include/face.hpp +++ b/src/include/face.hpp @@ -40,7 +40,7 @@ class Face { // Get functions // 获取函数 inline const std::array& GetIndices() const { return indices_; } - inline const size_t GetIndex(size_t index) const { return indices_[index]; } + inline size_t GetIndex(size_t index) const { return indices_[index]; } inline const Material& GetMaterial() const { return material_; } private: diff --git a/src/include/log_system.h b/src/include/log_system.h index a1f2903..2f8d9c4 100755 --- a/src/include/log_system.h +++ b/src/include/log_system.h @@ -17,6 +17,9 @@ #ifndef SIMPLERENDER_SRC_INCLUDE_LOG_SYSTEM_H_ #define SIMPLERENDER_SRC_INCLUDE_LOG_SYSTEM_H_ +#ifndef SPDLOG_ACTIVE_LEVEL +#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_INFO +#endif #include namespace simple_renderer { diff --git a/src/include/rasterizer.hpp b/src/include/rasterizer.hpp index 749aa28..cd0b349 100644 --- a/src/include/rasterizer.hpp +++ b/src/include/rasterizer.hpp @@ -3,6 +3,7 @@ #include "config.h" #include "shader.hpp" +#include "vertex.hpp" namespace simple_renderer { @@ -15,21 +16,80 @@ class Rasterizer { auto operator=(Rasterizer&& rasterizer) -> Rasterizer& = default; ~Rasterizer() = default; + /** + * @brief 构造具有指定尺寸的光栅化器 + * @param width 光栅化器宽度 + * @param height 光栅化器高度 + */ Rasterizer(size_t width, size_t height); + /** + * @brief 光栅化三角形,生成片段列表 + * @param v0 三角形第一个顶点 + * 
@param v1 三角形第二个顶点 + * @param v2 三角形第三个顶点 + * @return 生成的片段向量 + */ std::vector Rasterize(const Vertex& v0, const Vertex& v1, const Vertex& v2); + /** + * @brief 非分配版本:将片段直接写入调用方提供的容器 + * + * 可选的裁剪区域为半开区间 [x0, x1) × [y0, y1) + * 用于 TBR:将光栅化限制在 tile 边界内,便于复用外部 scratch 容器 + * + * @param v0 三角形第一个顶点 + * @param v1 三角形第二个顶点 + * @param v2 三角形第三个顶点 + * @param x0 裁剪区域左边界(包含) + * @param y0 裁剪区域上边界(包含) + * @param x1 裁剪区域右边界(不包含) + * @param y1 裁剪区域下边界(不包含) + * @param out 输出片段容器 + */ + void RasterizeTo(const Vertex& v0, const Vertex& v1, const Vertex& v2, + int x0, int y0, int x1, int y1, + std::vector& out); + + /** + * @brief SoA 版本:按顶点索引从 SoA 读取三角形三顶点 + * @param soa 结构体数组格式的顶点数据 + * @param i0 三角形第一个顶点索引 + * @param i1 三角形第二个顶点索引 + * @param i2 三角形第三个顶点索引 + * @param x0 裁剪区域左边界(包含) + * @param y0 裁剪区域上边界(包含) + * @param x1 裁剪区域右边界(不包含) + * @param y1 裁剪区域下边界(不包含) + * @param out 输出片段容器 + */ + void RasterizeTo(const VertexSoA& soa, size_t i0, size_t i1, size_t i2, + int x0, int y0, int x1, int y1, + std::vector& out); + private: size_t width_, height_; + // 透视矫正结果 + struct PerspectiveCorrectionResult { + Vector3f corrected_barycentric; + float interpolated_z; + }; + + // 透视矫正helper函数 + PerspectiveCorrectionResult PerformPerspectiveCorrection( + float w0, float w1, float w2, + float z0, float z1, float z2, + const Vector3f& original_barycentric) const; + template T Interpolate(const T& v0, const T& v1, const T& v2, - const Vector3f& barycentric_coord); + const Vector3f& barycentric_coord) const; Color InterpolateColor(const Color& color0, const Color& color1, const Color& color2, - const Vector3f& barycentric_coord); + const Vector3f& barycentric_coord) const; std::pair GetBarycentricCoord(const Vector3f& p0, const Vector3f& p1, diff --git a/src/include/renderer.h b/src/include/renderer.h index bcc136f..e11c93f 100755 --- a/src/include/renderer.h +++ b/src/include/renderer.h @@ -18,57 +18,95 @@ #define SIMPLERENDER_SRC_INCLUDE_RENDERER_H_ #include -#include -#include +#include 
+#include -#include "buffer.hpp" -#include "light.h" #include "log_system.h" -#include "math.hpp" #include "model.hpp" -#include "rasterizer.hpp" #include "shader.hpp" +#include "renderers/renderer_base.hpp" namespace simple_renderer { +// 渲染模式枚举 +/** + * @brief 渲染模式 + * - PER_TRIANGLE: 逐三角形(triangle-major)前向渲染 + * - TILE_BASED: 基于 tile(tile-major)前向渲染 + * - DEFERRED: 延迟渲染(片段收集后再着色) + */ +enum class RenderingMode { + PER_TRIANGLE, //!< 逐三角形(triangle-major) + TILE_BASED, //!< 基于 tile(tile-major) + DEFERRED //!< 延迟渲染 +}; + +/** + * @brief 将渲染模式枚举转为可读字符串 + * @param mode 渲染模式 + * @return 可读字符串(PER_TRIANGLE/TILE_BASED/DEFERRED) + */ +std::string RenderingModeToString(RenderingMode mode); + +/** + * @brief 渲染门面(Facade) + * + * 职责: + * - 仅作为模式选择与调用入口; + * - 根据 `RenderingMode` 构造并持有具体渲染器; + * - 对外暴露统一的 `DrawModel` 接口。 + */ class SimpleRenderer { public: /** - * 构造函数 - * @param width - * @param height - * @param buffer 要进行绘制的内存区域,大小为 width*height*sizeof(uint32_t) - * @param + * @brief 构造渲染器门面 + * @param width 画布宽度(像素) + * @param height 画布高度(像素) */ SimpleRenderer(size_t width, size_t height); + ~SimpleRenderer() = default; - /// @name 默认构造/析构函数 - /// @{ - SimpleRenderer(const SimpleRenderer &_simplerenderer) = default; - SimpleRenderer(SimpleRenderer &&_simplerenderer) = default; - auto operator=(const SimpleRenderer &_simplerenderer) -> SimpleRenderer & = - default; - auto operator=(SimpleRenderer &&_simplerenderer) -> SimpleRenderer & = - default; - virtual ~SimpleRenderer() = default; - /// @} + /** + * @brief 绘制单个模型 + * @param model 模型 + * @param shader 着色器(含 uniform) + * @param buffer 输出颜色缓冲(width*height) + * @return 是否成功 + */ + bool DrawModel(const Model &model, const Shader &shader, uint32_t *buffer); - bool Render(const Model &model, const Shader &shader, uint32_t *buffer); + /** + * @brief 设置渲染模式 + */ + void SetRenderingMode(RenderingMode mode); + /** + * @brief 获取当前渲染模式 + */ + RenderingMode GetRenderingMode() const; + + // 可选:配置参数(仅对 TileBasedRenderer 生效;运行中修改将重建 
TBR 实例) + /** + * @brief 启用或禁用 Early‑Z(仅 TBR 有效) + */ + void SetEarlyZEnabled(bool enabled); + /** + * @brief 设置 Tile 大小(仅 TBR 有效) + */ + void SetTileSize(size_t tile_size); + + private: + void EnsureRenderer(); private: const size_t height_; const size_t width_; LogSystem log_system_; + RenderingMode current_mode_; + std::unique_ptr renderer_; - std::shared_ptr shader_; - std::shared_ptr rasterizer_; - - /** - * 绘制模型 - * @param model 模型 - */ - void DrawModel(const Model &model, uint32_t *buffer); - void DrawModelSlower(const Model &model, uint32_t *buffer); + // TBR 配置缓存:在创建 TileBasedRenderer 时下发 + bool tbr_early_z_ = true; + size_t tbr_tile_size_ = 64; }; } // namespace simple_renderer diff --git a/src/include/renderers/deferred_renderer.hpp b/src/include/renderers/deferred_renderer.hpp new file mode 100644 index 0000000..245f5f8 --- /dev/null +++ b/src/include/renderers/deferred_renderer.hpp @@ -0,0 +1,31 @@ +#ifndef SIMPLERENDER_SRC_INCLUDE_RENDERERS_DEFERRED_RENDERER_HPP_ +#define SIMPLERENDER_SRC_INCLUDE_RENDERERS_DEFERRED_RENDERER_HPP_ + +#include "renderers/renderer_base.hpp" + +namespace simple_renderer { + +/** + * @brief 延迟渲染器(Deferred) + * + * 组织处理方式模拟 OpenGL 在 GPU上的工作原理,模仿 GPU管线。 + * 但相比于另外两个前向渲染实现,导致内存使用增加和渲染速度变慢。 + * + * 特点: + * - AoS 顶点路径; + * - 首先按像素收集所有片段并选择最近深度; + * - 再对选择的片段执行片段着色(模拟经典 GPU 管线的一种教学实现)。 + * - + */ +class DeferredRenderer final : public RendererBase { + public: + using RendererBase::RendererBase; + /** + * @copydoc RendererBase::Render + */ + bool Render(const Model& model, const Shader& shader, uint32_t* out_color) override; +}; + +} // namespace simple_renderer + +#endif // SIMPLERENDER_SRC_INCLUDE_RENDERERS_DEFERRED_RENDERER_HPP_ diff --git a/src/include/renderers/per_triangle_renderer.hpp b/src/include/renderers/per_triangle_renderer.hpp new file mode 100644 index 0000000..e2cee62 --- /dev/null +++ b/src/include/renderers/per_triangle_renderer.hpp @@ -0,0 +1,28 @@ +#ifndef 
SIMPLERENDER_SRC_INCLUDE_RENDERERS_PER_TRIANGLE_RENDERER_HPP_ +#define SIMPLERENDER_SRC_INCLUDE_RENDERERS_PER_TRIANGLE_RENDERER_HPP_ + +#include "renderers/renderer_base.hpp" + +namespace simple_renderer { + +/** + * @brief 逐三角形渲染器(Triangle‑Major) + * + * 特点: + * - AoS 顶点路径; + * - 每线程本地 framebuffer(depth/color)合并; + * - 背面剔除在屏幕空间完成; + * - 接近“传统”栈式前向渲染教学实现。 + */ +class PerTriangleRenderer final : public RendererBase { + public: + using RendererBase::RendererBase; + /** + * @copydoc RendererBase::Render + */ + bool Render(const Model& model, const Shader& shader, uint32_t* out_color) override; +}; + +} // namespace simple_renderer + +#endif // SIMPLERENDER_SRC_INCLUDE_RENDERERS_PER_TRIANGLE_RENDERER_HPP_ diff --git a/src/include/renderers/renderer_base.hpp b/src/include/renderers/renderer_base.hpp new file mode 100644 index 0000000..ad09ac7 --- /dev/null +++ b/src/include/renderers/renderer_base.hpp @@ -0,0 +1,66 @@ +// Renderer base and options +#ifndef SIMPLERENDER_SRC_INCLUDE_RENDERERS_RENDERER_BASE_HPP_ +#define SIMPLERENDER_SRC_INCLUDE_RENDERERS_RENDERER_BASE_HPP_ + +#include +#include + +#include "rasterizer.hpp" +#include "vertex.hpp" +#include "model.hpp" +#include "shader.hpp" + +namespace simple_renderer { + + +/** + * @brief 渲染器抽象基类 + * + * 约定: + * - Render 负责完成完整的渲染过程(顶点变换 + 光栅化 + 着色 + 写入输出缓冲)。 + * - 子类选择不同的“组织单元”:(按照并行组织单元)逐三角形、按 tile、或延迟管线。 + * - 公共的透视除法与视口变换在此提供,子类按需复用。 + */ +class RendererBase { + public: + RendererBase(size_t width, size_t height) + : width_(width), height_(height), rasterizer_(std::make_shared(width, height)) {} + virtual ~RendererBase() = default; + + RendererBase(const RendererBase&) = delete; + RendererBase& operator=(const RendererBase&) = delete; + + /** + * @brief 执行一次渲染 + * @param model 模型数据 + * @param shader 着色器(包含材质/光照/矩阵等 uniform) + * @param out_color 输出颜色缓冲(大小为 width*height) + * @return 是否渲染成功 + */ + virtual bool Render(const Model& model, const Shader& shader, uint32_t* out_color) = 0; + + protected: + /** + * @brief 
透视除法:裁剪空间 -> NDC + * @param vertex 裁剪空间顶点 + * @return NDC 顶点(保留 1/w 以供透视校正) + */ + Vertex PerspectiveDivision(const Vertex& vertex); + /** + * @brief 视口变换:NDC -> 屏幕坐标 + * @param vertex NDC 顶点 + * @return 屏幕空间顶点 + */ + Vertex ViewportTransformation(const Vertex& vertex); + + protected: + size_t width_; + size_t height_; + std::shared_ptr rasterizer_; + + static constexpr float kMinWValue = 1e-6f; +}; + +} // namespace simple_renderer + +#endif // SIMPLERENDER_SRC_INCLUDE_RENDERERS_RENDERER_BASE_HPP_ diff --git a/src/include/renderers/tile_based_renderer.hpp b/src/include/renderers/tile_based_renderer.hpp new file mode 100644 index 0000000..da7970c --- /dev/null +++ b/src/include/renderers/tile_based_renderer.hpp @@ -0,0 +1,130 @@ +#ifndef SIMPLERENDER_SRC_INCLUDE_RENDERERS_TILE_BASED_RENDERER_HPP_ +#define SIMPLERENDER_SRC_INCLUDE_RENDERERS_TILE_BASED_RENDERER_HPP_ + +#include "renderers/renderer_base.hpp" + +namespace simple_renderer { + +/** + * @brief Tile 中的三角形轻量引用(SoA 索引 + 材质指针) + */ +struct TileTriangleRef { + size_t i0, i1, i2; + const Material* material = nullptr; + size_t face_index = 0; +}; + +struct TileMaskStats { + uint64_t tested = 0; // 遍历检测像素总数 + uint64_t covered = 0; // 三角形内覆盖测试通过像素数(通过边函数做内点测试成功) + uint64_t zpass = 0; // 通过early-z测试像素数(深度值小于tile局部深度缓冲) + uint64_t shaded = 0; // 实际着色并写回像素数(同时通过early-z或late-z测试) +}; + +/** + * @brief Tile 网格上下文(供 binning 和 raster 共享的网格/几何信息) + */ +struct TileGridContext { + const VertexSoA& soa; + size_t tiles_x; + size_t tiles_y; + size_t tile_size; +}; + +/** + * @brief 基于 Tile 的渲染器(Tile‑Major) + * + * 特点: + * - SoA 顶点布局; + * - 三角形按 tile 分箱(binning),每 tile 内局部 Early‑Z; + * - 单份全局 framebuffer,按 tile 覆盖范围直接拷贝回写; + * - 通过构造参数 early_z 与 tile_size 控制行为。 + */ +class TileBasedRenderer final : public RendererBase { + public: + /** + * @brief 构造函数 + * @param width 画布宽度 + * @param height 画布高度 + * @param early_z 是否启用 Early‑Z(默认启用) + * @param tile_size Tile 像素尺寸(默认 64) + */ + TileBasedRenderer(size_t width, size_t height, bool 
early_z = true, size_t tile_size = 64) + : RendererBase(width, height), early_z_(early_z), tile_size_(tile_size) {} + /** + * @copydoc RendererBase::Render + */ + bool Render(const Model& model, const Shader& shader, uint32_t* out_color) override; + + private: + /** + * @brief 将三角形按屏幕空间包围盒映射到 tile 网格 + * @param model 模型(提供面/材质) + * @param soa 经过变换后的 SoA 顶点数据 + * @param tile_triangles 输出:每个 tile 的三角形引用列表 + * @param tiles_x 水平 tile 数 + * @param tiles_y 垂直 tile 数 + * @param tile_size tile 像素尺寸 + */ + void TriangleTileBinning(const Model& model, + const TileGridContext& grid, + std::vector> &tile_triangles); + + /** + * @brief 处理单个三角形的 tile binning 逻辑 + * @param tri_idx 三角形索引 + * @param count_only 是否仅进行计数(true=计数模式,false=填充模式) + * @param model 模型数据 + * @param soa 经过变换后的 SoA 顶点数据 + * @param tiles_x 水平 tile 数 + * @param tiles_y 垂直 tile 数 + * @param tile_size tile 像素尺寸 + * @param tile_counts tile 计数数组的引用(计数模式时使用) + * @param tile_triangles tile 三角形引用列表(填充模式时使用) + */ + void ProcessTriangleForTileBinning( + size_t tri_idx, bool count_only, + const Model& model, + const TileGridContext& grid, + std::vector& tile_counts, + std::vector>& tile_triangles); + + /** + * @brief 光栅化单个 tile,并将结果写回全局 framebuffer + * @param tile_id tile 序号 + * @param triangles 该 tile 覆盖的三角形引用 + * @param tiles_x 水平 tile 数 + * @param tiles_y 垂直 tile 数 + * @param tile_size tile 像素尺寸 + * @param tile_depth_buffer tile 局部深度缓冲(由调用方提供/复用) + * @param tile_color_buffer tile 局部颜色缓冲(由调用方提供/复用) + * @param global_depth_buffer 全局深度缓冲(单份) + * @param global_color_buffer 全局颜色缓冲(单份) + * @param soa 经过变换后的 SoA 顶点数据 + * @param shader 着色器 + * @param use_early_z 是否启用 Early‑Z + * @param scratch_fragments 可复用片段临时容器 + */ + void RasterizeTile(size_t tile_id, + const std::vector &triangles, + const TileGridContext& grid, + float* tile_depth_buffer, uint32_t* tile_color_buffer, + std::unique_ptr &global_depth_buffer, + std::unique_ptr &global_color_buffer, + const Shader& shader, + bool use_early_z, + std::vector* scratch_fragments, 
+ TileMaskStats* out_stats); + + private: + // 深度和颜色的默认值,同时用于tile级和全局级buffers的初始化 + static constexpr float kDepthClear = 1.0f; // 默认为最远值,用于Early-Z + static constexpr uint32_t kColorClear = 0u; // 默认为黑色 + + const bool early_z_; + const size_t tile_size_; +}; + +} // namespace simple_renderer + +#endif // SIMPLERENDER_SRC_INCLUDE_RENDERERS_TILE_BASED_RENDERER_HPP_ diff --git a/src/include/shader.hpp b/src/include/shader.hpp index ed08998..8314f55 100644 --- a/src/include/shader.hpp +++ b/src/include/shader.hpp @@ -1,6 +1,10 @@ #ifndef SIMPLERENDER_SRC_INCLUDE_SHADER_HPP_ #define SIMPLERENDER_SRC_INCLUDE_SHADER_HPP_ +#include +#include +#include +#include #include #include "light.h" @@ -12,6 +16,8 @@ namespace simple_renderer { using UniformValue = std::variant; +inline constexpr size_t kSpecularLutResolution = 256; + class UniformBuffer { public: template @@ -63,6 +69,32 @@ struct SharedDataInShader { Vector3f fragPos_varying = Vector3f(0.0f); }; +struct VertexUniformCache { + Matrix4f model = Matrix4f(1.0f); + Matrix4f view = Matrix4f(1.0f); + Matrix4f projection = Matrix4f(1.0f); + Matrix4f model_view = Matrix4f(1.0f); + Matrix4f mvp = Matrix4f(1.0f); + Matrix3f normal = Matrix3f(1.0f); + bool has_model = false; + bool has_view = false; + bool has_projection = false; + bool derived_valid = false; +}; + +struct FragmentUniformCache { + Light light{}; + Vector3f camera_pos = Vector3f(0.0f); + Vector3f light_dir_normalized = Vector3f(0.0f); + bool has_light = false; + bool has_camera = false; + bool derived_valid = false; +}; + +struct SpecularLUT { + std::array values{}; +}; + /** * @brief Shader Class 着色器类 * @@ -70,10 +102,10 @@ struct SharedDataInShader { class Shader { public: Shader() = default; - Shader(const Shader &shader) = default; - Shader(Shader &&shader) = default; - auto operator=(const Shader &shader) -> Shader & = default; - auto operator=(Shader &&shader) -> Shader & = default; + Shader(const Shader &shader); + Shader(Shader &&shader) noexcept; + auto 
operator=(const Shader &shader) -> Shader &; + auto operator=(Shader &&shader) noexcept -> Shader &; virtual ~Shader() = default; // Input Data -> Vertex Shader -> Screen Space Coordiante @@ -85,8 +117,17 @@ class Shader { template void SetUniform(const std::string &name, const T &value) { uniformbuffer_.SetUniform(name, value); + if constexpr (std::is_same_v) { + UpdateMatrixCache(name, value); + } else if constexpr (std::is_same_v) { + UpdateFragmentCache(name, value); + } else if constexpr (std::is_same_v) { + UpdateFragmentCache(name, value); + } } + void PrepareUniformCaches(); + private: // UniformBuffer UniformBuffer uniformbuffer_; @@ -94,6 +135,23 @@ class Shader { // Shared Variables // 共享变量 SharedDataInShader sharedDataInShader_; + VertexUniformCache vertex_uniform_cache_; + FragmentUniformCache fragment_uniform_cache_; + mutable std::unordered_map specular_lut_cache_; + mutable std::shared_mutex specular_cache_mutex_; + + void UpdateMatrixCache(const std::string &name, const Matrix4f &value); + void UpdateFragmentCache(const std::string &name, const Light &value); + void UpdateFragmentCache(const std::string &name, const Vector3f &value); + void RecalculateDerivedMatrices(); + void RecalculateFragmentDerived(); + void PrepareVertexUniformCache(); + void PrepareFragmentUniformCache(); + + // LUT相关 + [[nodiscard]] auto BuildSpecularLUT(float shininess) const -> SpecularLUT; + [[nodiscard]] auto GetSpecularLUT(float shininess) const -> const SpecularLUT &; + [[nodiscard]] auto EvaluateSpecular(float cos_theta, float shininess) const -> float; Color SampleTexture(const Texture &texture, const Vector2f &uv) const; Color ClampColor(const Color color) const; @@ -103,4 +161,4 @@ uint8_t FloatToUint8_t(float val); } // namespace simple_renderer -#endif /* SIMPLERENDER_SRC_INCLUDE_SHADER_H_ */ \ No newline at end of file +#endif /* SIMPLERENDER_SRC_INCLUDE_SHADER_H_ */ diff --git a/src/include/vertex.hpp b/src/include/vertex.hpp index 975abd0..b00f648 100644 --- 
a/src/include/vertex.hpp +++ b/src/include/vertex.hpp @@ -1,6 +1,9 @@ #ifndef SIMPLERENDER_SRC_INCLUDE_VERTEX_HPP_ #define SIMPLERENDER_SRC_INCLUDE_VERTEX_HPP_ +#include +#include + #include #include "color.h" @@ -31,10 +34,13 @@ class Vertex { // 析构函数 ~Vertex() = default; - // Constructor with parameters 带参数的构造函数 + // Constructor with parameters: optional clip space coordinate + // 带参数的构造函数:可选的裁剪空间坐标 explicit Vertex(const Vector4f& pos, const Vector3f& norm, - const Vector2f& tex, const Color& color_) - : position_(pos), normal_(norm), texCoords_(tex), color_(color_) {} + const Vector2f& tex, const Color& color_, + std::optional clip_pos = std::nullopt) + : position_(pos), normal_(norm), texCoords_(tex), color_(color_), + clip_position_(clip_pos) {} // Transform the vertex with a matrix 使用矩阵变换顶点 void transform(const Matrix4f& matrix) { position_ = matrix * position_; } @@ -45,12 +51,19 @@ class Vertex { [[nodiscard]] inline Vector3f GetNormal() const { return normal_; } [[nodiscard]] inline Vector2f GetTexCoords() const { return texCoords_; } [[nodiscard]] inline Color GetColor() const { return color_; } + + // 扩展坐标访问 + [[nodiscard]] inline std::optional GetClipPosition() const { return clip_position_; } + [[nodiscard]] inline bool HasClipPosition() const { return clip_position_.has_value(); } private: Vector4f position_; // 3D position, 3D顶点坐标 Vector3f normal_; // Normal vector, 顶点法向量 Vector2f texCoords_; // Texture coordinates, 顶点纹理坐标 Color color_; + + // 扩展坐标用于裁剪优化 + std::optional clip_position_; // 裁剪空间坐标 (用于视锥体裁剪) }; inline Vertex operator*(const Matrix4f& matrix, const Vertex& vertex) { @@ -59,6 +72,26 @@ inline Vertex operator*(const Matrix4f& matrix, const Vertex& vertex) { vertex.GetColor()); } +// Minimal SoA layout for TBR pipeline +struct VertexSoA { + // 屏幕空间坐标(视口变换后) + std::vector pos_screen; // screen space position (x,y,z,w) + // 裁剪空间坐标(用于视锥体剔除):clip = MVP * pos + std::vector pos_clip; + std::vector normal; + std::vector uv; + std::vector color; + 
+ inline size_t size() const { return pos_screen.size(); } + inline void resize(size_t n) { + pos_screen.resize(n); + pos_clip.resize(n); + normal.resize(n); + uv.resize(n); + color.resize(n); + } +}; + } // namespace simple_renderer #endif \ No newline at end of file diff --git a/src/light.cpp b/src/light.cpp index f25fb4c..ae3a51d 100644 --- a/src/light.cpp +++ b/src/light.cpp @@ -27,7 +27,7 @@ const Vector3f Light::kDefaultDir = Vector3f(0, 0, -1); const Color Light::kDefaultColor = Color::kWhite; Light::Light(const std::string &name) : name_(name) { - SPDLOG_INFO("Light: {}", name_); + SPDLOG_DEBUG("Light: {}", name_); } } // namespace simple_renderer diff --git a/src/rasterizer.cpp b/src/rasterizer.cpp index 8bf2d34..04aa6b1 100644 --- a/src/rasterizer.cpp +++ b/src/rasterizer.cpp @@ -1,12 +1,14 @@ #include "rasterizer.hpp" #include +#include +#include namespace simple_renderer { Rasterizer::Rasterizer(size_t width, size_t height) : width_(width), height_(height) { - SPDLOG_INFO("Rasterizer init with {}, {}", width, height); + SPDLOG_DEBUG("Rasterizer init with {}, {}", width, height); } std::vector Rasterizer::Rasterize(const Vertex& v0, const Vertex& v1, @@ -46,18 +48,25 @@ std::vector Rasterizer::Rasterize(const Vertex& v0, const Vertex& v1, if (!is_inside) { continue; } - // 计算该点的深度,通过重心坐标插值计算 - auto z = Interpolate(v0.GetPosition().z, v1.GetPosition().z, - v2.GetPosition().z, barycentric_coord); + + // 透视矫正插值 + auto perspective_result = PerformPerspectiveCorrection( + v0.GetPosition().w, v1.GetPosition().w, v2.GetPosition().w, + v0.GetPosition().z, v1.GetPosition().z, v2.GetPosition().z, + barycentric_coord); + + const Vector3f& corrected_bary = perspective_result.corrected_barycentric; + float z = perspective_result.interpolated_z; + Fragment fragment; fragment.screen_coord = {x, y}; - fragment.normal = CalculateNormal(v0.GetPosition(), v1.GetPosition(), - v2.GetPosition()); + fragment.normal = Interpolate(v0.GetNormal(), v1.GetNormal(), + 
v2.GetNormal(), corrected_bary); fragment.uv = Interpolate(v0.GetTexCoords(), v1.GetTexCoords(), - v2.GetTexCoords(), barycentric_coord); + v2.GetTexCoords(), corrected_bary); fragment.color = InterpolateColor(v0.GetColor(), v1.GetColor(), - v2.GetColor(), barycentric_coord); + v2.GetColor(), corrected_bary); fragment.depth = z; local_fragments.push_back(fragment); @@ -72,6 +81,159 @@ std::vector Rasterizer::Rasterize(const Vertex& v0, const Vertex& v1, return fragments; } +void Rasterizer::RasterizeTo(const Vertex& v0, const Vertex& v1, const Vertex& v2, + int x0, int y0, int x1, int y1, + std::vector& out) { + // 获取三角形的最小 box(屏幕空间) + const Vector4f p0 = v0.GetPosition(); + const Vector4f p1 = v1.GetPosition(); + const Vector4f p2 = v2.GetPosition(); + + Vector2f a(p0.x, p0.y); + Vector2f b(p1.x, p1.y); + Vector2f c(p2.x, p2.y); + + Vector2f bboxMin = Vector2f{std::min({a.x, b.x, c.x}), std::min({a.y, b.y, c.y})}; + Vector2f bboxMax = Vector2f{std::max({a.x, b.x, c.x}), std::max({a.y, b.y, c.y})}; + + // Clamp 到屏幕尺寸 + float minx = std::max(0.0f, bboxMin.x); + float miny = std::max(0.0f, bboxMin.y); + float maxx = std::min(float(width_ - 1), bboxMax.x); + float maxy = std::min(float(height_ - 1), bboxMax.y); + + // 与外部提供的裁剪区域相交(半开区间) -> 闭区间扫描 + int sx = std::max(x0, static_cast(std::floor(minx))); + int sy = std::max(y0, static_cast(std::floor(miny))); + int ex = std::min(x1 - 1, static_cast(std::floor(maxx))); + int ey = std::min(y1 - 1, static_cast(std::floor(maxy))); + if (sx > ex || sy > ey) return; + + for (int x = sx; x <= ex; ++x) { + for (int y = sy; y <= ey; ++y) { + auto [is_inside, bary] = GetBarycentricCoord( + Vector3f(p0.x, p0.y, p0.z), Vector3f(p1.x, p1.y, p1.z), Vector3f(p2.x, p2.y, p2.z), + Vector3f(static_cast(x), static_cast(y), 0)); + if (!is_inside) continue; + + // 透视矫正插值 + auto perspective_result = PerformPerspectiveCorrection( + p0.w, p1.w, p2.w, + p0.z, p1.z, p2.z, + bary); + + const Vector3f& corrected_bary = 
perspective_result.corrected_barycentric; + float z = perspective_result.interpolated_z; + + Fragment frag; // material 指针由调用方填写 + frag.screen_coord = {x, y}; + frag.normal = Interpolate(v0.GetNormal(), v1.GetNormal(), v2.GetNormal(), corrected_bary); + frag.uv = Interpolate(v0.GetTexCoords(), v1.GetTexCoords(), v2.GetTexCoords(), corrected_bary); + frag.color = InterpolateColor(v0.GetColor(), v1.GetColor(), v2.GetColor(), corrected_bary); + frag.depth = z; + + out.push_back(frag); + } + } +} + +void Rasterizer::RasterizeTo(const VertexSoA& soa, size_t i0, size_t i1, size_t i2, + int x0, int y0, int x1, int y1, + std::vector& out) { + // 读取三顶点的屏幕空间位置 + const Vector4f& p0 = soa.pos_screen[i0]; + const Vector4f& p1 = soa.pos_screen[i1]; + const Vector4f& p2 = soa.pos_screen[i2]; + + // 为BarycentricCoord预构造Vec3f,避免循环内重复构造 + const Vector3f sp0(p0.x, p0.y, p0.z); + const Vector3f sp1(p1.x, p1.y, p1.z); + const Vector3f sp2(p2.x, p2.y, p2.z); + + // 计算屏幕空间AABB包围盒 + const float minx_f = std::max(0.0f, std::min({p0.x, p1.x, p2.x})); + const float miny_f = std::max(0.0f, std::min({p0.y, p1.y, p2.y})); + const float maxx_f = std::min(float(width_ - 1), std::max({p0.x, p1.x, p2.x})); + const float maxy_f = std::min(float(height_ - 1), std::max({p0.y, p1.y, p2.y})); + + // 与外部提供的裁剪区域相交(半开区间) -> 闭区间扫描 + int sx = std::max(x0, static_cast(std::floor(minx_f))); + int sy = std::max(y0, static_cast(std::floor(miny_f))); + int ex = std::min(x1 - 1, static_cast(std::floor(maxx_f))); + int ey = std::min(y1 - 1, static_cast(std::floor(maxy_f))); + if (sx > ex || sy > ey) return; + + // 预计算边函数系数:E(x,y) = A*x + B*y + C + // 使用相对坐标的边函数定义,避免大常数项导致的数值不稳定 + // 如使用绝对形式Ax+By+C会由于常数C的量级过大,造成浮点抵消,有效位丢失不稳定 + auto cross2 = [](float ax, float ay, float bx, float by) { + return ax * by - ay * bx; + }; + // 边向量 + const float e01x = p1.x - p0.x, e01y = p1.y - p0.y; // (p0->p1) + const float e12x = p2.x - p1.x, e12y = p2.y - p1.y; // (p1->p2) + const float e20x = p0.x - p2.x, e20y = p0.y - p2.y; // 
(p2->p0) + + // 有向面积(两倍),用相对面积定义:area2 = cross(p1 - p0, p2 - p0) + float area2 = cross2(e01x, e01y, p2.x - p0.x, p2.y - p0.y); + if (std::abs(area2) < 1e-6f) return; // 退化三角形 + const float inv_area2 = 1.0f / area2; + const bool positive = (area2 > 0.0f); + + // 行优先遍历:有利于 cache 与向量化 + #pragma omp simd + for (int y = sy; y <= ey; ++y) { + const float yf = static_cast(y); + + // 注意:此处存在对 out.push_back 的写入,属于有副作用操作,不适合使用 + // omp simd 进行强制向量化,否则可能导致不符合预期的行为(如周期性伪影)。 + // 先保持标量内层,后续如切换为“直写像素回调”再考虑安全的 SIMD 化。 + for (int x = sx; x <= ex; ++x) { + const float xf = static_cast(x); + + // 相对坐标边函数: + // E01(p) = cross(p1 - p0, p - p0) + // E12(p) = cross(p2 - p1, p - p1) + // E20(p) = cross(p0 - p2, p - p2) + const float E01 = cross2(e01x, e01y, xf - p0.x, yf - p0.y); + const float E12 = cross2(e12x, e12y, xf - p1.x, yf - p1.y); + const float E20 = cross2(e20x, e20y, xf - p2.x, yf - p2.y); + + // 半空间测试(根据朝向选择符号) + const bool inside = positive ? (E01 >= 0.0f && E12 >= 0.0f && E20 >= 0.0f) + : (E01 <= 0.0f && E12 <= 0.0f && E20 <= 0.0f); + if (!inside) continue; + + // 重心权重映射: + // b0 对应 v0,取与对边 (v1,v2) 的子面积 → E12 + // b1 对应 v1 → E20 + // b2 对应 v2 → E01 + const float b0 = E12 * inv_area2; + const float b1 = E20 * inv_area2; + const float b2 = E01 * inv_area2; + const Vector3f bary(b0, b1, b2); + + // 透视矫正插值 + auto perspective_result = PerformPerspectiveCorrection( + p0.w, p1.w, p2.w, + p0.z, p1.z, p2.z, + bary); + + const Vector3f& corrected_bary = perspective_result.corrected_barycentric; + const float z = perspective_result.interpolated_z; + + Fragment frag; // Note: material 指针由调用方填写 + frag.screen_coord = {x, y}; + frag.normal = Interpolate(soa.normal[i0], soa.normal[i1], soa.normal[i2], corrected_bary); + frag.uv = Interpolate(soa.uv[i0], soa.uv[i1], soa.uv[i2], corrected_bary); + frag.color = InterpolateColor(soa.color[i0], soa.color[i1], soa.color[i2], corrected_bary); + frag.depth = z; + + out.push_back(frag); + } + } +} + std::pair Rasterizer::GetBarycentricCoord(const 
Vector3f& p0, const Vector3f& p1, const Vector3f& p2, @@ -98,17 +260,17 @@ std::pair Rasterizer::GetBarycentricCoord(const Vector3f& p0, return std::pair{true, Vector3f(x, y, z)}; } - + template T Rasterizer::Interpolate(const T& v0, const T& v1, const T& v2, - const Vector3f& barycentric_coord) { + const Vector3f& barycentric_coord) const { return v0 * barycentric_coord.x + v1 * barycentric_coord.y + v2 * barycentric_coord.z; } Color Rasterizer::InterpolateColor(const Color& color0, const Color& color1, const Color& color2, - const Vector3f& barycentric_coord) { + const Vector3f& barycentric_coord) const { auto color_r = FloatToUint8_t( static_cast(color0[Color::kColorIndexRed]) * barycentric_coord.x + static_cast(color1[Color::kColorIndexRed]) * barycentric_coord.y + @@ -127,6 +289,31 @@ Color Rasterizer::InterpolateColor(const Color& color0, const Color& color1, return Color(color_r, color_g, color_b); } +// 透视矫正helper函数:在透视投影下,1/w 在屏幕空间中是线性的// 因此需要先对 1/w 进行插值,再用结果矫正其他属性 +Rasterizer::PerspectiveCorrectionResult Rasterizer::PerformPerspectiveCorrection( + float w0, float w1, float w2, + float z0, float z1, float z2, + const Vector3f& original_barycentric) const { + + // 1. 插值 1/w (注意:这里传入的w0,w1,w2是原始的w值,需要先求倒数) + float w0_inv = 1.0f / w0; + float w1_inv = 1.0f / w1; + float w2_inv = 1.0f / w2; + float w_inv_interpolated = Interpolate(w0_inv, w1_inv, w2_inv, original_barycentric); + + // 2. 计算透视矫正的重心坐标 + Vector3f corrected_barycentric( + original_barycentric.x * w0_inv / w_inv_interpolated, + original_barycentric.y * w1_inv / w_inv_interpolated, + original_barycentric.z * w2_inv / w_inv_interpolated + ); + + // 3. 
使用矫正的重心坐标插值深度值 + float interpolated_z = Interpolate(z0, z1, z2, corrected_barycentric); + + return {corrected_barycentric, interpolated_z}; +} + // Calculate the normal vector based on the vertices // 根据顶点计算法向量 Vector3f Rasterizer::CalculateNormal(const Vector3f& v0, const Vector3f& v1, @@ -139,4 +326,4 @@ Vector3f Rasterizer::CalculateNormal(const Vector3f& v0, const Vector3f& v1, glm::cross(edge1, edge2)); } -} // namespace simple_renderer \ No newline at end of file +} // namespace simple_renderer diff --git a/src/renderer.cpp b/src/renderer.cpp old mode 100755 new mode 100644 index c7a5769..0939cf5 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -1,277 +1,83 @@ - -/** - * @file simple_renderer.cpp - * @brief SimpleRenderer 实现 - * @author Zone.N (Zone.Niuzh@hotmail.com) - * @version 1.0 - * @date 2023-10-23 - * @copyright MIT LICENSE - * https://github.com/Simple-XX/SimpleRenderer - * @par change log: - * - *
DateAuthorDescription - *
2023-10-23Zone.N创建文件 - *
- */ - #include "renderer.h" -#include - -#include -#include -#include -#include -#include -#include +#include #include "config.h" -#include "light.h" -#include "log_system.h" -#include "model.hpp" +#include "renderers/per_triangle_renderer.hpp" +#include "renderers/tile_based_renderer.hpp" +#include "renderers/deferred_renderer.hpp" namespace simple_renderer { +std::string RenderingModeToString(RenderingMode mode) { + switch(mode) { + case RenderingMode::PER_TRIANGLE: return "PER_TRIANGLE"; + case RenderingMode::TILE_BASED: return "TILE_BASED"; + case RenderingMode::DEFERRED: return "DEFERRED"; + } + return "PER_TRIANGLE"; +} + SimpleRenderer::SimpleRenderer(size_t width, size_t height) : height_(height), width_(width), - log_system_(LogSystem(kLogFilePath, kLogFileMaxSize, kLogFileMaxCount)) { - rasterizer_ = std::make_shared(width, height); + log_system_(LogSystem(kLogFilePath, kLogFileMaxSize, kLogFileMaxCount)), + current_mode_(RenderingMode::TILE_BASED) { + tbr_early_z_ = true; + tbr_tile_size_ = 64; + EnsureRenderer(); } -bool SimpleRenderer::Render(const Model &model, const Shader &shader, - uint32_t *buffer) { - SPDLOG_INFO("render model: {}", model.GetModelPath()); - shader_ = std::make_shared(shader); - DrawModel(model, buffer); - return true; +bool SimpleRenderer::DrawModel(const Model &model, const Shader &shader, uint32_t *buffer) { + EnsureRenderer(); // 确保渲染器实例存在 + SPDLOG_DEBUG("draw model: {}", model.GetModelPath()); + return renderer_->Render(model, shader, buffer); } -/* -Optimizes performance by performing depth testing during rasterization, keeping -only the closest fragment per pixel, and avoiding storing all -fragments—resulting in faster rendering. 
- -通过在光栅化过程中执行深度测试,仅保留每个像素的深度值最近的片段,避免存储所有片段,从而优化性能,实现更快的渲染。 -*/ -void SimpleRenderer::DrawModel(const Model &model, uint32_t *buffer) { - SPDLOG_INFO("draw {}", model.GetModelPath()); - - /* * * Vertex Shader * * */ - std::vector processedVertices; - std::vector> processed_vertices_all_thread(kNProc); -#pragma omp parallel num_threads(kNProc) default(none) \ - shared(shader_, processed_vertices_all_thread) firstprivate(model) - { - int thread_id = omp_get_thread_num(); - std::vector &processedVertices_per_thread = - processed_vertices_all_thread[thread_id]; - -#pragma omp for - for (const auto &v : model.GetVertices()) { - auto vertex = shader_->VertexShader(v); - processedVertices_per_thread.push_back(vertex); - } - } - - for (const auto &processedVertices_per_thread : - processed_vertices_all_thread) { - processedVertices.insert(processedVertices.end(), - processedVertices_per_thread.begin(), - processedVertices_per_thread.end()); - } - /* * * * * * * */ - - /* * * Rasterization * * */ - std::vector> depthBuffer_all_thread(kNProc); - std::vector> colorBuffer_all_thread(kNProc); - - for (size_t thread_id = 0; thread_id < kNProc; thread_id++) { - depthBuffer_all_thread[thread_id] = - std::make_unique(width_ * height_); - colorBuffer_all_thread[thread_id] = - std::make_unique(width_ * height_); - - std::fill_n(depthBuffer_all_thread[thread_id].get(), width_ * height_, - std::numeric_limits::infinity()); - std::fill_n(colorBuffer_all_thread[thread_id].get(), width_ * height_, 0); - } - -#pragma omp parallel num_threads(kNProc) default(none) \ - shared(processedVertices, rasterizer_, shader_, width_, height_, \ - depthBuffer_all_thread, colorBuffer_all_thread) \ - firstprivate(model) - { - int thread_id = omp_get_thread_num(); - auto &depthBuffer_per_thread = depthBuffer_all_thread[thread_id]; - auto &colorBuffer_per_thread = colorBuffer_all_thread[thread_id]; -#pragma omp for - for (const auto &f : model.GetFaces()) { - auto v0 = processedVertices[f.GetIndex(0)]; - 
auto v1 = processedVertices[f.GetIndex(1)]; - auto v2 = processedVertices[f.GetIndex(2)]; - - const Material *material = &f.GetMaterial(); - - auto fragments = rasterizer_->Rasterize(v0, v1, v2); - - for (auto &fragment : fragments) { - fragment.material = material; - - size_t x = fragment.screen_coord[0]; - size_t y = fragment.screen_coord[1]; - - if (x >= width_ || y >= height_) { - continue; - } - - size_t index = x + y * width_; - - if (fragment.depth < depthBuffer_per_thread[index]) { - depthBuffer_per_thread[index] = fragment.depth; - - /* * * Fragment Shader * * */ - auto color = shader_->FragmentShader(fragment); - colorBuffer_per_thread[index] = uint32_t(color); - } - } - } - } - - // Merge - std::unique_ptr depthBuffer = - std::make_unique(width_ * height_); - std::unique_ptr colorBuffer = - std::make_unique(width_ * height_); - - std::fill_n(depthBuffer.get(), width_ * height_, - std::numeric_limits::infinity()); - std::fill_n(colorBuffer.get(), width_ * height_, 0); - -#pragma omp parallel for - for (size_t i = 0; i < width_ * height_; i++) { - float min_depth = std::numeric_limits::infinity(); - uint32_t color = 0; - - for (size_t thread_id = 0; thread_id < kNProc; thread_id++) { - float depth = depthBuffer_all_thread[thread_id][i]; - if (depth < min_depth) { - min_depth = depth; - color = colorBuffer_all_thread[thread_id][i]; - } - } - depthBuffer[i] = min_depth; - colorBuffer[i] = color; - } - - std::memcpy(buffer, colorBuffer.get(), width_ * height_ * sizeof(uint32_t)); +void SimpleRenderer::SetRenderingMode(RenderingMode mode) { + current_mode_ = mode; + SPDLOG_INFO("rendering mode set to: {}", RenderingModeToString(mode)); + renderer_.reset(); + EnsureRenderer(); } -/* -Organizes processing to simulate how OpenGL works with GPUs by collecting all -fragments per pixel before processing, closely mimicking the GPU pipeline but -leading to increased memory usage and slower performance. 
- -组织处理方式模拟 OpenGL 在 GPU -上的工作原理,先收集每个像素的所有片段再并行处理屏幕上的每个像素,模仿 GPU -管线,但导致内存使用增加和渲染速度变慢 -*/ -void SimpleRenderer::DrawModelSlower(const Model &model, uint32_t *buffer) { - SPDLOG_INFO("draw {}", model.GetModelPath()); - - /* * * Vertex Shader * * */ - std::vector processedVertex; - std::vector> processed_vertices_per_thread(kNProc); -#pragma omp parallel num_threads(kNProc) default(none) \ - shared(shader_, processed_vertices_per_thread) firstprivate(model) - { - int thread_id = omp_get_thread_num(); - std::vector &local_vertices = - processed_vertices_per_thread[thread_id]; - -#pragma omp for - for (const auto &v : model.GetVertices()) { - /* * * Vertex Shader * * */ - auto vertex = shader_->VertexShader(v); - local_vertices.push_back(vertex); - } - } - - for (const auto &local_vertices : processed_vertices_per_thread) { - processedVertex.insert(processedVertex.end(), local_vertices.begin(), - local_vertices.end()); - } - /* * * * * * * */ - - /* * * Rasterization * * */ - std::vector>> fragmentsBuffer_all_thread( - kNProc, std::vector>(width_ * height_)); - -#pragma omp parallel num_threads(kNProc) default(none) \ - shared(processedVertex, fragmentsBuffer_all_thread, rasterizer_, width_, \ - height_) firstprivate(model) - { - int thread_id = omp_get_thread_num(); - auto &fragmentsBuffer_per_thread = fragmentsBuffer_all_thread[thread_id]; - -#pragma omp for - for (const auto &f : model.GetFaces()) { - auto v0 = processedVertex[f.GetIndex(0)]; - auto v1 = processedVertex[f.GetIndex(1)]; - auto v2 = processedVertex[f.GetIndex(2)]; - - const Material *material = &f.GetMaterial(); - - auto fragments = rasterizer_->Rasterize(v0, v1, v2); - - for (auto &fragment : fragments) { - fragment.material = material; - - size_t x = fragment.screen_coord[0]; - size_t y = fragment.screen_coord[1]; +RenderingMode SimpleRenderer::GetRenderingMode() const { return current_mode_; } - if (x >= width_ || y >= height_) { - continue; - } - - size_t index = x + y * width_; - 
fragmentsBuffer_per_thread[index].push_back(fragment); - } - } +void SimpleRenderer::SetEarlyZEnabled(bool enabled) { + tbr_early_z_ = enabled; + if (current_mode_ == RenderingMode::TILE_BASED) { + renderer_.reset(); + EnsureRenderer(); } +} - // Merge fragments - std::vector> fragmentsBuffer(width_ * height_); - for (const auto &fragmentsBuffer_per_thread : fragmentsBuffer_all_thread) { - for (size_t i = 0; i < fragmentsBuffer_per_thread.size(); i++) { - fragmentsBuffer[i].insert(fragmentsBuffer[i].end(), - fragmentsBuffer_per_thread[i].begin(), - fragmentsBuffer_per_thread[i].end()); - } +void SimpleRenderer::SetTileSize(size_t tile_size) { + tbr_tile_size_ = tile_size; + if (current_mode_ == RenderingMode::TILE_BASED) { + renderer_.reset(); + EnsureRenderer(); } -/* * * * * * * */ +} -/* * * Fragment Shader * * */ -#pragma omp parallel for - for (size_t i = 0; i < fragmentsBuffer.size(); i++) { - const auto &fragments = fragmentsBuffer[i]; - if (fragments.empty()) { - continue; +void SimpleRenderer::EnsureRenderer() { + if (renderer_) return; + switch (current_mode_) { // 延迟初始化,根据模式创建相应实例 + case RenderingMode::PER_TRIANGLE: { + auto r = std::make_unique(width_, height_); + renderer_ = std::move(r); + break; } - - const Fragment *renderFragment = nullptr; - for (const auto &fragment : fragments) { - if (!renderFragment || fragment.depth < renderFragment->depth) { - renderFragment = &fragment; - } + case RenderingMode::TILE_BASED: { + auto r = std::make_unique(width_, height_, tbr_early_z_, tbr_tile_size_); + renderer_ = std::move(r); + break; } - - if (renderFragment) { - auto color = shader_->FragmentShader(*renderFragment); - buffer[i] = uint32_t(color); + case RenderingMode::DEFERRED: { + auto r = std::make_unique(width_, height_); + renderer_ = std::move(r); + break; } } - /* * * * * * * */ } } // namespace simple_renderer diff --git a/src/renderers/deferred_renderer.cpp b/src/renderers/deferred_renderer.cpp new file mode 100644 index 0000000..523fe20 --- 
/dev/null +++ b/src/renderers/deferred_renderer.cpp @@ -0,0 +1,177 @@ +#include "renderers/deferred_renderer.hpp" + +#include +#include +#include +#include +#include + +#include "config.h" +#include "log_system.h" + +namespace simple_renderer { + +bool DeferredRenderer::Render(const Model& model, const Shader& shader_in, uint32_t* buffer) { + auto total_start_time = std::chrono::high_resolution_clock::now(); + auto shader = std::make_shared(shader_in); + shader->PrepareUniformCaches(); + + // 顶点变换(AoS) + auto vertex_start = std::chrono::high_resolution_clock::now(); + const auto &input_vertices = model.GetVertices(); + std::vector processedVertices(input_vertices.size()); +#pragma omp parallel for num_threads(kNProc) schedule(static) \ + shared(shader, processedVertices, input_vertices) + for (size_t i = 0; i < input_vertices.size(); ++i) { + const auto &v = input_vertices[i]; + auto clipSpaceVertex = shader->VertexShader(v); + auto ndcVertex = PerspectiveDivision(clipSpaceVertex); + auto screenSpaceVertex = ViewportTransformation(ndcVertex); + processedVertices[i] = screenSpaceVertex; + } + auto vertex_end = std::chrono::high_resolution_clock::now(); + auto vertex_ms = std::chrono::duration_cast(vertex_end - vertex_start).count() / 1000.0; + + // Buffer allocation + auto buffer_alloc_start = std::chrono::high_resolution_clock::now(); + std::vector>> fragmentsBuffer_all_thread( + kNProc, std::vector>(width_ * height_)); + + std::vector material_cache; + material_cache.reserve(model.GetFaces().size()); + for (const auto &f : model.GetFaces()) { + material_cache.emplace_back(f.GetMaterial()); + } + auto buffer_alloc_end = std::chrono::high_resolution_clock::now(); + auto buffer_alloc_ms = std::chrono::duration_cast(buffer_alloc_end - buffer_alloc_start).count() / 1000.0; + + // Rasterization: collect fragments per pixel per thread + auto raster_start = std::chrono::high_resolution_clock::now(); +#pragma omp parallel num_threads(kNProc) default(none) \ + 
shared(processedVertices, fragmentsBuffer_all_thread, rasterizer_, width_, \ + height_, material_cache, model) + { + int thread_id = omp_get_thread_num(); + auto &fragmentsBuffer_per_thread = fragmentsBuffer_all_thread[thread_id]; + +#pragma omp for + for (size_t face_idx = 0; face_idx < model.GetFaces().size(); ++face_idx) { + const auto &f = model.GetFaces()[face_idx]; + auto v0 = processedVertices[f.GetIndex(0)]; + auto v1 = processedVertices[f.GetIndex(1)]; + auto v2 = processedVertices[f.GetIndex(2)]; + + const Material *material = &material_cache[face_idx]; // 使用缓存的Material + auto fragments = rasterizer_->Rasterize(v0, v1, v2); + + for (auto &fragment : fragments) { + fragment.material = material; + size_t x = fragment.screen_coord[0]; + size_t y = fragment.screen_coord[1]; + + if (x >= width_ || y >= height_) continue; + size_t index = x + y * width_; + fragmentsBuffer_per_thread[index].push_back(fragment); + } + } + } + auto raster_end = std::chrono::high_resolution_clock::now(); + auto raster_ms = std::chrono::duration_cast(raster_end - raster_start).count() / 1000.0; + + /* * * Fragment Collection * * */ + auto collect_start = std::chrono::high_resolution_clock::now(); + + const size_t pixel_count = static_cast(width_) * static_cast(height_); + +#ifndef NDEBUG + for (const auto &tb : fragmentsBuffer_all_thread) { + // 断言避免越界,确保固定维度 + assert(tb.size() == pixel_count && "thread buffer size mismatch"); + } +#endif + + // Pass 1: 统计每个像素桶的总片元数 + std::vector bucket_total(pixel_count, 0); + for (const auto &tb : fragmentsBuffer_all_thread) { + for (size_t i = 0; i < pixel_count; ++i) { + bucket_total[i] += tb[i].size(); + } + } + + // Pass 2: 统一预分配 + std::vector> fragmentsBuffer(pixel_count); + for (size_t i = 0; i < pixel_count; ++i) { + if (bucket_total[i] > 0) fragmentsBuffer[i].reserve(bucket_total[i]); + } + + // Pass 3: 按桶并行合并(每个桶内部保持按线程序的插入顺序) +#pragma omp parallel for num_threads(kNProc) schedule(static) + for (long long i = 0; i < 
static_cast(pixel_count); ++i) { + auto &dst = fragmentsBuffer[static_cast(i)]; + for (size_t t = 0; t < fragmentsBuffer_all_thread.size(); ++t) { + auto &src = fragmentsBuffer_all_thread[t][static_cast(i)]; + dst.insert(dst.end(), + std::make_move_iterator(src.begin()), + std::make_move_iterator(src.end())); + src.clear(); + } + } + auto collect_end = std::chrono::high_resolution_clock::now(); + auto collect_ms = std::chrono::duration_cast(collect_end - collect_start).count() / 1000.0; + + /* * * Fragment Merge & Deferred Shading * * */ + auto merge_start = std::chrono::high_resolution_clock::now(); + + // Fragment Merge阶段:深度测试选择最近片段 + std::vector selected_fragments(width_ * height_, nullptr); +#pragma omp parallel for + for (size_t i = 0; i < fragmentsBuffer.size(); i++) { + const auto &fragments = fragmentsBuffer[i]; + if (fragments.empty()) continue; + const Fragment *renderFragment = nullptr; + for (const auto &fragment : fragments) { + if (!renderFragment || fragment.depth < renderFragment->depth) { + renderFragment = &fragment; + } + } + selected_fragments[i] = renderFragment; + } + auto merge_end = std::chrono::high_resolution_clock::now(); + auto merge_ms = std::chrono::duration_cast(merge_end - merge_start).count() / 1000.0; + + // Deferred Shading阶段:对选择的片段执行片段着色 + auto shade_start = std::chrono::high_resolution_clock::now(); +#pragma omp parallel for + for (size_t i = 0; i < selected_fragments.size(); i++) { + const Fragment *renderFragment = selected_fragments[i]; + if (renderFragment) { + // 添加Material指针有效性检查 + if (renderFragment->material == nullptr) { + SPDLOG_ERROR("Fragment material is nullptr at pixel {}", i); + continue; + } + auto color = shader->FragmentShader(*renderFragment); + buffer[i] = uint32_t(color); + } + } + auto shade_end = std::chrono::high_resolution_clock::now(); + auto shade_ms = std::chrono::duration_cast(shade_end - shade_start).count() / 1000.0; + + auto total_end_time = std::chrono::high_resolution_clock::now(); + double 
total_ms = std::chrono::duration_cast(total_end_time - total_start_time).count() / 1000.0; + + SPDLOG_DEBUG("=== DEFERRED RENDERING PERFORMANCE ==="); + double sum_ms = vertex_ms + (total_ms - vertex_ms); + SPDLOG_DEBUG("Vertex Shader: {:8.3f} ms ({:5.1f}%)", vertex_ms, vertex_ms/sum_ms*100); + SPDLOG_DEBUG("Buffer Alloc: {:8.3f} ms", buffer_alloc_ms); + SPDLOG_DEBUG("Rasterization: {:8.3f} ms", raster_ms); + SPDLOG_DEBUG("Fragment Collection: {:8.3f} ms", collect_ms); + SPDLOG_DEBUG("Fragment Merge: {:8.3f} ms", merge_ms); + SPDLOG_DEBUG("Deferred Shading: {:8.3f} ms", shade_ms); + SPDLOG_DEBUG("Total: {:8.3f} ms", vertex_ms + (buffer_alloc_ms + raster_ms + collect_ms + merge_ms + shade_ms)); + SPDLOG_DEBUG("========================================="); + + return true; +} + +} // namespace simple_renderer diff --git a/src/renderers/per_triangle_renderer.cpp b/src/renderers/per_triangle_renderer.cpp new file mode 100644 index 0000000..9348594 --- /dev/null +++ b/src/renderers/per_triangle_renderer.cpp @@ -0,0 +1,173 @@ +#include "renderers/per_triangle_renderer.hpp" + +#include + +#include +#include +#include +#include +#include + +#include "config.h" +#include "log_system.h" + +namespace simple_renderer { + +bool PerTriangleRenderer::Render(const Model &model, const Shader &shader_in, + uint32_t *buffer) { + auto total_start_time = std::chrono::high_resolution_clock::now(); + + // 复制 shader 以便在多线程中共享 + auto shader = std::make_shared(shader_in); + shader->PrepareUniformCaches(); + + // 顶点变换(AoS) + auto vertex_start = std::chrono::high_resolution_clock::now(); + const auto &input_vertices = model.GetVertices(); + std::vector processedVertices(input_vertices.size()); + +#pragma omp parallel for num_threads(kNProc) schedule(static) \ + shared(shader, processedVertices, input_vertices) + for (size_t i = 0; i < input_vertices.size(); ++i) { + const auto &v = input_vertices[i]; + auto clipSpaceVertex = shader->VertexShader(v); + auto ndcVertex = 
PerspectiveDivision(clipSpaceVertex); + auto screenSpaceVertex = ViewportTransformation(ndcVertex); + processedVertices[i] = screenSpaceVertex; + } + auto vertex_end = std::chrono::high_resolution_clock::now(); + auto vertex_ms = std::chrono::duration_cast( + vertex_end - vertex_start) + .count() / + 1000.0; + + // 1. 为每个线程创建framebuffer + auto buffer_alloc_start = std::chrono::high_resolution_clock::now(); + std::vector> depthBuffer_all_thread(kNProc); + std::vector> colorBuffer_all_thread(kNProc); + + for (size_t thread_id = 0; thread_id < kNProc; thread_id++) { + depthBuffer_all_thread[thread_id] = + std::make_unique(width_ * height_); + colorBuffer_all_thread[thread_id] = + std::make_unique(width_ * height_); + std::fill_n(depthBuffer_all_thread[thread_id].get(), width_ * height_, + std::numeric_limits::infinity()); + std::fill_n(colorBuffer_all_thread[thread_id].get(), width_ * height_, 0); + } + auto buffer_alloc_end = std::chrono::high_resolution_clock::now(); + auto buffer_alloc_ms = std::chrono::duration_cast( + buffer_alloc_end - buffer_alloc_start) + .count() / + 1000.0; + + // 2. 
并行光栅化 + auto raster_start = std::chrono::high_resolution_clock::now(); +#pragma omp parallel num_threads(kNProc) default(none) \ + shared(processedVertices, shader, rasterizer_, width_, height_, \ + depthBuffer_all_thread, colorBuffer_all_thread, model) + { + int thread_id = omp_get_thread_num(); + auto &depthBuffer_per_thread = depthBuffer_all_thread[thread_id]; + auto &colorBuffer_per_thread = colorBuffer_all_thread[thread_id]; + +#pragma omp for + for (const auto &f : model.GetFaces()) { + auto v0 = processedVertices[f.GetIndex(0)]; + auto v1 = processedVertices[f.GetIndex(1)]; + auto v2 = processedVertices[f.GetIndex(2)]; + + // 背面剔除(屏幕空间叉积) + Vector2f screen0(v0.GetPosition().x, v0.GetPosition().y); + Vector2f screen1(v1.GetPosition().x, v1.GetPosition().y); + Vector2f screen2(v2.GetPosition().x, v2.GetPosition().y); + + // 计算屏幕空间叉积判断朝向 + Vector2f edge1 = screen1 - screen0; + Vector2f edge2 = screen2 - screen0; + + // 背面剔除:NDC空间中叉积为负表示顺时针,即背面。 + // 从NDC到屏幕空间中,会发生Y轴翻转,对应叉积应为正。 + float cross_product = edge1.x * edge2.y - edge1.y * edge2.x; + if (cross_product > 0.0f) { + continue; // 背面 + } + + const Material *material = &f.GetMaterial(); + auto fragments = rasterizer_->Rasterize(v0, v1, v2); + + for (auto &fragment : fragments) { + fragment.material = material; + size_t x = fragment.screen_coord[0]; + size_t y = fragment.screen_coord[1]; + if (x >= width_ || y >= height_) { + continue; + } + size_t index = x + y * width_; + if (fragment.depth < depthBuffer_per_thread[index]) { + depthBuffer_per_thread[index] = fragment.depth; + auto color = shader->FragmentShader(fragment); + colorBuffer_per_thread[index] = uint32_t(color); + } + } + } + } + auto raster_end = std::chrono::high_resolution_clock::now(); + auto raster_ms = std::chrono::duration_cast( + raster_end - raster_start) + .count() / + 1000.0; + + // 3. 
合并结果 + auto merge_start = std::chrono::high_resolution_clock::now(); + std::unique_ptr depthBuffer = + std::make_unique(width_ * height_); + std::unique_ptr colorBuffer = + std::make_unique(width_ * height_); + std::fill_n(depthBuffer.get(), width_ * height_, + std::numeric_limits::infinity()); + std::fill_n(colorBuffer.get(), width_ * height_, 0); + +#pragma omp parallel for + for (size_t i = 0; i < width_ * height_; i++) { + float min_depth = std::numeric_limits::infinity(); + uint32_t color = 0; + for (size_t thread_id = 0; thread_id < kNProc; thread_id++) { + float depth = depthBuffer_all_thread[thread_id][i]; + if (depth < min_depth) { + min_depth = depth; + color = colorBuffer_all_thread[thread_id][i]; + } + } + depthBuffer[i] = min_depth; + colorBuffer[i] = color; + } + + std::memcpy(buffer, colorBuffer.get(), width_ * height_ * sizeof(uint32_t)); + auto merge_end = std::chrono::high_resolution_clock::now(); + auto merge_ms = std::chrono::duration_cast( + merge_end - merge_start) + .count() / + 1000.0; + + auto total_end_time = std::chrono::high_resolution_clock::now(); + auto total_ms = std::chrono::duration_cast( + total_end_time - total_start_time) + .count() / + 1000.0; + + SPDLOG_DEBUG("=== PER-TRIANGLE RENDERING PERFORMANCE ==="); + double sum_ms = vertex_ms + (total_ms - vertex_ms); + SPDLOG_DEBUG("Vertex Shader: {:8.3f} ms ({:5.1f}%)", vertex_ms, + vertex_ms / sum_ms * 100); + SPDLOG_DEBUG("Buffer Alloc: {:8.3f} ms", buffer_alloc_ms); + SPDLOG_DEBUG("Rasterization: {:8.3f} ms", raster_ms); + SPDLOG_DEBUG("Merge: {:8.3f} ms", merge_ms); + SPDLOG_DEBUG("Total: {:8.3f} ms", + vertex_ms + (buffer_alloc_ms + raster_ms + merge_ms)); + SPDLOG_DEBUG("=========================================="); + + return true; +} + +} // namespace simple_renderer diff --git a/src/renderers/renderer_base.cpp b/src/renderers/renderer_base.cpp new file mode 100644 index 0000000..5a82e5a --- /dev/null +++ b/src/renderers/renderer_base.cpp @@ -0,0 +1,44 @@ +#include 
"renderers/renderer_base.hpp" + +#include + +namespace simple_renderer { + +Vertex RendererBase::PerspectiveDivision(const Vertex &vertex) { + Vector4f position = vertex.GetPosition(); + + if (position.w <= kMinWValue) { + Vector4f farPosition(0.0f, 0.0f, 1.0f, 1.0f); + return Vertex(farPosition, vertex.GetNormal(), vertex.GetTexCoords(), vertex.GetColor()); + } + + float original_w = position.w; + Vector4f ndcPosition( + position.x / position.w, // x_ndc = x_clip / w_clip + position.y / position.w, // y_ndc = y_clip / w_clip + position.z / position.w, // z_ndc = z_clip / w_clip + 1.0f / original_w // 保存1/w用于透视矫正插值 + ); + + ndcPosition.z = std::clamp(ndcPosition.z, -1.0f, 1.0f); + return Vertex(ndcPosition, vertex.GetNormal(), vertex.GetTexCoords(), vertex.GetColor(), vertex.GetClipPosition()); +} + +Vertex RendererBase::ViewportTransformation(const Vertex &vertex) { + Vector4f ndcPosition = vertex.GetPosition(); + + // 视口变换:将NDC坐标[-1,1]转换为屏幕坐标[0,width]x[0,height] + float screen_x = (ndcPosition.x + 1.0f) * width_ / 2.0f; + float screen_y = (1.0f - ndcPosition.y) * height_ / 2.0f; + + Vector4f screenPosition( + screen_x, + screen_y, + ndcPosition.z, + ndcPosition.w); + + return Vertex(screenPosition, vertex.GetNormal(), vertex.GetTexCoords(), vertex.GetColor()); +} + +} // namespace simple_renderer + diff --git a/src/renderers/tile_based_renderer.cpp b/src/renderers/tile_based_renderer.cpp new file mode 100644 index 0000000..e39526e --- /dev/null +++ b/src/renderers/tile_based_renderer.cpp @@ -0,0 +1,535 @@ +#include "renderers/tile_based_renderer.hpp" + +#include + +#include +#include +#include +#include +#include + +#include "config.h" +#include "log_system.h" + +namespace simple_renderer { + +bool TileBasedRenderer::Render(const Model &model, const Shader &shader_in, + uint32_t *buffer) { + auto total_start_time = std::chrono::high_resolution_clock::now(); + auto shader = std::make_shared(shader_in); + shader->PrepareUniformCaches(); + + // 顶点变换(SoA) + auto 
vertex_start = std::chrono::high_resolution_clock::now(); + const auto &input_vertices = model.GetVertices(); + VertexSoA soa; + soa.resize(input_vertices.size()); + +#pragma omp parallel for num_threads(kNProc) schedule(static) \ + shared(shader, soa, input_vertices) + for (size_t i = 0; i < input_vertices.size(); ++i) { + const auto &v = input_vertices[i]; + auto clipSpaceVertex = shader->VertexShader(v); + soa.pos_clip[i] = clipSpaceVertex.GetPosition(); + auto ndcVertex = PerspectiveDivision(clipSpaceVertex); + auto screenSpaceVertex = ViewportTransformation(ndcVertex); + soa.pos_screen[i] = screenSpaceVertex.GetPosition(); + soa.normal[i] = screenSpaceVertex.GetNormal(); + soa.uv[i] = screenSpaceVertex.GetTexCoords(); + soa.color[i] = screenSpaceVertex.GetColor(); + } + auto vertex_end = std::chrono::high_resolution_clock::now(); + auto vertex_ms = std::chrono::duration_cast( + vertex_end - vertex_start) + .count() / + 1000.0; + + // 1. Setup + auto setup_start = std::chrono::high_resolution_clock::now(); + const size_t TILE_SIZE = tile_size_ > 0 ? tile_size_ : 64; + const size_t tiles_x = (width_ + TILE_SIZE - 1) / TILE_SIZE; + const size_t tiles_y = (height_ + TILE_SIZE - 1) / TILE_SIZE; + const size_t total_tiles = tiles_x * tiles_y; + + // 为每个tile创建三角形列表(SoA 引用) + std::vector> tile_triangles(total_tiles); + auto setup_end = std::chrono::high_resolution_clock::now(); + auto setup_ms = std::chrono::duration_cast( + setup_end - setup_start) + .count() / + 1000.0; + + // 2. Binning + auto binning_start = std::chrono::high_resolution_clock::now(); + TileGridContext grid_ctx{soa, tiles_x, tiles_y, TILE_SIZE}; + TriangleTileBinning(model, grid_ctx, tile_triangles); + auto binning_end = std::chrono::high_resolution_clock::now(); + auto binning_ms = std::chrono::duration_cast( + binning_end - binning_start) + .count() / + 1000.0; + + // 3. 
单份全局 framebuffer + // 直接让每个 tile 写入这份全局缓冲区,避免末端 O(W*H*kNProc) 合并开销 + + auto buffer_alloc_start = std::chrono::high_resolution_clock::now(); + std::unique_ptr depthBuffer = + std::make_unique(width_ * height_); + std::unique_ptr colorBuffer = + std::make_unique(width_ * height_); + + // 深度初始化为最远值,颜色清零 + std::fill_n(depthBuffer.get(), width_ * height_, kDepthClear); + std::fill_n(colorBuffer.get(), width_ * height_, kColorClear); + auto buffer_alloc_end = std::chrono::high_resolution_clock::now(); + auto buffer_alloc_ms = std::chrono::duration_cast( + buffer_alloc_end - buffer_alloc_start) + .count() / + 1000.0; + + // 4. 并行光栅化每个 tile(SoA + early-z) + auto raster_start = std::chrono::high_resolution_clock::now(); + std::vector tile_stats(total_tiles); +#pragma omp parallel num_threads(kNProc) default(none) \ + shared(tile_triangles, shader, depthBuffer, colorBuffer, total_tiles, \ + grid_ctx, early_z_, tile_stats) + { + // 为每个 tile 分配局部深度和颜色缓冲 + std::unique_ptr tile_depth_buffer = + std::make_unique(grid_ctx.tile_size * grid_ctx.tile_size); + std::unique_ptr tile_color_buffer = + std::make_unique(grid_ctx.tile_size * grid_ctx.tile_size); + + // 为每个 tile 分配可复用片段临时容器,容量按单 tile 上限预估 + std::vector scratch_fragments; + scratch_fragments.reserve(grid_ctx.tile_size * grid_ctx.tile_size); + +#pragma omp for schedule(static) + for (size_t tile_id = 0; tile_id < total_tiles; ++tile_id) { + // 按照 tile 进行光栅化(SoA) + // 直接写入单份全局 framebuffer;不同 tile 不重叠,无需加锁 + RasterizeTile(tile_id, tile_triangles[tile_id], grid_ctx, + tile_depth_buffer.get(), tile_color_buffer.get(), + depthBuffer, colorBuffer, *shader, early_z_, + &scratch_fragments, &tile_stats[tile_id]); + } + } + auto raster_end = std::chrono::high_resolution_clock::now(); + auto raster_ms = std::chrono::duration_cast( + raster_end - raster_start) + .count() / + 1000.0; + + // 汇总并打印掩码收益统计 + uint64_t sum_tested = 0, sum_covered = 0, sum_zpass = 0, sum_shaded = 0; + for (const auto& s : tile_stats) { + sum_tested += s.tested; + 
sum_covered += s.covered; + sum_zpass += s.zpass; + sum_shaded += s.shaded; + } + auto rate = [](uint64_t num, uint64_t den) -> double { + if (den == 0) return 0.0; return double(num) / double(den) * 100.0; + }; + SPDLOG_DEBUG( + "TBR Mask Stats: tested={}, covered={} ({:.1f}%), zpass={} ({:.1f}%), shaded={} ({:.1f}%)", + sum_tested, sum_covered, rate(sum_covered, sum_tested), + sum_zpass, rate(sum_zpass, sum_covered), + sum_shaded, rate(sum_shaded, sum_covered)); + + // 5. 直接将单份全局 colorBuffer 拷贝到输出 + auto present_start = std::chrono::high_resolution_clock::now(); + std::memcpy(buffer, colorBuffer.get(), width_ * height_ * sizeof(uint32_t)); + auto present_end = std::chrono::high_resolution_clock::now(); + auto present_ms = std::chrono::duration_cast( + present_end - present_start) + .count() / + 1000.0; + + auto total_end_time = std::chrono::high_resolution_clock::now(); + double total_ms = std::chrono::duration_cast( + total_end_time - total_start_time) + .count() / + 1000.0; + + SPDLOG_DEBUG("=== TILE-BASED RENDERING PERFORMANCE ==="); + double sum_ms = vertex_ms + (total_ms - vertex_ms); + SPDLOG_DEBUG("Vertex Shader: {:8.3f} ms ({:5.1f}%)", vertex_ms, + vertex_ms / sum_ms * 100); + SPDLOG_DEBUG("Setup: {:8.3f} ms", setup_ms); + SPDLOG_DEBUG("Binning: {:8.3f} ms", binning_ms); + SPDLOG_DEBUG("Buffer Alloc: {:8.3f} ms", buffer_alloc_ms); + SPDLOG_DEBUG("Rasterization: {:8.3f} ms", raster_ms); + SPDLOG_DEBUG("Copy: {:8.3f} ms", present_ms); + SPDLOG_DEBUG("Total: {:8.3f} ms", + vertex_ms + (setup_ms + binning_ms + buffer_alloc_ms + raster_ms + + present_ms)); + SPDLOG_DEBUG("=========================================="); + + return true; +} + +void TileBasedRenderer::TriangleTileBinning( + const Model& model, + const TileGridContext& grid, + std::vector> &tile_triangles) { + const size_t total_triangles = model.GetFaces().size(); + + SPDLOG_DEBUG("Starting triangle-tile binning (SoA) for {} triangles", + total_triangles); + SPDLOG_DEBUG("Screen dimensions: {}x{}, 
Tile size: {}, Tiles: {}x{}", width_, + height_, grid.tile_size, grid.tiles_x, grid.tiles_y); + + std::vector tile_counts(grid.tiles_x * grid.tiles_y, 0); + + // 第一遍(count only):计算每个tile需要容纳多少三角形 + for (size_t tri_idx = 0; tri_idx < total_triangles; ++tri_idx) { + ProcessTriangleForTileBinning(tri_idx, true, model, grid, + tile_counts, tile_triangles); + } + + // 预分配,避免动态扩容 + for (size_t tile_id = 0; tile_id < tile_triangles.size(); ++tile_id) { + if (tile_counts[tile_id] > 0) + tile_triangles[tile_id].reserve(tile_counts[tile_id]); + } + + // 第二遍(fill):按范围填充TriangleRef + for (size_t tri_idx = 0; tri_idx < total_triangles; ++tri_idx) { + ProcessTriangleForTileBinning(tri_idx, false, model, grid, + tile_counts, tile_triangles); + } + + size_t total_triangle_refs = 0; + size_t non_empty_tiles = 0; + for (const auto &tile : tile_triangles) { + total_triangle_refs += tile.size(); + if (!tile.empty()) non_empty_tiles++; + } + SPDLOG_DEBUG(" (SoA) Total triangle references: {}", total_triangle_refs); + SPDLOG_DEBUG(" (SoA) Non-empty tiles: {}", non_empty_tiles); + SPDLOG_DEBUG(" (SoA) Average triangles per tile: {:.2f}", + total_triangle_refs > 0 + ? 
float(total_triangle_refs) / tile_triangles.size() + : 0.0f); +} + +void TileBasedRenderer::RasterizeTile( + size_t tile_id, const std::vector &triangles, + const TileGridContext& grid, float *tile_depth_buffer, + uint32_t *tile_color_buffer, std::unique_ptr &global_depth_buffer, + std::unique_ptr &global_color_buffer, + const Shader &shader, bool use_early_z, + std::vector *scratch_fragments, + TileMaskStats* out_stats) { + // 计算 tile 屏幕范围 + size_t tile_x = tile_id % grid.tiles_x; + size_t tile_y = tile_id / grid.tiles_x; + size_t screen_x_start = tile_x * grid.tile_size; + size_t screen_y_start = tile_y * grid.tile_size; + size_t screen_x_end = std::min(screen_x_start + grid.tile_size, width_); + size_t screen_y_end = std::min(screen_y_start + grid.tile_size, height_); + + // 初始化 tile 局部缓冲 + size_t tile_width = screen_x_end - screen_x_start; + size_t tile_height = screen_y_end - screen_y_start; + std::fill_n(tile_depth_buffer, tile_width * tile_height, kDepthClear); + std::fill_n(tile_color_buffer, tile_width * tile_height, kColorClear); + + // 掩码化扫描:按三角形直接写入 tile 局部缓冲,避免中间片段向量 + constexpr int kLane = 8; // 横向处理的像素个数(便于编译器自动向量化) + + // 轻量统计:用于评估掩码收益(仅对少量tile打印DEBUG) + uint64_t tested_pixels = 0; + uint64_t covered_pixels = 0; + uint64_t zpass_pixels = 0; + uint64_t shaded_pixels = 0; + + auto cross2 = [](float ax, float ay, float bx, float by) { + return ax * by - ay * bx; + }; + + for (const auto &tri : triangles) { + const auto i0 = tri.i0, i1 = tri.i1, i2 = tri.i2; + + // 顶点屏幕坐标 + const Vector4f &p0 = grid.soa.pos_screen[i0]; + const Vector4f &p1 = grid.soa.pos_screen[i1]; + const Vector4f &p2 = grid.soa.pos_screen[i2]; + + // 三角形屏幕空间 AABB,与 tile 矩形求交 + const float tri_minx = std::min({p0.x, p1.x, p2.x}); + const float tri_miny = std::min({p0.y, p1.y, p2.y}); + const float tri_maxx = std::max({p0.x, p1.x, p2.x}); + const float tri_maxy = std::max({p0.y, p1.y, p2.y}); + + int sx = std::max(static_cast(screen_x_start), + static_cast(std::floor(std::max(0.0f, 
tri_minx)))); + int sy = std::max(static_cast(screen_y_start), + static_cast(std::floor(std::max(0.0f, tri_miny)))); + int ex = std::min(static_cast(screen_x_end - 1), + static_cast(std::floor(std::min(width_ - 1, tri_maxx)))); + int ey = std::min(static_cast(screen_y_end - 1), + static_cast(std::floor(std::min(height_ - 1, tri_maxy)))); + if (sx > ex || sy > ey) continue; + + // 边向量与有向面积 + const float e01x = p1.x - p0.x, e01y = p1.y - p0.y; + const float e12x = p2.x - p1.x, e12y = p2.y - p1.y; + const float e20x = p0.x - p2.x, e20y = p0.y - p2.y; + const float area2 = cross2(e01x, e01y, p2.x - p0.x, p2.y - p0.y); + if (std::abs(area2) < 1e-6f) continue; // 退化三角形 + const bool positive = (area2 > 0.0f); + + // z 与 1/w 的平面插值准备 + const float z0 = p0.z, z1 = p1.z, z2 = p2.z; + const float w0_inv = 1.0f / p0.w, w1_inv = 1.0f / p1.w, w2_inv = 1.0f / p2.w; + + // 行扫描 + for (int y = sy; y <= ey; ++y) { // 行优先遍历:有利于 cache 与向量化 + const float yf = static_cast(y); + for (int xb = sx; xb <= ex; xb += kLane) { // 每次处理kLane个像素 + const int lane = std::min(kLane, ex - xb + 1); // 当前需要处理的像素个数 + const float x0f = static_cast(xb); // 本块起点的x坐标 + + // 计算本块起点的三个边函数值与横向步长(dE/dx) + float E01_base = cross2(e01x, e01y, x0f - p0.x, yf - p0.y); + float E12_base = cross2(e12x, e12y, x0f - p1.x, yf - p1.y); + float E20_base = cross2(e20x, e20y, x0f - p2.x, yf - p2.y); + const float dE01dx = -e01y; + const float dE12dx = -e12y; + const float dE20dx = -e20y; + + // ============== 构造覆盖掩码 cover mask ============== + unsigned mask_cover = 0u; + int cover_count = 0; + float E01[kLane], E12[kLane], E20[kLane]; + #pragma omp simd + for (int j = 0; j < lane; ++j) { + E01[j] = E01_base + dE01dx * static_cast(j); + E12[j] = E12_base + dE12dx * static_cast(j); + E20[j] = E20_base + dE20dx * static_cast(j); + } + for (int j = 0; j < lane; ++j) { // 内点测试,如果三角形在像素内,则将该像素加入覆盖掩码 + bool inside = positive ? 
(E01[j] >= 0.0f && E12[j] >= 0.0f && E20[j] >= 0.0f) + : (E01[j] <= 0.0f && E12[j] <= 0.0f && E20[j] <= 0.0f); + if (inside) { + mask_cover |= (1u << j); + cover_count++; + } + } + tested_pixels += static_cast(lane); + covered_pixels += static_cast(cover_count); + if (mask_cover == 0u) continue; + + // ============== 计算 z,进行early-z掩码 ============== + unsigned mask_zpass = 0u; + float zvals[kLane]; + // 缓存校正后的重心坐标,避免着色阶段重复计算 + float b0c_arr[kLane]; + float b1c_arr[kLane]; + float b2c_arr[kLane]; + int zpass_count = 0; + for (int j = 0; j < lane; ++j) { + if (((mask_cover >> j) & 1u) == 0u) { continue; } // 如果该像素不在覆盖掩码内,则跳过 + const float b0 = E12[j] / area2; + const float b1 = E20[j] / area2; + const float b2 = E01[j] / area2; + const float w_inv = b0 * w0_inv + b1 * w1_inv + b2 * w2_inv; // 透视矫正 + const float b0c = (b0 * w0_inv) / w_inv; + const float b1c = (b1 * w1_inv) / w_inv; + const float b2c = (b2 * w2_inv) / w_inv; + b0c_arr[j] = b0c; b1c_arr[j] = b1c; b2c_arr[j] = b2c; + const float z = z0 * b0c + z1 * b1c + z2 * b2c; + zvals[j] = z; + + const int sx_pix = xb + j; + const int local_x = sx_pix - static_cast(screen_x_start); + const int local_y = y - static_cast(screen_y_start); + const size_t idx = static_cast(local_x + local_y * static_cast(tile_width)); + if (z < tile_depth_buffer[idx]) { + mask_zpass |= (1u << j); + zpass_count++; + } + } + zpass_pixels += static_cast(zpass_count); + + // ============== 构造最终掩码 ============== + unsigned mask_final = use_early_z ? 
(mask_cover & mask_zpass) : mask_cover; + if (mask_final == 0u && use_early_z) continue; + + // 对掩码内像素着色并写回(非 early-z 时,先着色,再按 z 测试写入) + for (int j = 0; j < lane; ++j) { + if (((mask_final >> j) & 1u) == 0u && use_early_z) continue; + const int sx_pix = xb + j; + const int local_x = sx_pix - static_cast(screen_x_start); + const int local_y = y - static_cast(screen_y_start); + const size_t idx = static_cast(local_x + local_y * static_cast(tile_width)); + + // 计算插值属性 + const float b0c = b0c_arr[j]; + const float b1c = b1c_arr[j]; + const float b2c = b2c_arr[j]; + + Fragment frag; + frag.screen_coord = {sx_pix, y}; + frag.depth = zvals[j]; + frag.material = tri.material; + + // 法向量插值 + const Vector3f &n0 = grid.soa.normal[i0]; + const Vector3f &n1 = grid.soa.normal[i1]; + const Vector3f &n2 = grid.soa.normal[i2]; + frag.normal = n0 * b0c + n1 * b1c + n2 * b2c; + + // 纹理坐标插值 + const Vector2f &uv0 = grid.soa.uv[i0]; + const Vector2f &uv1 = grid.soa.uv[i1]; + const Vector2f &uv2 = grid.soa.uv[i2]; + frag.uv = uv0 * b0c + uv1 * b1c + uv2 * b2c; + + // 颜色插值 + const Color &c0 = grid.soa.color[i0]; + const Color &c1 = grid.soa.color[i1]; + const Color &c2 = grid.soa.color[i2]; + auto color_r = FloatToUint8_t(static_cast(c0[Color::kColorIndexRed]) * b0c + + static_cast(c1[Color::kColorIndexRed]) * b1c + + static_cast(c2[Color::kColorIndexRed]) * b2c); + auto color_g = FloatToUint8_t(static_cast(c0[Color::kColorIndexGreen]) * b0c + + static_cast(c1[Color::kColorIndexGreen]) * b1c + + static_cast(c2[Color::kColorIndexGreen]) * b2c); + auto color_b = FloatToUint8_t(static_cast(c0[Color::kColorIndexBlue]) * b0c + + static_cast(c1[Color::kColorIndexBlue]) * b1c + + static_cast(c2[Color::kColorIndexBlue]) * b2c); + frag.color = Color(color_r, color_g, color_b); + + if (use_early_z) { // 开启时,仅对mask中通过early-z的像素进行着色和写回 + auto out_color = shader.FragmentShader(frag); + tile_depth_buffer[idx] = frag.depth; + tile_color_buffer[idx] = uint32_t(out_color); + shaded_pixels++; + } else { + 
// Early-Z disabled: shade first, then write subject to the depth test + auto out_color = shader.FragmentShader(frag); + if (frag.depth < tile_depth_buffer[idx]) { // late-z + tile_depth_buffer[idx] = frag.depth; + tile_color_buffer[idx] = uint32_t(out_color); + shaded_pixels++; + } + } + } + } + } + } + + if (out_stats) { + out_stats->tested = tested_pixels; + out_stats->covered = covered_pixels; + out_stats->zpass = zpass_pixels; + out_stats->shaded = shaded_pixels; + } + + // Write back to the global buffers + // Under TBR the screen regions covered by different tiles never overlap, and Early-Z inside the tile + // has already produced the final value of every pixel, so tile rows can be copied directly to the global buffers + for (size_t y = 0; y < tile_height; y++) { + const size_t tile_row_off = y * tile_width; + const size_t global_row_off = + (screen_y_start + y) * width_ + screen_x_start; + + // Copy this row's color into the global color buffer + std::memcpy(global_color_buffer.get() + global_row_off, + tile_color_buffer + tile_row_off, + tile_width * sizeof(uint32_t)); + + // Copy this row's depth into the global depth buffer + std::memcpy(global_depth_buffer.get() + global_row_off, + tile_depth_buffer + tile_row_off, tile_width * sizeof(float)); + } +} + +void TileBasedRenderer::ProcessTriangleForTileBinning( + size_t tri_idx, bool count_only, const Model &model, + const TileGridContext &grid, std::vector &tile_counts, + std::vector> &tile_triangles) { + const auto &f = model.GetFaces()[tri_idx]; + size_t i0 = f.GetIndex(0); + size_t i1 = f.GetIndex(1); + size_t i2 = f.GetIndex(2); + + // Frustum culling (clip space) + // Conservative: cull only when all three vertices lie outside the same clip plane, each tested against its own w + const Vector4f &c0 = grid.soa.pos_clip[i0]; + const Vector4f &c1 = grid.soa.pos_clip[i1]; + const Vector4f &c2 = grid.soa.pos_clip[i2]; + bool frustum_cull = + (c0.x > c0.w && c1.x > c1.w && c2.x > c2.w) || // outside right plane + (c0.x < -c0.w && c1.x < -c1.w && c2.x < -c2.w) || // outside left plane + (c0.y > c0.w && c1.y > c1.w && c2.y > c2.w) || // outside top plane + (c0.y < -c0.w && c1.y < -c1.w && c2.y < -c2.w) || // outside bottom plane + (c0.z > c0.w && c1.z > c1.w && c2.z > c2.w) || // outside far plane + (c0.z < -c0.w && c1.z < -c1.w && c2.z < -c2.w); // outside near plane + if (frustum_cull) { + return; + } + + const Vector4f &pos0 =
grid.soa.pos_screen[i0]; + const Vector4f &pos1 = grid.soa.pos_screen[i1]; + const Vector4f &pos2 = grid.soa.pos_screen[i2]; + + // 背面剔除(屏幕空间) + // NDC空间中叉积为负表示顺时针,即背面。 + // 从NDC到屏幕空间中,会发生Y轴翻转,对应叉积应为正。 + Vector2f screen0(pos0.x, pos0.y); + Vector2f screen1(pos1.x, pos1.y); + Vector2f screen2(pos2.x, pos2.y); + Vector2f edge1 = screen1 - screen0; + Vector2f edge2 = screen2 - screen0; + float cross_product = edge1.x * edge2.y - edge1.y * edge2.x; + if (cross_product > 0.0f) return; + + float screen_x0 = pos0.x; + float screen_y0 = pos0.y; + float screen_x1 = pos1.x; + float screen_y1 = pos1.y; + float screen_x2 = pos2.x; + float screen_y2 = pos2.y; + + // 计算屏幕bbox,用于后续tile划分 + float min_x = std::min({screen_x0, screen_x1, screen_x2}); + float max_x = std::max({screen_x0, screen_x1, screen_x2}); + float min_y = std::min({screen_y0, screen_y1, screen_y2}); + float max_y = std::max({screen_y0, screen_y1, screen_y2}); + + int start_tile_x = std::max(0, static_cast(min_x) / + static_cast(grid.tile_size)); + int end_tile_x = + std::min(static_cast(grid.tiles_x - 1), + static_cast(max_x) / static_cast(grid.tile_size)); + int start_tile_y = std::max(0, static_cast(min_y) / + static_cast(grid.tile_size)); + int end_tile_y = + std::min(static_cast(grid.tiles_y - 1), + static_cast(max_y) / static_cast(grid.tile_size)); + if (start_tile_x > end_tile_x || start_tile_y > end_tile_y) + return; // 如果bbox不在任何tile内,直接返回 + + if (count_only) { // 第一遍计数,只统计tile内三角形数量 + for (int ty = start_tile_y; ty <= end_tile_y; ++ty) { + for (int tx = start_tile_x; tx <= end_tile_x; ++tx) { + size_t tile_id = ty * grid.tiles_x + tx; + tile_counts[tile_id]++; + } + } + } else { // 第二遍填充,填充TriangleRef + TileTriangleRef tri_ref{i0, i1, i2, &f.GetMaterial(), tri_idx}; + for (int ty = start_tile_y; ty <= end_tile_y; ++ty) { + for (int tx = start_tile_x; tx <= end_tile_x; ++tx) { + size_t tile_id = ty * grid.tiles_x + tx; + tile_triangles[tile_id].push_back(tri_ref); + } + } + } +} + +} // namespace 
simple_renderer diff --git a/src/shader.cpp b/src/shader.cpp index 3438627..06ab241 100644 --- a/src/shader.cpp +++ b/src/shader.cpp @@ -1,20 +1,261 @@ #include "shader.hpp" +#include +#include +#include +#include + namespace simple_renderer { +Shader::Shader(const Shader& shader) { + std::shared_lock lock(shader.specular_cache_mutex_); + uniformbuffer_ = shader.uniformbuffer_; + sharedDataInShader_ = shader.sharedDataInShader_; + vertex_uniform_cache_ = shader.vertex_uniform_cache_; + fragment_uniform_cache_ = shader.fragment_uniform_cache_; + specular_lut_cache_ = shader.specular_lut_cache_; +} + +Shader::Shader(Shader&& shader) noexcept { + std::unique_lock lock(shader.specular_cache_mutex_); + uniformbuffer_ = std::move(shader.uniformbuffer_); + sharedDataInShader_ = shader.sharedDataInShader_; + vertex_uniform_cache_ = shader.vertex_uniform_cache_; + fragment_uniform_cache_ = shader.fragment_uniform_cache_; + specular_lut_cache_ = std::move(shader.specular_lut_cache_); +} + +auto Shader::operator=(const Shader& shader) -> Shader& { + if (this == &shader) { + return *this; + } + std::shared_lock lock(shader.specular_cache_mutex_); + uniformbuffer_ = shader.uniformbuffer_; + sharedDataInShader_ = shader.sharedDataInShader_; + vertex_uniform_cache_ = shader.vertex_uniform_cache_; + fragment_uniform_cache_ = shader.fragment_uniform_cache_; + specular_lut_cache_ = shader.specular_lut_cache_; + return *this; +} + +auto Shader::operator=(Shader&& shader) noexcept -> Shader& { + if (this == &shader) { + return *this; + } + std::unique_lock lock(shader.specular_cache_mutex_); + uniformbuffer_ = std::move(shader.uniformbuffer_); + sharedDataInShader_ = shader.sharedDataInShader_; + vertex_uniform_cache_ = shader.vertex_uniform_cache_; + fragment_uniform_cache_ = shader.fragment_uniform_cache_; + specular_lut_cache_ = std::move(shader.specular_lut_cache_); + return *this; +} + Vertex Shader::VertexShader(const Vertex& vertex) { - Matrix4f model_matrix = 
uniformbuffer_.GetUniform("modelMatrix"); - Matrix4f view_matrix = uniformbuffer_.GetUniform("viewMatrix"); - Matrix4f projection_matrix = - uniformbuffer_.GetUniform("projectionMatrix"); + const bool cache_ready = vertex_uniform_cache_.derived_valid; + + const Matrix4f* model_ptr = nullptr; + const Matrix4f* mvp_ptr = nullptr; + const Matrix3f* normal_ptr = nullptr; + + Matrix4f fallback_model; + Matrix4f fallback_mvp; + Matrix3f fallback_normal; + + if (cache_ready) { // 如果所有派生矩阵已预计算并可直接复用 + // 直接复用缓存矩阵,避免逐顶点哈希查询 + model_ptr = &vertex_uniform_cache_.model; + mvp_ptr = &vertex_uniform_cache_.mvp; + normal_ptr = &vertex_uniform_cache_.normal; + } else { // 如果缓存尚未建立 + fallback_model = uniformbuffer_.GetUniform("modelMatrix"); + Matrix4f view_matrix = uniformbuffer_.GetUniform("viewMatrix"); + Matrix4f projection_matrix = + uniformbuffer_.GetUniform("projectionMatrix"); + fallback_mvp = projection_matrix * view_matrix * fallback_model; + fallback_normal = + glm::transpose(glm::inverse(Matrix3f(fallback_model))); + model_ptr = &fallback_model; + mvp_ptr = &fallback_mvp; + normal_ptr = &fallback_normal; + } + + const Matrix4f& model_matrix = *model_ptr; + const Matrix4f& mvp_matrix = *mvp_ptr; + const Matrix3f& normal_matrix = *normal_ptr; + + const Vector4f position = vertex.GetPosition(); + Vector4f world_position = model_matrix * position; + Vector3f transformed_normal = normal_matrix * vertex.GetNormal(); + + // 将世界空间位置写入共享数据供片元阶段使用 + sharedDataInShader_.fragPos_varying = Vector3f(world_position); + + // 计算裁剪空间坐标 + Vector4f clip_position = mvp_matrix * position; + + // 返回变换后的顶点(包含变换后的法向量和裁剪坐标) + return Vertex(clip_position, transformed_normal, vertex.GetTexCoords(), + vertex.GetColor(), + clip_position); // 同时保存裁剪空间坐标用于后续裁剪 +} + +void Shader::UpdateMatrixCache(const std::string& name, + const Matrix4f& value) { + if (name == "modelMatrix") { + vertex_uniform_cache_.model = value; + vertex_uniform_cache_.has_model = true; + } else if (name == "viewMatrix") { + 
vertex_uniform_cache_.view = value; + vertex_uniform_cache_.has_view = true; + } else if (name == "projectionMatrix") { + vertex_uniform_cache_.projection = value; + vertex_uniform_cache_.has_projection = true; + } else { + return; + } + + // 任一基础矩阵更新后,标记派生矩阵失效等待重算 + vertex_uniform_cache_.derived_valid = false; + if (vertex_uniform_cache_.has_model && vertex_uniform_cache_.has_view && + vertex_uniform_cache_.has_projection) { + RecalculateDerivedMatrices(); + } +} + +void Shader::RecalculateDerivedMatrices() { + // 预计算 Model-View、MVP 以及法线矩阵,供顶点着色器复用 + vertex_uniform_cache_.model_view = + vertex_uniform_cache_.view * vertex_uniform_cache_.model; + vertex_uniform_cache_.mvp = vertex_uniform_cache_.projection * + vertex_uniform_cache_.model_view; + vertex_uniform_cache_.normal = glm::transpose(glm::inverse( + Matrix3f(vertex_uniform_cache_.model))); + vertex_uniform_cache_.derived_valid = true; +} + +void Shader::UpdateFragmentCache(const std::string& name, + const Light& value) { + if (name != "light") { + return; + } + fragment_uniform_cache_.light = value; + fragment_uniform_cache_.has_light = true; + fragment_uniform_cache_.derived_valid = false; + if (fragment_uniform_cache_.has_light && fragment_uniform_cache_.has_camera) { + RecalculateFragmentDerived(); + } +} + +void Shader::UpdateFragmentCache(const std::string& name, + const Vector3f& value) { + if (name != "cameraPos") { + return; + } + fragment_uniform_cache_.camera_pos = value; + fragment_uniform_cache_.has_camera = true; + fragment_uniform_cache_.derived_valid = false; + if (fragment_uniform_cache_.has_light && fragment_uniform_cache_.has_camera) { + RecalculateFragmentDerived(); + } +} + +void Shader::RecalculateFragmentDerived() { + fragment_uniform_cache_.light_dir_normalized = + glm::normalize(fragment_uniform_cache_.light.dir); + fragment_uniform_cache_.derived_valid = true; +} - Matrix4f mvp_matrix = projection_matrix * view_matrix * model_matrix; - // auto normal_matrix = 
model_matrix.inverse().transpose(); +void Shader::PrepareUniformCaches() { + PrepareVertexUniformCache(); + PrepareFragmentUniformCache(); +} - sharedDataInShader_.fragPos_varying = - Vector3f(model_matrix * vertex.GetPosition()); +void Shader::PrepareVertexUniformCache() { + if (vertex_uniform_cache_.derived_valid) { + return; + } + // 在进入渲染阶段前一次性取出常用矩阵并填充缓存 + if (uniformbuffer_.HasUniform("modelMatrix") && + uniformbuffer_.HasUniform("viewMatrix") && + uniformbuffer_.HasUniform("projectionMatrix")) { + vertex_uniform_cache_.model = + uniformbuffer_.GetUniform("modelMatrix"); + vertex_uniform_cache_.view = + uniformbuffer_.GetUniform("viewMatrix"); + vertex_uniform_cache_.projection = + uniformbuffer_.GetUniform("projectionMatrix"); + vertex_uniform_cache_.has_model = true; + vertex_uniform_cache_.has_view = true; + vertex_uniform_cache_.has_projection = true; + RecalculateDerivedMatrices(); + } +} - return mvp_matrix * vertex; +void Shader::PrepareFragmentUniformCache() { + if (fragment_uniform_cache_.derived_valid) { + return; + } + if (uniformbuffer_.HasUniform("light") && + uniformbuffer_.HasUniform("cameraPos")) { + fragment_uniform_cache_.light = + uniformbuffer_.GetUniform("light"); + fragment_uniform_cache_.camera_pos = + uniformbuffer_.GetUniform("cameraPos"); + fragment_uniform_cache_.has_light = true; + fragment_uniform_cache_.has_camera = true; + RecalculateFragmentDerived(); + } +} + +auto Shader::BuildSpecularLUT(float shininess) const -> SpecularLUT { + SpecularLUT lut; + if (shininess <= 0.0f) { + lut.values.fill(1.0f); + return lut; + } + + for (size_t i = 0; i < kSpecularLutResolution; ++i) { + float cos_theta = static_cast(i) / + static_cast(kSpecularLutResolution - 1); + lut.values[i] = cos_theta <= 0.0f ? 
0.0f : std::pow(cos_theta, shininess); + } + return lut; +} + +auto Shader::GetSpecularLUT(float shininess) const -> const SpecularLUT& { + uint32_t key = std::bit_cast(shininess); + { + std::shared_lock lock(specular_cache_mutex_); + auto it = specular_lut_cache_.find(key); + if (it != specular_lut_cache_.end()) { + return it->second; + } + } + + SpecularLUT lut = BuildSpecularLUT(shininess); + std::unique_lock lock(specular_cache_mutex_); + auto [it, inserted] = specular_lut_cache_.emplace(key, std::move(lut)); + return it->second; +} + +auto Shader::EvaluateSpecular(float cos_theta, float shininess) const -> float { + cos_theta = std::clamp(cos_theta, 0.0f, 1.0f); + if (shininess <= 0.0f) { + return 1.0f; + } + if (cos_theta <= 0.0f) { + return 0.0f; + } + + const auto& lut = GetSpecularLUT(shininess); + float scaled = cos_theta * static_cast(kSpecularLutResolution - 1); + size_t index = static_cast(scaled); + float frac = scaled - static_cast(index); + + const float v0 = lut.values[index]; + const float v1 = lut.values[std::min(index + 1, kSpecularLutResolution - 1)]; + return v0 + (v1 - v0) * frac; } Color Shader::FragmentShader(const Fragment& fragment) const { @@ -24,14 +265,23 @@ Color Shader::FragmentShader(const Fragment& fragment) const { Vector2f uv = fragment.uv; // uniform - Light light = uniformbuffer_.GetUniform("light"); + Light light; + Vector3f light_dir; + Vector3f camera_pos; + if (fragment_uniform_cache_.derived_valid) { + light = fragment_uniform_cache_.light; + light_dir = fragment_uniform_cache_.light_dir_normalized; + camera_pos = fragment_uniform_cache_.camera_pos; + } else { + light = uniformbuffer_.GetUniform("light"); + camera_pos = uniformbuffer_.GetUniform("cameraPos"); + light_dir = glm::normalize(light.dir); + } Material material = *fragment.material; // view direction Vector3f view_dir = - glm::normalize(sharedDataInShader_.fragPos_varying - - uniformbuffer_.GetUniform("cameraPos")); - Vector3f light_dir = 
glm::normalize(light.dir); + glm::normalize(sharedDataInShader_.fragPos_varying - camera_pos); auto intensity = std::max(glm::dot(normal, light_dir), 0.0f); // texture color @@ -51,8 +301,8 @@ Color Shader::FragmentShader(const Fragment& fragment) const { } Vector3f halfVector = glm::normalize(light_dir + view_dir); - float spec = std::pow(std::max(glm::dot(normal, halfVector), 0.0f), - material.shininess); + float cos_theta = std::max(glm::dot(normal, halfVector), 0.0f); + float spec = EvaluateSpecular(cos_theta, material.shininess); if (material.has_specular_texture) { Color texture_color = SampleTexture(material.specular_texture, uv); specular_color = texture_color * spec; @@ -108,4 +358,4 @@ Color Shader::ClampColor(const Color color) const { return Color(red, green, blue, alpha); } -} // namespace simple_renderer \ No newline at end of file +} // namespace simple_renderer diff --git a/test/system_test/main.cpp b/test/system_test/main.cpp index f75b29c..d6491d9 100755 --- a/test/system_test/main.cpp +++ b/test/system_test/main.cpp @@ -56,21 +56,20 @@ int main(int argc, char **argv) { } auto modelMatrix = simple_renderer::Matrix4f(1.0f); + simple_renderer::Matrix4f scale_matrix = glm::scale(simple_renderer::Matrix4f(1.0f), - simple_renderer::Vector3f(7.0f, 7.0f, 7.0f)); + simple_renderer::Vector3f(.02f, .02f, .02f)); - // Translation matrix simple_renderer::Matrix4f translation_matrix = glm::translate(simple_renderer::Matrix4f(1.0f), - simple_renderer::Vector3f(30.0f, 30.0f, 0.0f)); + simple_renderer::Vector3f(0.0f, -5.0f, 0.0f)); simple_renderer::Matrix4f rotation_matrix = - glm::rotate(simple_renderer::Matrix4f(1.0f), 90.0f, + glm::rotate(simple_renderer::Matrix4f(1.0f), glm::radians(-105.0f), simple_renderer::Vector3f(1.0f, 0.0f, 0.0f)); - // Combined transformation matrix - modelMatrix = scale_matrix * translation_matrix * rotation_matrix; + modelMatrix = scale_matrix* translation_matrix * rotation_matrix ; simple_renderer::Shader shader; 
shader.SetUniform("modelMatrix", modelMatrix); @@ -81,6 +80,14 @@ int main(int argc, char **argv) { simple_renderer::Camera camera(simple_renderer::Vector3f(0.0f, 0.0f, 1.0f)); + // 设置渲染模式(可选:PER_TRIANGLE、TILE_BASED 或 DEFERRED) + simple_renderer.SetRenderingMode(simple_renderer::RenderingMode::TILE_BASED); + + // 输出当前渲染模式 + std::string current_mode_name = simple_renderer::RenderingModeToString( + simple_renderer.GetRenderingMode()); + SPDLOG_INFO("当前渲染模式: {}", current_mode_name); + auto display = Display(kWidth, kHeight); display.loopBegin(); @@ -90,11 +97,11 @@ int main(int argc, char **argv) { shader.SetUniform("cameraPos", camera.GetPosition()); shader.SetUniform("viewMatrix", camera.GetViewMatrix()); shader.SetUniform("projectionMatrix", - camera.GetProjectionMatrix(60.0f, 1.0f, 0.1f, 100.0f)); + camera.GetProjectionMatrix(60.0f, static_cast(kWidth) / static_cast(kHeight), 0.1f, 100.0f)); buffer.ClearDrawBuffer(simple_renderer::Color::kBlack); for (auto &model : models) { - simple_renderer.Render(model, shader, buffer.GetDrawBuffer()); + simple_renderer.DrawModel(model, shader, buffer.GetDrawBuffer()); } buffer.SwapBuffer();