feat: implement Fence synchronization for CPU-GPU frame sync

D3D12 Core:
- Add ID3D12Fence1 and fence_value to d3d12_command
- Add fence_event for CPU waiting
- Implement wait() in command_frame for frame sync
- Implement flush() to wait for all frames to complete
- Add fence_value tracking per frame
- Signal fence at end_frame with incremented value

TestRenderer:
- Call graphics::render() in run()

Documentation:
- Add changelog for Fence sync implementation
- Update D3D12 Wiki with Fence sync section
2026-03-27 18:56:03 +08:00
parent 7da17ccadd
commit f1584ec3c6
5 changed files with 287 additions and 16 deletions
--- a/Engine/Graphics/Direct3D12/D3D12Core.cpp
+++ b/Engine/Graphics/Direct3D12/D3D12Core.cpp
@@ -72,6 +72,9 @@ namespace {
 // - 这是一种防御性编程，确保对象始终处于有效状态
 class d3d12_command
 {
+public:
+	d3d12_command() = default;
+	DISABLE_COPY_AND_MOVE(d3d12_command)
    explicit d3d12_command(ID3D12Device8 *const device, D3D12_COMMAND_LIST_TYPE type)
    {
        HRESULT hr{ S_OK };
@@ -110,25 +113,36 @@ class d3d12_command
            type == D3D12_COMMAND_LIST_TYPE_COMPUTE ? 
            L"Compute Command List" : L" Command List");

+		DXCall(hr = device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&_fence)));
+        if(FAILED(hr)) goto _error;
+        NAME_D3D12_OBJECT(_fence, L"D3D12 Fence");
+
+		_fence_event = CreateEventEx(nullptr, nullptr, 0, EVENT_ALL_ACCESS);
+		assert(_fence_event);
+		
+		return;

    _error:
        release();
    }

-    void release()
+	~d3d12_command()
 	{
+		assert(!_cmd_queue && !_cmd_list && !_fence);
 	}

+	// 等待当前帧被标记为完成信号，并重置命令分配器和命令列表
    void begin_frame()
    {
        command_frame& frame{_cmd_frames[_frame_index]};
-        frame.wait();
+        frame.wait(_fence_event, _fence);
        // 重置命令分配器将释放之前帧分配的命令内存，使其可重新用于录制新帧的命令
        // 重置命令列表将命令列表重置为可录制状态，准备录制录制命令
        DXCall(frame.cmd_allocator->Reset());
        DXCall(_cmd_list->Reset(frame.cmd_allocator, nullptr));
    }

+	// 使用新的围栏值来标记这个围栏
    void end_frame()
    {
        //在提交命令列表前，先关闭命令列表，确保命令列表进入可提交状态
@@ -137,18 +151,66 @@ class d3d12_command
        // 虽然目前只有单个命令列表且为单线程工作模式，但仍采用数组方式以保持代码的扩展性
        ID3D12CommandList *const cmd_lists[]{_cmd_list};
        _cmd_queue->ExecuteCommandLists(_countof(cmd_lists), &cmd_lists[0]);
+
+		u64& fence_value{_fence_value};
+        ++fence_value;
+		command_frame& frame{_cmd_frames[_frame_index]};
+		frame.fence_value = fence_value;
+		_cmd_queue->Signal(_fence, fence_value);
        _frame_index = (_frame_index + 1) % frame_buffer_count;
    }

+	/**
+	 * @brief Waits on every command frame's recorded fence value, then resets the frame index.
+	 * @details Calls wait() on each entry of _cmd_frames in order and sets _frame_index back to 0; wait() asserts that both the fence and the event are non-null.
+	 */
+	void flush()
+	{
+		for(u32 i{ 0 }; i < frame_buffer_count; ++i)
+		{
+			_cmd_frames[i].wait(_fence_event, _fence);
+		}
+		_frame_index = 0;
+	}
+	     
+    void release() // Waits for submitted frames, then frees the fence, event, queue, list and per-frame resources.
+    {
+		if(_fence && _fence_event) flush(); // the constructor's _error path reaches here before either may exist
+		core::release(_fence);
+		_fence_value = 0;
+
+		if(_fence_event) CloseHandle(_fence_event); // never pass a null handle to CloseHandle
+		_fence_event = nullptr;
+
+		core::release(_cmd_queue);
+		core::release(_cmd_list);
+
+		for(u32 i{ 0 }; i < frame_buffer_count; ++i)
+		{
+			_cmd_frames[i].release();
+		}
+    }
+
+	constexpr ID3D12CommandQueue *const command_queue() const {return _cmd_queue;}
+	constexpr ID3D12GraphicsCommandList6 *const command_list() const {return _cmd_list;}
+	constexpr u32 frame_index() const {return _frame_index;}
 private:

    struct command_frame
    {
        ID3D12CommandAllocator* 	cmd_allocator{ nullptr };
+		u64 						fence_value{ 0 };

-        void wait()
+        void wait(HANDLE fence_event, ID3D12Fence1* fence) // Blocks the calling thread until fence reaches this frame's fence_value.
         {
-
+			assert(fence && fence_event);
+			// A completed value below fence_value means the GPU has not yet finished this frame's command list.
+			if(fence->GetCompletedValue() < fence_value)
+			{
+				// Ask the fence to set the event once it reaches fence_value, then wait on that event with no timeout.
+				DXCall(fence->SetEventOnCompletion(fence_value, fence_event));
+				WaitForSingleObject(fence_event, INFINITE);
+			}
         }

        void release()
@@ -159,7 +221,11 @@ private:

    ID3D12CommandQueue* 				_cmd_queue{ nullptr };
    ID3D12GraphicsCommandList6* 		_cmd_list{ nullptr };
+	ID3D12Fence1* 						_fence{ nullptr };
+	// 围栏值是64位无符号整型，最大值为2^64-1；即便每秒1000帧，也需要约5.8亿年才会回绕，因此无需担心持续递增导致溢出
+	u64  								_fence_value{ 0 };
    command_frame 						_cmd_frames[frame_buffer_count]{};
+	HANDLE								_fence_event{ nullptr };
    u32 								_frame_index{ 0 };
 };

@@ -175,6 +241,12 @@ ID3D12Device8* main_device{ nullptr };
 */
 IDXGIFactory7* 		dxgi_factory{ nullptr };

+/**
+ * @brief 命令管理类实例
+ * @details 用于管理 Direct3D 12 命令队列和命令列表，提供类型安全的 GPU命令提交机制
+ */
+d3d12_command		gfx_command;
+
 // 最小支持的 Direct3D 特本级别
 constexpr D3D_FEATURE_LEVEL minumum_feature_level{ D3D_FEATURE_LEVEL_11_0 };

@@ -282,6 +354,15 @@ initialize()
 	// 为 Direct3D 12 设备设置名称
 	NAME_D3D12_OBJECT(main_device, L"Main Device");

+	// 使用 placement new 在已分配的内存上构造对象
+	// new (&gfx_command) 表示在 gfx_command 的地址处调用构造函数
+	// 这种用法允许我们在不分配新内存的情况下，在指定内存位置构造对象
+	// 常用于需要在特定内存地址构造对象，或重新初始化已存在的对象
+	// 这里 gfx_command 是一个命名空间作用域的全局对象（已被默认构造），我们直接在其内存位置上重新构造 d3d12_command 对象
+	// 避免了额外的内存分配，同时可以传递构造参数（main_device 和命令队列类型）
+	new (&gfx_command) d3d12_command(main_device, D3D12_COMMAND_LIST_TYPE_DIRECT);
+	if(!gfx_command.command_queue()) return failed_init();	
+
 #ifdef _DEBUG
 	{
 		ComPtr<ID3D12InfoQueue> info_queue;
@@ -299,6 +380,7 @@ initialize()
 void
 shutdown()
 {
+	gfx_command.release();
 	release(dxgi_factory);

    #ifdef _DEBUG
@@ -329,8 +411,14 @@ shutdown()
 void
 render()
 {
-	begin_frame();
+	// 等待GPU完成命令列表,并重置命令分配器和命令列表
+	gfx_command.begin_frame();
+	ID3D12GraphicsCommandList6* cmd_list{ gfx_command.command_list() };

-	end_frame();
+	// 记录命令
+	//
+	// 完成命令记录,立即提交命令列表到命令队列执行
+	// 为下一帧标记并增加围栏值
+	gfx_command.end_frame();
 }
 }// namespace XEngine::graphics::d3d12::core
--- a/EngineTest/TestRenderer.cpp
+++ b/EngineTest/TestRenderer.cpp
@@ -94,6 +94,7 @@ void
 engine_test::run()
 {
 	std::this_thread::sleep_for(std::chrono::milliseconds(10));
+	graphics::render();
 }

 bool
--- a/docs/changelogs/2026-03/20260327-d3d12-fence-sync.md
+++ b/docs/changelogs/2026-03/20260327-d3d12-fence-sync.md
@@ -0,0 +1,154 @@
+# 变更记录：Fence 同步机制实现
+
+**提交日期**: 2026-03-27  
+**提交哈希**: `b00a906`  
+**变更类型**: 功能实现
+
+---
+
+## 变更概述
+
+本次提交实现了 D3D12 Fence（围栏）同步机制，完成 CPU-GPU 帧同步，确保命令列表执行顺序正确，避免资源冲突。
+
+## 修改文件
+
+### Engine/Graphics/Direct3D12/
+
+| 文件 | 变更说明 |
+|------|----------|
+| `D3D12Core.cpp` | 添加 Fence 同步机制，实现 `wait()`、`flush()` 方法 |
+
+### EngineTest/
+
+| 文件 | 变更说明 |
+|------|----------|
+| `TestRenderer.cpp` | 在 `run()` 中调用 `graphics::render()` |
+
+---
+
+## 技术要点
+
+### 1. Fence 对象
+
+```cpp
+ID3D12Fence1* _fence{ nullptr };
+u64 _fence_value{ 0 };
+HANDLE _fence_event{ nullptr };
+```
+
+- **Fence**: GPU 可设置的计数器，用于同步
+- **Fence Value**: 64位无符号整数，每帧递增
+- **Fence Event**: Windows 事件对象，用于 CPU 等待
+
+### 2. command_frame 结构
+
+```cpp
+struct command_frame
+{
+    ID3D12CommandAllocator* cmd_allocator{ nullptr };
+    u64 fence_value{ 0 };  // 该帧的围栏值
+
+    void wait(HANDLE fence_event, ID3D12Fence1* fence);
+};
+```
+
+每帧记录其围栏值，用于判断 GPU 是否完成该帧。
+
+### 3. 帧同步等待
+
+```cpp
+void wait(HANDLE fence_event, ID3D12Fence1* fence)
+{
+    if(fence->GetCompletedValue() < fence_value)
+    {
+        fence->SetEventOnCompletion(fence_value, fence_event);
+        WaitForSingleObject(fence_event, INFINITE);
+    }
+}
+```
+
+- 检查 GPU 是否完成到目标围栏值
+- 未完成则设置事件并等待
+
+### 4. 帧结束信号
+
+```cpp
+void end_frame()
+{
+    // ... 提交命令列表 ...
+    ++_fence_value;
+    _cmd_frames[_frame_index].fence_value = _fence_value;
+    _cmd_queue->Signal(_fence, _fence_value);
+    _frame_index = (_frame_index + 1) % frame_buffer_count;
+}
+```
+
+- 递增围栏值
+- 记录当前帧的围栏值
+- 向命令队列加入 Signal 命令：GPU 执行到该处时会把围栏更新为该值
+
+### 5. flush 方法
+
+```cpp
+void flush()
+{
+    for(u32 i{ 0 }; i < frame_buffer_count; ++i)
+    {
+        _cmd_frames[i].wait(_fence_event, _fence);
+    }
+    _frame_index = 0;
+}
+```
+
+等待所有帧完成，用于资源释放前确保 GPU 完成。
+
+---
+
+## 同步流程
+
+```
+begin_frame()
+    │
+    ├─► 检查当前帧的 fence_value
+    │
+    ├─► 如果 GPU 未完成，CPU 等待
+    │
+    └─► 重置分配器和命令列表
+
+end_frame()
+    │
+    ├─► 提交命令列表
+    │
+    ├─► ++fence_value
+    │
+    ├─► 记录当前帧的 fence_value
+    │
+    ├─► Signal(fence, fence_value)
+    │
+    └─► 递增帧索引
+```
+
+---
+
+## 围栏值溢出问题
+
+```cpp
+// 64位无符号整数，即便每秒1000帧，也需要5.8亿年才能回绕
+u64 _fence_value{ 0 };
+```
+
+无需担心溢出问题。
+
+---
+
+## 后续工作
+
+- [ ] 交换链实现
+- [ ] 描述符堆
+- [ ] 渲染目标视图
+
+---
+
+## 相关文档
+
+- [D3D12学习Wiki](../../wiki/D3D12学习Wiki.md)
--- a/docs/changelogs/README.md
+++ b/docs/changelogs/README.md
@@ -16,6 +16,7 @@ changelogs/

 | 日期 | 提交 | 变更内容 |
 |------|------|----------|
+| 2026-03-27 | [Fence同步机制](./2026-03/20260327-d3d12-fence-sync.md) | D3D12 Fence CPU-GPU 帧同步实现 |
 | 2026-03-26 | [命令队列与多帧缓冲](./2026-03/20260326-d3d12-command-queue.md) | D3D12 命令队列和多帧渲染架构 |
 | 2026-03-26 | [D3D12设备初始化](./2026-03/20260326-d3d12-device-init.md) | D3D12 设备创建与调试层实现 |
 | 2026-03-26 | [Graphics模块](./2026-03/20260326-d3d12-foundation.md) | Graphics 模块与 D3D12 后端框架 |
--- a/docs/wiki/D3D12学习Wiki.md
+++ b/docs/wiki/D3D12学习Wiki.md
@@ -211,6 +211,33 @@ _frame_index = (_frame_index + 1) % frame_buffer_count;

 环形缓冲区管理帧资源，确保 CPU 不会超前 GPU 超过 3 帧。

+### 6.4 Fence 同步机制
+
+项目实现了 Fence（围栏）同步，确保 CPU-GPU 帧同步：
+
+```cpp
+struct command_frame
+{
+    ID3D12CommandAllocator* cmd_allocator{ nullptr };
+    u64 fence_value{ 0 };  // 该帧的围栏值
+
+    void wait(HANDLE fence_event, ID3D12Fence1* fence);
+};
+```
+
+**同步流程**：
+1. `begin_frame()` - 检查 GPU 是否完成当前帧，未完成则等待
+2. `end_frame()` - 递增围栏值，向 GPU 发送信号
+
+```cpp
+// 帧结束信号
++_fence_value;
+_cmd_frames[_frame_index].fence_value = _fence_value;
+_cmd_queue->Signal(_fence, _fence_value);
+```
+
+**围栏值溢出**：64位无符号整数，每秒1000帧需要5.8亿年才回绕，无需担心。
+
 ## 7. 渲染表面与窗口

 ### 7.1 render_surface 结构