
Commit a75dc33

jvanverth authored and Skia Commit-Bot committed
Metal: Hold refs for input buffers from bindBuffer calls.
Mirrors what we have in Vulkan and Direct3D. Also adds command buffer tracking, again like Vulkan and Direct3D.

Change-Id: I2280d92274d81830aec7950afc64a0147e38c317
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/305396
Reviewed-by: Greg Daniel <[email protected]>
Commit-Queue: Jim Van Verth <[email protected]>
1 parent 90787fe commit a75dc33

7 files changed: +105, -51 lines

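The core of the change: while recording, the Metal command buffer now takes a ref on every GrBuffer bound for drawing, and it only drops those refs once the command buffer itself is retired after the GPU has finished with it. As a rough standalone sketch of that ownership pattern (using std::shared_ptr and std::vector in place of Skia's sk_sp and SkSTArray; TrackingCommandBuffer and its members are illustrative stand-ins, not Skia API):

#include <memory>
#include <utility>
#include <vector>

struct Buffer { /* stands in for const GrBuffer */ };

// Stand-in for GrMtlCommandBuffer with the new addGrBuffer() tracking.
class TrackingCommandBuffer {
public:
    // Mirrors addGrBuffer(): take a ref so the buffer outlives the caller's reference.
    void addBuffer(std::shared_ptr<const Buffer> buffer) {
        fTrackedBuffers.push_back(std::move(buffer));
    }

private:
    // Refs are released when the command buffer is destroyed, i.e. only after the GPU
    // has finished executing it (see the fence bookkeeping added to GrMtlGpu below).
    std::vector<std::shared_ptr<const Buffer>> fTrackedBuffers;
};

int main() {
    auto vertexBuffer = std::make_shared<const Buffer>();
    {
        TrackingCommandBuffer cmdBuffer;
        cmdBuffer.addBuffer(vertexBuffer);  // bindBuffers: command buffer takes a ref
        vertexBuffer.reset();               // caller may release its ref right away...
        // ...the Buffer is still alive here, kept by the command buffer's tracked list.
    }                                       // command buffer retires -> last ref dropped
    return 0;
}

The real tracking list lives on GrMtlCommandBuffer (see GrMtlCommandBuffer.h below), and the retirement point is driven by the outstanding-command-buffer queue added to GrMtlGpu.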

src/gpu/mtl/GrMtlBuffer.h

Lines changed: 0 additions & 1 deletion

@@ -25,7 +25,6 @@ class GrMtlBuffer: public GrGpuBuffer {
 
     id<MTLBuffer> mtlBuffer() const { return fMtlBuffer; }
     size_t offset() const { return fOffset; }
-    void bind(); // for initial binding of XferGpuToCpu buffers
 
 protected:
     GrMtlBuffer(GrMtlGpu*, size_t size, GrGpuBufferType intendedType, GrAccessPattern);

src/gpu/mtl/GrMtlBuffer.mm

Lines changed: 16 additions & 19 deletions

@@ -35,21 +35,26 @@
         : INHERITED(gpu, size, intendedType, accessPattern)
         , fIsDynamic(accessPattern != kStatic_GrAccessPattern)
         , fOffset(0) {
-    // In most cases, we'll allocate dynamic buffers when we map them, below.
-    if (!fIsDynamic) {
-        NSUInteger options = 0;
-        if (@available(macOS 10.11, iOS 9.0, *)) {
+    NSUInteger options = 0;
+    if (@available(macOS 10.11, iOS 9.0, *)) {
+        if (fIsDynamic) {
+#ifdef SK_BUILD_FOR_MAC
+            options |= MTLResourceStorageModeManaged;
+#else
+            options |= MTLResourceStorageModeShared;
+#endif
+        } else {
             options |= MTLResourceStorageModePrivate;
         }
+    }
 #ifdef SK_BUILD_FOR_MAC
-        // Mac requires 4-byte alignment for copies so we need
-        // to ensure we have space for the extra data
-        size = SkAlign4(size);
+    // Mac requires 4-byte alignment for copies so we need
+    // to ensure we have space for the extra data
+    size = SkAlign4(size);
 #endif
-        fMtlBuffer = size == 0 ? nil :
-                [gpu->device() newBufferWithLength: size
-                                           options: options];
-    }
+    fMtlBuffer = size == 0 ? nil :
+            [gpu->device() newBufferWithLength: size
+                                       options: options];
     this->registerWithCache(SkBudgeted::kYes);
     VALIDATE();
 }

@@ -60,11 +65,6 @@
     SkASSERT(fMapPtr == nullptr);
 }
 
-void GrMtlBuffer::bind() {
-    SkASSERT(fIsDynamic && GrGpuBufferType::kXferGpuToCpu == this->intendedType());
-    fMtlBuffer = this->mtlGpu()->resourceProvider().getDynamicBuffer(this->size(), &fOffset);
-}
-
 bool GrMtlBuffer::onUpdateData(const void* src, size_t srcInBytes) {
     if (!fIsDynamic) {
         if (fMtlBuffer == nil) {

@@ -122,9 +122,6 @@
     VALIDATE();
     SkASSERT(!this->isMapped());
     if (fIsDynamic) {
-        if (GrGpuBufferType::kXferGpuToCpu != this->intendedType()) {
-            fMtlBuffer = this->mtlGpu()->resourceProvider().getDynamicBuffer(sizeInBytes, &fOffset);
-        }
         fMappedBuffer = fMtlBuffer;
         fMapPtr = static_cast<char*>(fMtlBuffer.contents) + fOffset;
     } else {
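Net effect of the GrMtlBuffer changes: dynamic buffers no longer borrow transient storage from the resource provider at map/bind time; every non-empty buffer owns its MTLBuffer from construction, with the storage mode picked by access pattern and platform, and the Mac-only SkAlign4 padding now applies to all buffers since blit copies there require 4-byte alignment. A small restatement of that storage-mode choice as a free helper, for illustration only (no such helper exists in Skia):

#import <Metal/Metal.h>

// Illustrative only: mirrors the storage-mode logic in the rewritten constructor above.
static MTLResourceOptions storage_options_for_buffer(bool isDynamic) {
    MTLResourceOptions options = 0;
    if (@available(macOS 10.11, iOS 9.0, *)) {
        if (isDynamic) {
#ifdef SK_BUILD_FOR_MAC
            // CPU-visible; macOS managed buffers need explicit didModifyRange: syncs.
            options |= MTLResourceStorageModeManaged;
#else
            // Unified memory on iOS: CPU writes are visible to the GPU without a copy.
            options |= MTLResourceStorageModeShared;
#endif
        } else {
            // Static buffers live in GPU-only memory and are filled via blit uploads.
            options |= MTLResourceStorageModePrivate;
        }
    }
    return options;
}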

src/gpu/mtl/GrMtlCommandBuffer.h

Lines changed: 11 additions & 2 deletions

@@ -11,15 +11,16 @@
 #import <Metal/Metal.h>
 
 #include "include/core/SkRefCnt.h"
+#include "src/gpu/GrBuffer.h"
 #include "src/gpu/mtl/GrMtlUtil.h"
 
 class GrMtlGpu;
 class GrMtlPipelineState;
 class GrMtlOpsRenderPass;
 
-class GrMtlCommandBuffer {
+class GrMtlCommandBuffer : public SkRefCnt {
 public:
-    static GrMtlCommandBuffer* Create(id<MTLCommandQueue> queue);
+    static sk_sp<GrMtlCommandBuffer> Make(id<MTLCommandQueue> queue);
     ~GrMtlCommandBuffer();
 
     void commit(bool waitUntilCompleted);

@@ -33,10 +34,16 @@ class GrMtlCommandBuffer {
         [fCmdBuffer addCompletedHandler:block];
     }
 
+    void addGrBuffer(sk_sp<const GrBuffer> buffer) {
+        fTrackedGrBuffers.push_back(std::move(buffer));
+    }
+
     void encodeSignalEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
     void encodeWaitForEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
 
 private:
+    static const int kInitialTrackedResourcesCount = 32;
+
     GrMtlCommandBuffer(id<MTLCommandBuffer> cmdBuffer)
         : fCmdBuffer(cmdBuffer)
         , fPreviousRenderPassDescriptor(nil) {}

@@ -47,6 +54,8 @@ class GrMtlCommandBuffer {
     id<MTLBlitCommandEncoder> fActiveBlitCommandEncoder;
     id<MTLRenderCommandEncoder> fActiveRenderCommandEncoder;
     MTLRenderPassDescriptor* fPreviousRenderPassDescriptor;
+
+    SkSTArray<kInitialTrackedResourcesCount, sk_sp<const GrBuffer>> fTrackedGrBuffers;
 };
 
 #endif

src/gpu/mtl/GrMtlCommandBuffer.mm

Lines changed: 3 additions & 2 deletions

@@ -15,7 +15,7 @@
 #error This file must be compiled with Arc. Use -fobjc-arc flag
 #endif
 
-GrMtlCommandBuffer* GrMtlCommandBuffer::Create(id<MTLCommandQueue> queue) {
+sk_sp<GrMtlCommandBuffer> GrMtlCommandBuffer::Make(id<MTLCommandQueue> queue) {
     id<MTLCommandBuffer> mtlCommandBuffer;
     mtlCommandBuffer = [queue commandBuffer];
     if (nil == mtlCommandBuffer) {

@@ -24,11 +24,12 @@
 
     mtlCommandBuffer.label = @"GrMtlCommandBuffer::Create";
 
-    return new GrMtlCommandBuffer(mtlCommandBuffer);
+    return sk_sp<GrMtlCommandBuffer>(new GrMtlCommandBuffer(mtlCommandBuffer));
 }
 
 GrMtlCommandBuffer::~GrMtlCommandBuffer() {
     this->endAllEncoding();
+    fTrackedGrBuffers.reset();
     fCmdBuffer = nil;
 }

src/gpu/mtl/GrMtlGpu.h

Lines changed: 19 additions & 6 deletions

@@ -8,13 +8,15 @@
 #ifndef GrMtlGpu_DEFINED
 #define GrMtlGpu_DEFINED
 
+#include "include/private/SkDeque.h"
 #include "src/gpu/GrFinishCallbacks.h"
 #include "src/gpu/GrGpu.h"
 #include "src/gpu/GrRenderTarget.h"
 #include "src/gpu/GrSemaphore.h"
 #include "src/gpu/GrTexture.h"
 
 #include "src/gpu/mtl/GrMtlCaps.h"
+#include "src/gpu/mtl/GrMtlCommandBuffer.h"
 #include "src/gpu/mtl/GrMtlResourceProvider.h"
 #include "src/gpu/mtl/GrMtlStencilAttachment.h"
 #include "src/gpu/mtl/GrMtlUtil.h"

@@ -52,11 +54,6 @@ class GrMtlGpu : public GrGpu {
         kSkip_SyncQueue
     };
 
-    // Commits the current command buffer to the queue and then creates a new command buffer. If
-    // sync is set to kForce_SyncQueue, the function will wait for all work in the committed
-    // command buffer to finish before returning.
-    void submitCommandBuffer(SyncQueue sync);
-
     void deleteBackendTexture(const GrBackendTexture&) override;
 
     bool compile(const GrProgramDesc&, const GrProgramInfo&) override;

@@ -214,6 +211,13 @@ class GrMtlGpu : public GrGpu {
 
     bool onSubmitToGpu(bool syncCpu) override;
 
+    // Commits the current command buffer to the queue and then creates a new command buffer. If
+    // sync is set to kForce_SyncQueue, the function will wait for all work in the committed
+    // command buffer to finish before returning.
+    void submitCommandBuffer(SyncQueue sync);
+
+    void checkForFinishedCommandBuffers();
+
     // Function that uploads data onto textures with private storage mode (GPU access only).
     bool uploadToTexture(GrMtlTexture* tex, int left, int top, int width, int height,
                          GrColorType dataColorType, const GrMipLevel texels[], int mipLevels);

@@ -247,7 +251,16 @@ class GrMtlGpu : public GrGpu {
     id<MTLDevice> fDevice;
     id<MTLCommandQueue> fQueue;
 
-    GrMtlCommandBuffer* fCmdBuffer;
+    sk_sp<GrMtlCommandBuffer> fCurrentCmdBuffer;
+
+    struct OutstandingCommandBuffer {
+        OutstandingCommandBuffer(sk_sp<GrMtlCommandBuffer> commandBuffer, GrFence fence)
+            : fCommandBuffer(std::move(commandBuffer))
+            , fFence(fence) {}
+        sk_sp<GrMtlCommandBuffer> fCommandBuffer;
+        GrFence fFence;
+    };
+    SkDeque fOutstandingCommandBuffers;
 
     std::unique_ptr<SkSL::Compiler> fCompiler;
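Note that fOutstandingCommandBuffers is an SkDeque of raw fixed-size slots rather than a container of objects, so entries are placement-new'd into the storage the deque hands back and must have their destructors called by hand before being popped, which is exactly what the .mm changes below do. A self-contained sketch of that construct/destroy pairing, with a local aligned buffer standing in for a deque slot and simplified member types (not Skia code):

#include <memory>
#include <new>
#include <utility>

// Simplified stand-in for OutstandingCommandBuffer: a ref-counted command buffer plus a fence.
struct Outstanding {
    std::shared_ptr<int> fCommandBuffer;  // stands in for sk_sp<GrMtlCommandBuffer>
    int fFence;                           // stands in for GrFence
    Outstanding(std::shared_ptr<int> cb, int fence)
            : fCommandBuffer(std::move(cb)), fFence(fence) {}
};

int main() {
    // SkDeque::push_back() returns uninitialized storage of the element size given to its
    // constructor; a local aligned buffer plays that role here.
    alignas(Outstanding) unsigned char slot[sizeof(Outstanding)];

    // Construct in place, mirroring: new (fOutstandingCommandBuffers.push_back()) ...
    Outstanding* entry = new (slot) Outstanding(std::make_shared<int>(0), /*fence=*/1);

    // Later, once the fence has signaled: destroy manually before releasing the slot,
    // mirroring: front->~OutstandingCommandBuffer(); fOutstandingCommandBuffers.pop_front();
    entry->~Outstanding();
    return 0;
}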

src/gpu/mtl/GrMtlGpu.mm

Lines changed: 53 additions & 21 deletions

@@ -112,12 +112,18 @@ static bool get_feature_set(id<MTLDevice> device, MTLFeatureSet* featureSet) {
     return sk_sp<GrGpu>(new GrMtlGpu(direct, options, device, queue, featureSet));
 }
 
+// This constant determines how many OutstandingCommandBuffers are allocated together as a block in
+// the deque. As such it needs to balance allocating too much memory vs. incurring
+// allocation/deallocation thrashing. It should roughly correspond to the max number of outstanding
+// command buffers we expect to see.
+static const int kDefaultOutstandingAllocCnt = 8;
+
 GrMtlGpu::GrMtlGpu(GrDirectContext* direct, const GrContextOptions& options,
                    id<MTLDevice> device, id<MTLCommandQueue> queue, MTLFeatureSet featureSet)
         : INHERITED(direct)
         , fDevice(device)
         , fQueue(queue)
-        , fCmdBuffer(nullptr)
+        , fOutstandingCommandBuffers(sizeof(OutstandingCommandBuffer), kDefaultOutstandingAllocCnt)
         , fCompiler(new SkSL::Compiler())
         , fResourceProvider(this)
         , fDisconnected(false)

@@ -135,24 +141,25 @@ static bool get_feature_set(id<MTLDevice> device, MTLFeatureSet* featureSet) {
 void GrMtlGpu::disconnect(DisconnectType type) {
     INHERITED::disconnect(type);
 
-    if (DisconnectType::kCleanup == type) {
+    if (!fDisconnected) {
         this->destroyResources();
-    } else {
-        delete fCmdBuffer;
-        fCmdBuffer = nullptr;
-
-        fResourceProvider.destroyResources();
-
-        fQueue = nil;
-        fDevice = nil;
-
         fDisconnected = true;
     }
 }
 
 void GrMtlGpu::destroyResources() {
-    // Will implicitly delete the command buffer
     this->submitCommandBuffer(SyncQueue::kForce_SyncQueue);
+
+    // We used a placement new for each object in fOutstandingCommandBuffers, so we're responsible
+    // for calling the destructor on each of them as well.
+    while (!fOutstandingCommandBuffers.empty()) {
+        OutstandingCommandBuffer* buffer =
+                (OutstandingCommandBuffer*)fOutstandingCommandBuffers.front();
+        this->deleteFence(buffer->fFence);
+        buffer->~OutstandingCommandBuffer();
+        fOutstandingCommandBuffers.pop_front();
+    }
+
     fResourceProvider.destroyResources();
 
     fQueue = nil;

@@ -175,18 +182,44 @@ static bool get_feature_set(id<MTLDevice> device, MTLFeatureSet* featureSet) {
 }
 
 GrMtlCommandBuffer* GrMtlGpu::commandBuffer() {
-    if (!fCmdBuffer) {
-        fCmdBuffer = GrMtlCommandBuffer::Create(fQueue);
+    if (!fCurrentCmdBuffer) {
+        fCurrentCmdBuffer = GrMtlCommandBuffer::Make(fQueue);
+
+        // This should be done after we have a new command buffer in case the freeing of any
+        // resources held by a finished command buffer causes us to send a new command to the gpu
+        // (like changing the resource state).
+        this->checkForFinishedCommandBuffers();
     }
-    return fCmdBuffer;
+    return fCurrentCmdBuffer.get();
 }
 
 void GrMtlGpu::submitCommandBuffer(SyncQueue sync) {
-    if (fCmdBuffer) {
-        fResourceProvider.addBufferCompletionHandler(fCmdBuffer);
-        fCmdBuffer->commit(SyncQueue::kForce_SyncQueue == sync);
-        delete fCmdBuffer;
-        fCmdBuffer = nullptr;
+    // TODO: handle sync with empty command buffer
+    if (fCurrentCmdBuffer) {
+        fResourceProvider.addBufferCompletionHandler(fCurrentCmdBuffer.get());
+
+        GrFence fence = this->insertFence();
+        new (fOutstandingCommandBuffers.push_back()) OutstandingCommandBuffer(
+                fCurrentCmdBuffer, fence);
+
+        fCurrentCmdBuffer->commit(SyncQueue::kForce_SyncQueue == sync);
+        fCurrentCmdBuffer.reset();
+    }
+}
+
+void GrMtlGpu::checkForFinishedCommandBuffers() {
+    // Iterate over all the outstanding command buffers to see if any have finished. The command
+    // buffers are in order from oldest to newest, so we start at the front to check if their fence
+    // has signaled. If so we pop it off and move onto the next.
+    // Repeat till we find a command list that has not finished yet (and all others afterwards are
+    // also guaranteed to not have finished).
+    OutstandingCommandBuffer* front = (OutstandingCommandBuffer*)fOutstandingCommandBuffers.front();
+    while (front && this->waitFence(front->fFence)) {
+        // Since we used placement new we are responsible for calling the destructor manually.
+        this->deleteFence(front->fFence);
+        front->~OutstandingCommandBuffer();
+        fOutstandingCommandBuffers.pop_front();
+        front = (OutstandingCommandBuffer*)fOutstandingCommandBuffers.front();
     }
 }
 
@@ -1254,7 +1287,6 @@ static int get_surface_sample_cnt(GrSurface* surf) {
     }
 
     GrMtlBuffer* grMtlBuffer = static_cast<GrMtlBuffer*>(transferBuffer);
-    grMtlBuffer->bind();
 
     size_t transBufferRowBytes = bpp * width;
     size_t transBufferImageBytes = transBufferRowBytes * height;
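checkForFinishedCommandBuffers() leans on submission order: the deque is oldest-first, so the scan can stop at the first command buffer whose fence has not signaled. A rough standalone sketch of that retirement loop, with std::deque and an integer fence value standing in for SkDeque, GrFence, and waitFence() (names are illustrative, not Skia API):

#include <deque>
#include <memory>

struct CmdBuf { /* releasing this also releases every buffer ref it tracked */ };

struct Outstanding {
    std::shared_ptr<CmdBuf> fCommandBuffer;
    int fFence;  // monotonically increasing submission fence value
};

// Pops every finished command buffer off the front of the queue; entries behind the first
// unfinished one cannot have finished either, so the scan stops there.
void checkForFinishedCommandBuffers(std::deque<Outstanding>& outstanding,
                                    int lastSignaledFence) {
    while (!outstanding.empty() && outstanding.front().fFence <= lastSignaledFence) {
        // Dropping the entry releases the command buffer ref and, through its tracked list,
        // every buffer it was keeping alive.
        outstanding.pop_front();
    }
}

int main() {
    std::deque<Outstanding> outstanding;
    outstanding.push_back({std::make_shared<CmdBuf>(), 1});
    outstanding.push_back({std::make_shared<CmdBuf>(), 2});
    checkForFinishedCommandBuffers(outstanding, /*lastSignaledFence=*/1);  // retires fence 1 only
    return 0;
}

This is what makes it safe for callers to drop their GrBuffer refs as soon as the buffers have been bound: the command buffer entry in the deque keeps everything alive until its fence signals.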

src/gpu/mtl/GrMtlOpsRenderPass.mm

Lines changed: 3 additions & 0 deletions

@@ -264,18 +264,21 @@ static MTLPrimitiveType gr_to_mtl_primitive(GrPrimitiveType primitiveType) {
         SkASSERT(!vertexBuffer->isCpuBuffer());
         SkASSERT(!static_cast<const GrGpuBuffer*>(vertexBuffer.get())->isMapped());
         fActiveVertexBuffer = std::move(vertexBuffer);
+        fGpu->commandBuffer()->addGrBuffer(fActiveVertexBuffer);
         ++inputBufferIndex;
     }
     if (instanceBuffer) {
         SkASSERT(!instanceBuffer->isCpuBuffer());
         SkASSERT(!static_cast<const GrGpuBuffer*>(instanceBuffer.get())->isMapped());
         this->setVertexBuffer(fActiveRenderCmdEncoder, instanceBuffer.get(), 0, inputBufferIndex++);
         fActiveInstanceBuffer = std::move(instanceBuffer);
+        fGpu->commandBuffer()->addGrBuffer(fActiveInstanceBuffer);
     }
     if (indexBuffer) {
         SkASSERT(!indexBuffer->isCpuBuffer());
         SkASSERT(!static_cast<const GrGpuBuffer*>(indexBuffer.get())->isMapped());
         fActiveIndexBuffer = std::move(indexBuffer);
+        fGpu->commandBuffer()->addGrBuffer(fActiveIndexBuffer);
     }
 }
