Skip to content
This repository was archived by the owner on Aug 15, 2019. It is now read-only.

Commit 0c9f900

Browse files
authored
Improve dl.time (#715)
`dl.time()` now returns an object with the following properties (all times in ms): - `wallMs`: wall execution time. - `kernelMs`: kernel execution time, ignoring data transfer. - On `WebGL` the following additional properties exist: - `uploadWaitMs`: cpu blocking time on texture uploads. - `downloadWaitMs`: cpu blocking time on texture downloads (readPixels).
1 parent 0627b39 commit 0c9f900

File tree

10 files changed

+158
-38
lines changed

10 files changed

+158
-38
lines changed

src/engine.ts

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import {ENV} from './environment';
1919
import {tidy} from './globals';
20-
import {KernelBackend} from './kernels/backend';
20+
import {BackendTimingInfo, KernelBackend} from './kernels/backend';
2121
import * as kernel_registry from './kernels/kernel_registry';
2222
import {KernelConfigRegistry} from './kernels/kernel_registry';
2323
import * as ops from './ops/ops';
@@ -54,10 +54,12 @@ export interface TensorManager {
5454
}
5555

5656
export type MemoryInfo = {
57-
numTensors: number; numDataBuffers: number; numBytes: number; backendInfo: {};
57+
numTensors: number; numDataBuffers: number; numBytes: number;
5858
unreliable?: boolean;
5959
};
6060

61+
export interface TimingInfo extends BackendTimingInfo { wallMs: number; }
62+
6163
export class Engine implements TensorManager {
6264
// Public since optimizers will use it.
6365
registeredVariables: NamedVariableMap = {};
@@ -168,17 +170,11 @@ export class Engine implements TensorManager {
168170
}
169171

170172
memory(): MemoryInfo {
171-
const backendInfo = this.backend.memory();
172-
const memInfo: MemoryInfo = {
173-
numTensors: this.numTensors,
174-
numDataBuffers: this.numDataBuffers,
175-
numBytes: this.numBytes,
176-
backendInfo,
177-
};
178-
if (backendInfo.unreliable) {
179-
memInfo.unreliable = true;
180-
}
181-
return memInfo;
173+
const info = this.backend.memory() as MemoryInfo;
174+
info.numTensors = this.numTensors;
175+
info.numDataBuffers = this.numDataBuffers;
176+
info.numBytes = this.numBytes;
177+
return info;
182178
}
183179

184180
private shouldRecord(): boolean {
@@ -358,8 +354,11 @@ export class Engine implements TensorManager {
358354
numChannels: number): Tensor3D {
359355
return this.backend.fromPixels(pixels, numChannels);
360356
}
361-
time(query: () => void): Promise<number> {
362-
return this.backend.time(query);
357+
async time(query: () => void): Promise<TimingInfo> {
358+
const start = performance.now();
359+
const timingInfo = await this.backend.time(query) as TimingInfo;
360+
timingInfo.wallMs = performance.now() - start;
361+
return timingInfo;
363362
}
364363

365364
/**

src/environment.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,6 @@ export class Environment {
254254
* - On CPU, true. Due to automatic garbage collection, these numbers
255255
* represent undisposed tensors, i.e. not wrapped in `dl.tidy()`, or
256256
* lacking a call to `tensor.dispose()`.
257-
* - `backendInfo`: Backend-specific information.
258257
*/
259258
@doc({heading: 'Performance', subheading: 'Memory'})
260259
static memory(): MemoryInfo {

src/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ export {ConstantInitializer, Initializer, OnesInitializer, RandomNormalInitializ
4141
export {AdamaxOptimizer} from './graph/optimizers/adamax_optimizer';
4242
export {CostReduction, FeedEntry, Session} from './graph/session';
4343
export {MathBackendCPU, NDArrayMathCPU} from './kernels/backend_cpu';
44-
export {MathBackendWebGL, NDArrayMathGPU} from './kernels/backend_webgl';
44+
// tslint:disable-next-line:max-line-length
45+
export {MathBackendWebGL, NDArrayMathGPU, WebGLTimingInfo} from './kernels/backend_webgl';
4546
export {MatrixOrientation} from './kernels/types/matmul';
4647
export {GPGPUContext} from './kernels/webgl/gpgpu_context';
4748
export {NDArrayMath} from './math';

src/kernels/backend.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ import {Conv2DInfo} from '../ops/conv_util';
2121
import {DataId, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D} from '../tensor';
2222
import {DataType, Rank, TypedArray} from '../types';
2323

24+
// Required information for all backends.
25+
export interface BackendTimingInfo { kernelMs: number; }
26+
2427
export interface TensorStorage {
2528
read(dataId: DataId): Promise<TypedArray>;
2629
readSync(dataId: DataId): TypedArray;
@@ -29,12 +32,13 @@ export interface TensorStorage {
2932
fromPixels(
3033
pixels: ImageData|HTMLImageElement|HTMLCanvasElement|HTMLVideoElement,
3134
numChannels: number): Tensor3D;
32-
time(query: () => void): Promise<number>;
3335
register(dataId: DataId, shape: number[], dtype: DataType): void;
3436
memory(): {unreliable: boolean;}; // Backend-specific information.
3537
}
3638

37-
export interface BackendTimer { time(f: () => void): Promise<number>; }
39+
export interface BackendTimer {
40+
time(f: () => void): Promise<BackendTimingInfo>;
41+
}
3842

3943
/**
4044
* The interface that defines the kernels that should be implemented when

src/kernels/backend_cpu.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import {DataId, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D} from '../tensor'
3030
import * as types from '../types';
3131
import {DataType, DataTypeMap, Rank, TypedArray} from '../types';
3232
import * as util from '../util';
33-
import {KernelBackend} from './backend';
33+
import {BackendTimingInfo, KernelBackend} from './backend';
3434

3535
export class MathBackendCPU implements KernelBackend {
3636
private data = new WeakMap<DataId, DataTypeMap[DataType]>();
@@ -117,10 +117,11 @@ export class MathBackendCPU implements KernelBackend {
117117
}
118118
}
119119

120-
async time(f: () => void): Promise<number> {
120+
async time(f: () => void): Promise<BackendTimingInfo> {
121121
const start = performance.now();
122122
f();
123-
return performance.now() - start;
123+
const kernelMs = performance.now() - start;
124+
return {kernelMs};
124125
}
125126
memory() {
126127
return {

src/kernels/backend_webgl.ts

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* =============================================================================
1616
*/
1717

18+
import {TimingInfo} from '../engine';
1819
import {ENV} from '../environment';
1920
import {NDArrayMath} from '../math';
2021
import * as axis_util from '../ops/axis_util';
@@ -26,6 +27,7 @@ import * as types from '../types';
2627
// tslint:disable-next-line:max-line-length
2728
import {DataType, DataTypeMap, Rank, RecursiveArray, TypedArray} from '../types';
2829
import * as util from '../util';
30+
2931
import {KernelBackend} from './backend';
3032
import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
3133
import {AvgPool2DBackpropProgram} from './webgl/avg_pool_backprop_gpu';
@@ -70,12 +72,21 @@ export interface CPUTimerQuery {
7072
endMs?: number;
7173
}
7274

75+
export interface WebGLTimingInfo extends TimingInfo {
76+
uploadWaitMs: number;
77+
downloadWaitMs: number;
78+
}
79+
7380
export class MathBackendWebGL implements KernelBackend {
7481
private texData = new WeakMap<DataId, TextureData>();
7582
private canvas: HTMLCanvasElement;
7683

7784
private programTimersStack: TimerNode[];
7885
private activeTimers: TimerNode[];
86+
// Accumulated time spent (including blocking) in uploading data to webgl.
87+
private uploadWaitMs = 0;
88+
// Accumulated time spent (including blocking in downloading data from webgl.
89+
private downloadWaitMs = 0;
7990

8091
register(dataId: DataId, shape: number[], dtype: DataType): void {
8192
if (this.texData.has(dataId)) {
@@ -152,8 +163,16 @@ export class MathBackendWebGL implements KernelBackend {
152163
this.cacheOnCPU(dataId);
153164
return values;
154165
}
166+
const shouldTimeProgram = this.activeTimers != null;
167+
let start: number;
168+
if (shouldTimeProgram) {
169+
start = performance.now();
170+
}
155171
const float32Values =
156172
this.gpgpu.downloadMatrixFromTexture(texture, texShape[0], texShape[1]);
173+
if (shouldTimeProgram) {
174+
this.downloadWaitMs += performance.now() - start;
175+
}
157176
this.cacheOnCPU(dataId, float32Values);
158177
return texData.values;
159178
}
@@ -182,7 +201,7 @@ export class MathBackendWebGL implements KernelBackend {
182201
return this.readSync(dataId);
183202
}
184203

185-
async time(f: () => void): Promise<number> {
204+
async time(f: () => void): Promise<WebGLTimingInfo> {
186205
const oldActiveTimers = this.activeTimers;
187206
const newActiveTimers: TimerNode[] = [];
188207

@@ -204,11 +223,20 @@ export class MathBackendWebGL implements KernelBackend {
204223
this.programTimersStack = null;
205224
}
206225

207-
return Promise.all(flattenedActiveTimers).then(results => {
226+
const kernelMs = await Promise.all(flattenedActiveTimers).then(results => {
208227
let sum = 0;
209228
results.forEach(result => sum += result);
210229
return sum;
211230
});
231+
const res: WebGLTimingInfo = {
232+
uploadWaitMs: this.uploadWaitMs,
233+
downloadWaitMs: this.downloadWaitMs,
234+
kernelMs,
235+
wallMs: null // will be filled by the engine
236+
};
237+
this.uploadWaitMs = 0;
238+
this.downloadWaitMs = 0;
239+
return res;
212240
}
213241
memory() {
214242
return {unreliable: false};
@@ -933,6 +961,11 @@ export class MathBackendWebGL implements KernelBackend {
933961
// Array is already on GPU. No-op.
934962
return;
935963
}
964+
const shouldTimeProgram = this.activeTimers != null;
965+
let start: number;
966+
if (shouldTimeProgram) {
967+
start = performance.now();
968+
}
936969
const texShape =
937970
webgl_util.getTextureShapeFromLogicalShape(this.gpgpu.gl, shape);
938971
texData.texShape = texShape;
@@ -945,6 +978,9 @@ export class MathBackendWebGL implements KernelBackend {
945978
texShape[1], typedArrayToFloat32(values, dtype));
946979
// Once uploaded, don't store the values on cpu.
947980
texData.values = null;
981+
if (shouldTimeProgram) {
982+
this.uploadWaitMs += performance.now() - start;
983+
}
948984
}
949985
}
950986

src/profiler.ts

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,11 @@
1515
* =============================================================================
1616
*/
1717

18-
import * as util from './util';
19-
import {Tensor} from './tensor';
20-
import {TypedArray} from './types';
21-
2218
import {BackendTimer} from './kernels/backend';
2319
import {Kernel} from './kernels/kernel_registry';
20+
import {Tensor} from './tensor';
21+
import {TypedArray} from './types';
22+
import * as util from './util';
2423

2524
export class Profiler {
2625
constructor(private backendTimer: BackendTimer, private logger?: Logger) {
@@ -39,8 +38,8 @@ export class Profiler {
3938
const vals = result.dataSync();
4039
util.checkForNaN(vals, result.dtype, kernelName);
4140

42-
timer.then((timeMs: number) => {
43-
this.logger.logKernelProfile(kernelName, result, vals, timeMs);
41+
timer.then(timing => {
42+
this.logger.logKernelProfile(kernelName, result, vals, timing.kernelMs);
4443
});
4544

4645
return result as T;

src/profiler_test.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,22 @@
1616
*/
1717

1818
import * as dl from './index';
19-
import {Tensor} from './tensor';
20-
import {BackendTimer} from './kernels/backend';
19+
import {BackendTimer, BackendTimingInfo} from './kernels/backend';
2120
import {Kernel} from './kernels/kernel_registry';
22-
import {Logger, Profiler} from './profiler';
2321
import {TypedArray} from './kernels/webgl/tex_util';
22+
import {Logger, Profiler} from './profiler';
23+
import {Tensor} from './tensor';
2424

2525
class TestBackendTimer implements BackendTimer {
2626
private counter = 1;
2727
constructor(private delayMs: number, private queryTimeMs: number) {}
2828

29-
time(query: () => void): Promise<number> {
29+
async time(query: () => void): Promise<BackendTimingInfo> {
3030
query();
31-
return new Promise<number>(
31+
const kernelMs = await new Promise<number>(
3232
resolve => setTimeout(
3333
resolve(this.queryTimeMs * this.counter++), this.delayMs));
34+
return {kernelMs};
3435
}
3536
}
3637

src/tracking.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717

1818
import {doc} from './doc';
19+
import {TimingInfo} from './engine';
1920
import {ENV} from './environment';
2021
// tslint:disable-next-line:max-line-length
2122
import {ScopeFn, ScopeResult, ScopeResultImmediate} from './tape_util';
@@ -88,12 +89,20 @@ export class Tracking {
8889
}
8990

9091
/**
91-
* Executes `f()` and returns a promise that resolves with the elapsed time of
92-
* `f()` in milliseconds.
92+
* Executes `f()` and returns a promise that resolves with timing information.
93+
*
94+
* The result is an object with the following properties:
95+
*
96+
* - `wallMs`: wall execution time.
97+
* - `kernelMs`: kernel execution time, ignoring data transfer.
98+
* - On `WebGL` the following additional properties exist:
99+
* - `uploadWaitMs`: cpu blocking time on texture uploads.
100+
* - `downloadWaitMs`: cpu blocking time on texture downloads (readPixels).
101+
*
93102
* @param f The function to execute and time.
94103
*/
95104
@doc({heading: 'Performance', subheading: 'Timing'})
96-
static time(f: () => void): Promise<number> {
105+
static time(f: () => void): Promise<TimingInfo> {
97106
return ENV.engine.time(f);
98107
}
99108
}

src/tracking_test.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
2+
* @license
3+
* Copyright 2018 Google Inc. All Rights Reserved.
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
* =============================================================================
16+
*/
17+
18+
import * as dl from './index';
19+
import {CPU_ENVS, describeWithFlags, WEBGL_ENVS} from './test_util';
20+
21+
describeWithFlags('time webgl', WEBGL_ENVS, () => {
22+
it('upload + compute', async () => {
23+
const a = dl.zeros([10, 10]);
24+
const time = await dl.time(() => a.square()) as dl.WebGLTimingInfo;
25+
expect(time.uploadWaitMs > 0);
26+
expect(time.downloadWaitMs === 0);
27+
expect(time.kernelMs > 0);
28+
expect(time.wallMs >= time.kernelMs);
29+
});
30+
31+
it('upload + compute + dataSync', async () => {
32+
const a = dl.zeros([10, 10]);
33+
const time =
34+
await dl.time(() => a.square().dataSync()) as dl.WebGLTimingInfo;
35+
expect(time.uploadWaitMs > 0);
36+
expect(time.downloadWaitMs > 0);
37+
expect(time.kernelMs > 0);
38+
expect(time.wallMs >= time.kernelMs);
39+
});
40+
41+
it('upload + compute + data', async () => {
42+
const a = dl.zeros([10, 10]);
43+
const time = await dl.time(async () => await a.square().data()) as
44+
dl.WebGLTimingInfo;
45+
expect(time.uploadWaitMs > 0);
46+
expect(time.downloadWaitMs > 0);
47+
expect(time.kernelMs > 0);
48+
expect(time.wallMs >= time.kernelMs);
49+
});
50+
51+
it('preupload (not included) + compute + data', async () => {
52+
const a = dl.zeros([10, 10]);
53+
// Pre-upload a on gpu.
54+
a.square();
55+
const time = await dl.time(() => a.sqrt()) as dl.WebGLTimingInfo;
56+
// The tensor was already on gpu.
57+
expect(time.uploadWaitMs === 0);
58+
expect(time.downloadWaitMs === 0);
59+
expect(time.kernelMs > 0);
60+
expect(time.wallMs >= time.kernelMs);
61+
});
62+
});
63+
64+
describeWithFlags('time cpu', CPU_ENVS, () => {
65+
it('simple upload', async () => {
66+
const a = dl.zeros([10, 10]);
67+
const time = await dl.time(() => a.square());
68+
expect(time.kernelMs > 0);
69+
expect(time.wallMs >= time.kernelMs);
70+
});
71+
});

0 commit comments

Comments
 (0)