From f4b46f00da0b8107ef95876a144ec38ef9cceda7 Mon Sep 17 00:00:00 2001 From: Christian Helgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:21:34 -0800 Subject: [PATCH] Add Timestamp Queries to Bitonic Sort Example (#347) * add timestamp query functionality to bitonic sort for compatible browsers * seconds to millisecons change * sketched out average swap stuff * Added timestamp for individual step, individual sort, and the average sort time for a full bitonic sort given the current configuration of Total Elements and Size Limit * removed un needed logs --- src/sample/bitonicSort/main.ts | 246 ++++++++++++++++++++++++++++---- src/sample/bitonicSort/utils.ts | 12 +- 2 files changed, 228 insertions(+), 30 deletions(-) diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index 79cd5ff5..040095a1 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -30,6 +30,17 @@ type StepType = type DisplayType = 'Elements' | 'Swap Highlight'; +interface ConfigInfo { + // Number of sorts executed under a given elements + size limit config + sorts: number; + // Total collective time taken to execute each complete sort under this config + time: number; +} + +interface StringKeyToNumber { + [key: string]: ConfigInfo; +} + // Gui settings object interface SettingsInterface { 'Total Elements': number; @@ -52,10 +63,17 @@ interface SettingsInterface { 'Randomize Values': () => void; 'Execute Sort Step': () => void; 'Log Elements': () => void; - 'Complete Sort': () => void; - 'Sort Speed': number; + 'Auto Sort': () => void; + 'Auto Sort Speed': number; 'Display Mode': DisplayType; 'Total Swaps': number; + stepTime: number; + 'Step Time': string; + sortTime: number; + 'Sort Time': string; + 'Average Sort Time': string; + configToCompleteSwapsMap: StringKeyToNumber; + configKey: string; } const getNumSteps = (numElements: number) => { @@ -65,9 +83,34 @@ const getNumSteps = (numElements: number) => { let init: SampleInit; SampleInitFactoryWebGPU( - async ({ pageState, device, gui, presentationFormat, context, canvas }) => { + async ({ + pageState, + device, + gui, + presentationFormat, + context, + canvas, + timestampQueryAvailable, + }) => { const maxInvocationsX = device.limits.maxComputeWorkgroupSizeX; + let querySet: GPUQuerySet; + let timestampQueryResolveBuffer: GPUBuffer; + let timestampQueryResultBuffer: GPUBuffer; + if (timestampQueryAvailable) { + querySet = device.createQuerySet({ type: 'timestamp', count: 2 }); + timestampQueryResolveBuffer = device.createBuffer({ + // 2 timestamps * BigInt size for nanoseconds + size: 2 * BigInt64Array.BYTES_PER_ELEMENT, + usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC, + }); + timestampQueryResultBuffer = device.createBuffer({ + // 2 timestamps * BigInt size for nanoseconds + size: 2 * BigInt64Array.BYTES_PER_ELEMENT, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); + } + const totalElementOptions = []; const maxElements = maxInvocationsX * 32; for (let i = maxElements; i >= 4; i /= 2) { @@ -88,12 +131,12 @@ SampleInitFactoryWebGPU( const settings: SettingsInterface = { // TOTAL ELEMENT AND GRID SETTINGS - // The number of elements to be sorted. Must equal gridWidth * gridHeight || Workgroup Size * Workgroups * 2 + // The number of elements to be sorted. Must equal gridWidth * gridHeight || Workgroup Size * Workgroups * 2. // When changed, all relevant values within the settings object are reset to their defaults at the beginning of a sort with n elements. 'Total Elements': maxElements, - // width of screen in cells + // The width of the screen in cells. 'Grid Width': defaultGridWidth, - // height of screen in cells + // The height of the screen in cells. 'Grid Height': defaultGridHeight, // Grid Dimensions as string 'Grid Dimensions': `${defaultGridWidth}x${defaultGridHeight}`, @@ -145,17 +188,38 @@ SampleInitFactoryWebGPU( 'Log Elements': () => { return; }, - // A function that automatically executes each step of the bitonic sort at an interval determined by 'Sort Speed' - 'Complete Sort': () => { + // A function that automatically executes each step of the bitonic sort at an interval determined by 'Auto Sort Speed' + 'Auto Sort': () => { return; }, - // The speed at which each step of the bitonic sort will be executed after 'Complete Sort' has been called. - 'Sort Speed': 50, + // The speed at which each step of the bitonic sort will be executed after 'Auto Sort' has been called. + 'Auto Sort Speed': 50, // MISCELLANEOUS SETTINGS 'Display Mode': 'Elements', // An atomic value representing the total number of swap operations executed over the course of the bitonic sort. 'Total Swaps': 0, + + // TIMESTAMP SETTINGS + // NOTE: Timestep values below all are calculated in terms of milliseconds rather than the nanoseconds a timestamp query set usually outputs. + // Time taken to execute the previous step of the bitonic sort in milliseconds + 'Step Time': '0ms', + stepTime: 0, + // Total taken to colletively execute each step of the complete bitonic sort, represented in milliseconds. + 'Sort Time': '0ms', + sortTime: 0, + // Average time taken to complete a bitonic sort with the current combination of n 'Total Elements' and x 'Size Limit' + 'Average Sort Time': '0ms', + // A string to number map that maps a string representation of the current 'Total Elements' + 'Size Limit' configuration to a number + // representing the total number of sorts that have been executed under that same configuration. + configToCompleteSwapsMap: { + '8192 256': { + sorts: 0, + time: 0, + }, + }, + // Current key into configToCompleteSwapsMap + configKey: '8192 256', }; // Initialize initial elements array @@ -271,6 +335,18 @@ SampleInitFactoryWebGPU( 'BitonicDisplay' ); + const resetTimeInfo = () => { + settings.stepTime = 0; + settings.sortTime = 0; + stepTimeController.setValue('0ms'); + sortTimeController.setValue(`0ms`); + const nanCheck = + settings.configToCompleteSwapsMap[settings.configKey].time / + settings.configToCompleteSwapsMap[settings.configKey].sorts; + const ast = nanCheck ? nanCheck : 0; + averageSortTimeController.setValue(`${ast.toFixed(5)}ms`); + }; + const resetExecutionInformation = () => { // The workgroup size is either elements / 2 or Size Limit workgroupSizeController.setValue( @@ -401,29 +477,29 @@ SampleInitFactoryWebGPU( } }; - let completeSortIntervalID: ReturnType | null = null; + let autoSortIntervalID: ReturnType | null = null; const endSortInterval = () => { - if (completeSortIntervalID !== null) { - clearInterval(completeSortIntervalID); - completeSortIntervalID = null; + if (autoSortIntervalID !== null) { + clearInterval(autoSortIntervalID); + autoSortIntervalID = null; } }; const startSortInterval = () => { - const currentIntervalSpeed = settings['Sort Speed']; - completeSortIntervalID = setInterval(() => { + const currentIntervalSpeed = settings['Auto Sort Speed']; + autoSortIntervalID = setInterval(() => { if (settings['Next Step'] === 'NONE') { - clearInterval(completeSortIntervalID); - completeSortIntervalID = null; + clearInterval(autoSortIntervalID); + autoSortIntervalID = null; sizeLimitController.domElement.style.pointerEvents = 'auto'; } - if (settings['Sort Speed'] !== currentIntervalSpeed) { - clearInterval(completeSortIntervalID); - completeSortIntervalID = null; + if (settings['Auto Sort Speed'] !== currentIntervalSpeed) { + clearInterval(autoSortIntervalID); + autoSortIntervalID = null; startSortInterval(); } settings.executeStep = true; setSwappedCell(); - }, settings['Sort Speed']); + }, settings['Auto Sort Speed']); }; // At top level, information about resources used to execute the compute shader @@ -435,10 +511,23 @@ SampleInitFactoryWebGPU( endSortInterval(); resizeElementArray(); sizeLimitController.domElement.style.pointerEvents = 'auto'; + // Create new config key for current element + size limit configuration + const currConfigKey = `${settings['Total Elements']} ${settings['Size Limit']}`; + // If configKey doesn't exist in the map, create it. + if (!settings.configToCompleteSwapsMap[currConfigKey]) { + settings.configToCompleteSwapsMap[currConfigKey] = { + sorts: 0, + time: 0, + }; + } + settings.configKey = currConfigKey; + resetTimeInfo(); }); const sizeLimitController = computeResourcesFolder .add(settings, 'Size Limit', sizeLimitOptions) .onChange(() => { + // Change total workgroups per step and size of a workgroup based on arbitrary constraint + // imposed by size limit. const constraint = Math.min( settings['Total Elements'] / 2, settings['Size Limit'] @@ -447,6 +536,7 @@ SampleInitFactoryWebGPU( (settings['Total Elements'] - 1) / (settings['Size Limit'] * 2); workgroupSizeController.setValue(constraint); workgroupsPerStepController.setValue(Math.ceil(workgroupsPerStep)); + // Apply new compute resources values to the sort's compute pipeline computePipeline = computePipeline = device.createComputePipeline({ layout: device.createPipelineLayout({ bindGroupLayouts: [computeBGCluster.bindGroupLayout], @@ -460,6 +550,17 @@ SampleInitFactoryWebGPU( entryPoint: 'computeMain', }, }); + // Create new config key for current element + size limit configuration + const currConfigKey = `${settings['Total Elements']} ${settings['Size Limit']}`; + // If configKey doesn't exist in the map, create it. + if (!settings.configToCompleteSwapsMap[currConfigKey]) { + settings.configToCompleteSwapsMap[currConfigKey] = { + sorts: 0, + time: 0, + }; + } + settings.configKey = currConfigKey; + resetTimeInfo(); }); const workgroupSizeController = computeResourcesFolder.add( settings, @@ -469,11 +570,11 @@ SampleInitFactoryWebGPU( settings, 'Workgroups Per Step' ); + computeResourcesFolder.open(); // Folder with functions that control the execution of the sort const controlFolder = gui.addFolder('Sort Controls'); - controlFolder.add(settings, 'Sort Speed', 50, 1000).step(50); controlFolder.add(settings, 'Execute Sort Step').onChange(() => { // Size Limit locked upon sort sizeLimitController.domElement.style.pointerEvents = 'none'; @@ -484,17 +585,19 @@ SampleInitFactoryWebGPU( endSortInterval(); randomizeElementArray(); resetExecutionInformation(); + resetTimeInfo(); // Unlock workgroup size limit controller since sort has stopped sizeLimitController.domElement.style.pointerEvents = 'auto'; }); controlFolder .add(settings, 'Log Elements') .onChange(() => console.log(elements)); - controlFolder.add(settings, 'Complete Sort').onChange(() => { + controlFolder.add(settings, 'Auto Sort').onChange(() => { // Invocation Limit locked upon sort sizeLimitController.domElement.style.pointerEvents = 'none'; startSortInterval(); }); + controlFolder.add(settings, 'Auto Sort Speed', 50, 1000).step(50); controlFolder.open(); // Information about grid display @@ -536,6 +639,15 @@ SampleInitFactoryWebGPU( 'Next Swap Span' ); + // Timestamp information for Chrome 121+ or other compatible browsers + const timestampFolder = gui.addFolder('Timestamp Info (Chrome 121+)'); + const stepTimeController = timestampFolder.add(settings, 'Step Time'); + const sortTimeController = timestampFolder.add(settings, 'Sort Time'); + const averageSortTimeController = timestampFolder.add( + settings, + 'Average Sort Time' + ); + // Adjust styles of Function List Elements within GUI const liFunctionElements = document.getElementsByClassName('cr function'); for (let i = 0; i < liFunctionElements.length; i++) { @@ -575,6 +687,9 @@ SampleInitFactoryWebGPU( workgroupSizeController.domElement.style.pointerEvents = 'none'; gridDimensionsController.domElement.style.pointerEvents = 'none'; totalSwapsController.domElement.style.pointerEvents = 'none'; + stepTimeController.domElement.style.pointerEvents = 'none'; + sortTimeController.domElement.style.pointerEvents = 'none'; + averageSortTimeController.domElement.style.pointerEvents = 'none'; gui.width = 325; let highestBlockHeight = 2; @@ -621,13 +736,41 @@ SampleInitFactoryWebGPU( }); if ( settings.executeStep && - highestBlockHeight !== settings['Total Elements'] * 2 + highestBlockHeight < settings['Total Elements'] * 2 ) { - const computePassEncoder = commandEncoder.beginComputePass(); + let computePassEncoder: GPUComputePassEncoder; + if (timestampQueryAvailable) { + computePassEncoder = commandEncoder.beginComputePass({ + timestampWrites: { + querySet, + beginningOfPassWriteIndex: 0, + endOfPassWriteIndex: 1, + }, + }); + } else { + computePassEncoder = commandEncoder.beginComputePass(); + } computePassEncoder.setPipeline(computePipeline); computePassEncoder.setBindGroup(0, computeBGCluster.bindGroups[0]); computePassEncoder.dispatchWorkgroups(settings['Workgroups Per Step']); computePassEncoder.end(); + // Resolve time passed in between beginning and end of computePass + if (timestampQueryAvailable) { + commandEncoder.resolveQuerySet( + querySet, + 0, + 2, + timestampQueryResolveBuffer, + 0 + ); + commandEncoder.copyBufferToBuffer( + timestampQueryResolveBuffer, + 0, + timestampQueryResultBuffer, + 0, + 2 * BigInt64Array.BYTES_PER_ELEMENT + ); + } settings['Step Index'] = settings['Step Index'] + 1; currentStepController.setValue( `${settings['Step Index']} of ${settings['Total Steps']}` @@ -641,9 +784,15 @@ SampleInitFactoryWebGPU( // The next cycle's flip operation will have a maximum swap span 2 times that of the previous cycle highestBlockHeight *= 2; if (highestBlockHeight === settings['Total Elements'] * 2) { - // The next cycle's maximum swap span exceeds the total number of elements. Thus, the sort is over. + // The next cycle's maximum swap span exceeds the total number of elements. Therefore, the sort is over. + // Accordingly, there will be no next step. nextStepController.setValue('NONE'); + // And if there is no next step, then there are no swaps, and no block range within which two elements are swapped. nextBlockHeightController.setValue(0); + // Finally, with our sort completed, we can increment the number of total completed sorts executed with n 'Total Elements' + // and x 'Size Limit', which will allow us to calculate the average time of all sorts executed with this specific + // configuration of compute resources + settings.configToCompleteSwapsMap[settings.configKey].sorts += 1; } else if (highestBlockHeight > settings['Workgroup Size'] * 2) { // The next cycle's maximum swap span exceeds the range of a single workgroup, so our next flip will operate on global indices. nextStepController.setValue('FLIP_GLOBAL'); @@ -679,7 +828,10 @@ SampleInitFactoryWebGPU( } device.queue.submit([commandEncoder.finish()]); - if (settings.executeStep) { + if ( + settings.executeStep && + highestBlockHeight < settings['Total Elements'] * 4 + ) { // Copy GPU element data to CPU await elementsStagingBuffer.mapAsync( GPUMapMode.READ, @@ -700,7 +852,6 @@ SampleInitFactoryWebGPU( 0, Uint32Array.BYTES_PER_ELEMENT ); - // Get correct range of data from CPU copy of GPU Data const elementsData = copyElementsBuffer.slice( 0, Uint32Array.BYTES_PER_ELEMENT * settings['Total Elements'] @@ -717,6 +868,43 @@ SampleInitFactoryWebGPU( // Elements output becomes elements input, swap accumulate elements = elementsOutput; setSwappedCell(); + + // Handle timestamp query stuff + if (timestampQueryAvailable) { + // Copy timestamp query result buffer data to CPU + await timestampQueryResultBuffer.mapAsync( + GPUMapMode.READ, + 0, + 2 * BigInt64Array.BYTES_PER_ELEMENT + ); + const copyTimestampResult = new BigInt64Array( + timestampQueryResultBuffer.getMappedRange() + ); + // Calculate new step, sort, and average sort times + const newStepTime = + Number(copyTimestampResult[1] - copyTimestampResult[0]) / 1000000; + const newSortTime = settings.sortTime + newStepTime; + // Apply calculated times to settings object as both number and 'ms' appended string + settings.stepTime = newStepTime; + settings.sortTime = newSortTime; + stepTimeController.setValue(`${newStepTime.toFixed(5)}ms`); + sortTimeController.setValue(`${newSortTime.toFixed(5)}ms`); + // Calculate new average sort upon end of final execution step of a full bitonic sort. + if (highestBlockHeight === settings['Total Elements'] * 2) { + // Lock off access to this larger if block..not best architected solution but eh + highestBlockHeight *= 2; + settings.configToCompleteSwapsMap[settings.configKey].time += + newSortTime; + const averageSortTime = + settings.configToCompleteSwapsMap[settings.configKey].time / + settings.configToCompleteSwapsMap[settings.configKey].sorts; + averageSortTimeController.setValue( + `${averageSortTime.toFixed(5)}ms` + ); + } + timestampQueryResultBuffer.unmap(); + // Get correct range of data from CPU copy of GPU Data + } } settings.executeStep = false; requestAnimationFrame(frame); diff --git a/src/sample/bitonicSort/utils.ts b/src/sample/bitonicSort/utils.ts index bbbeb9f3..6886d6c7 100644 --- a/src/sample/bitonicSort/utils.ts +++ b/src/sample/bitonicSort/utils.ts @@ -98,6 +98,7 @@ interface DeviceInitParms { interface DeviceInit3DParams extends DeviceInitParms { context: GPUCanvasContext; presentationFormat: GPUTextureFormat; + timestampQueryAvailable: boolean; } type CallbackSync3D = (params: SampleInitParams & DeviceInit3DParams) => void; @@ -112,7 +113,15 @@ export const SampleInitFactoryWebGPU = async ( ): Promise => { const init: SampleInit = async ({ canvas, pageState, gui, stats }) => { const adapter = await navigator.gpu.requestAdapter(); - const device = await adapter.requestDevice(); + const timestampQueryAvailable = adapter.features.has('timestamp-query'); + let device: GPUDevice; + if (timestampQueryAvailable) { + device = await adapter.requestDevice({ + requiredFeatures: ['timestamp-query'], + }); + } else { + device = await adapter.requestDevice(); + } if (!pageState.active) return; const context = canvas.getContext('webgpu') as GPUCanvasContext; const devicePixelRatio = window.devicePixelRatio; @@ -133,6 +142,7 @@ export const SampleInitFactoryWebGPU = async ( context, presentationFormat, stats, + timestampQueryAvailable, }); }; return init;