Created
May 17, 2023 07:59
-
-
Save pranavsharma/f3c3ced552cada00fb556734c6967711 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Buffers;
using System.Diagnostics;
using System.IO;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
// Session setup. Every object created here wraps a native ONNX Runtime handle, so each
// one is declared with `using var` and is released (in reverse declaration order) when
// the program ends instead of being left to the finalizer.

// Session options that enable the CUDA execution provider on device 0.
using var sessionOptions = SessionOptions.MakeSessionOptionWithCudaProvider(0);

// Per the original author's note, the model computes: (x + y) * 2.
using var session = new InferenceSession("C:\\Users\\prs\\model.onnx", sessionOptions);

// Number of rows in the input tensor (the second dimension is fixed to 1000 below).
int batch_size = 10000;

// Holds the outputs of the most recent run; they stay alive because the next run
// reuses their buffer as input. Reassigned on every run, so not a using declaration.
IDisposableReadOnlyCollection<DisposableNamedOnnxValue> prevOutput = null;

// Memory descriptor used when binding outputs that should remain on CUDA device 0.
using var ortGpuMemoryInfo = new OrtMemoryInfo(OrtMemoryInfo.allocatorCUDA, OrtAllocatorType.DeviceAllocator, 0, OrtMemType.Default);

// Memory descriptor used for the final output that must be readable from the host.
// NOTE(review): allocatorCUDA_PINNED + CpuOutput presumably means CUDA pinned host memory - confirm.
using var ortCPUMemoryInfo = new OrtMemoryInfo(OrtMemoryInfo.allocatorCUDA_PINNED, OrtAllocatorType.DeviceAllocator, 0, OrtMemType.CpuOutput);

using var ioBinding = session.CreateIoBinding();
using var runOptions = new RunOptions();
// Warm-up run with IoBinding: feed a host-side tensor once and bind the output to
// GPU memory, so the profiling loop can start from an output produced by the model.
Tensor<float> inputTensorX = new DenseTensor<float>(new float[batch_size * 1000], new int[] { batch_size, 1000 });
inputTensorX.Fill(1.0f);

{
    // Wrap the managed tensor so it can be bound; released when this scope ends.
    using FixedBufferOnnxValue hostInput = FixedBufferOnnxValue.CreateFromTensor(inputTensorX);

    // The same buffer is bound to both model inputs.
    ioBinding.BindInput("x", hostInput);
    ioBinding.BindInput("y", hostInput);
    ioBinding.BindOutputToDevice("output", ortGpuMemoryInfo);
    ioBinding.SynchronizeBoundInputs();

    var warmupOutputs = session.RunWithBindingAndNames(runOptions, ioBinding);
    ioBinding.ClearBoundInputs();

    // Release any earlier outputs before taking ownership of the new ones.
    if (prevOutput != null)
    {
        prevOutput.Dispose();
    }
    prevOutput = warmupOutputs;

    // From here on the "input" tensor is a view over the first output (bound to GPU memory above).
    inputTensorX = prevOutput.ElementAt(0).AsTensor<float>();
}
// Profiling loop: repeatedly feeds the previous run's output back in as both model
// inputs and accumulates the time spent binding, synchronizing, running and disposing.
//
// Timing uses Stopwatch rather than DateTime.Now: DateTime.Now is a wall-clock reading
// and is not intended for measuring short intervals, whereas Stopwatch accumulates
// elapsed time across Start/Stop pairs using the high-resolution timer when available.
const int iterationCount = 10000;
var bindTimer = new Stopwatch();
var syncTimer = new Stopwatch();
var runTimer = new Stopwatch();
var disposeTimer = new Stopwatch();

try
{
    var totalTimer = Stopwatch.StartNew();

    for (int i = 0; i < iterationCount; i++)
    {
        // The buffer-based re-binding below needs the dense representation; fail with
        // a clear message instead of a NullReferenceException if that ever changes.
        if (inputTensorX is not DenseTensor<float> denseTensor)
        {
            throw new InvalidOperationException("Expected the previous output to be a DenseTensor<float>.");
        }

        var typeInfo = denseTensor.GetTypeInfo();
        TensorElementType elementType = typeInfo.ElementType;
        var dataBufferLength = denseTensor.Buffer.Length * typeInfo.TypeSize;

        // CreateFromMemory takes the shape as long[], while Dimensions exposes ints.
        var shape = new long[denseTensor.Dimensions.Length];
        for (int j = 0; j < shape.Length; ++j)
        {
            shape[j] = denseTensor.Dimensions[j];
        }

        // Wrap the previous output's buffer, described with the GPU memory info, as the new input.
        using (FixedBufferOnnxValue inputx = FixedBufferOnnxValue.CreateFromMemory<float>(ortGpuMemoryInfo, denseTensor.Buffer, elementType, shape, dataBufferLength))
        {
            ioBinding.ClearBoundInputs();
            ioBinding.ClearBoundOutputs();

            bindTimer.Start();
            ioBinding.BindInput("x", inputx);
            ioBinding.BindInput("y", inputx);
            bindTimer.Stop();

            // Only the last iteration binds the output with the CPU memory info, so the
            // result can be read from managed code after the loop.
            var outputMemoryInfo = (i == iterationCount - 1) ? ortCPUMemoryInfo : ortGpuMemoryInfo;
            ioBinding.BindOutputToDevice("output", outputMemoryInfo);

            syncTimer.Start();
            ioBinding.SynchronizeBoundInputs();
            syncTimer.Stop();

            runTimer.Start();
            var outputs = session.RunWithBindingAndNames(runOptions, ioBinding);
            runTimer.Stop();

            // The previous outputs backed this iteration's input, so they can only be
            // released after the run has completed.
            disposeTimer.Start();
            prevOutput?.Dispose();
            disposeTimer.Stop();

            prevOutput = outputs;
            inputTensorX = prevOutput.ElementAt(0).AsTensor<float>();
        }
    }

    // Stop before printing so console I/O is not counted in the total.
    totalTimer.Stop();

    Console.WriteLine($"Sync secs: {syncTimer.Elapsed.TotalSeconds}, BindSecs: {bindTimer.Elapsed.TotalSeconds}, RunSecs: {runTimer.Elapsed.TotalSeconds}, DisposeSecs: {disposeTimer.Elapsed.TotalSeconds}");
    Console.WriteLine($"Time spent of w IoBinding: {totalTimer.Elapsed.TotalSeconds}");
    Console.WriteLine($"Output: {inputTensorX.AsEnumerable().First()}");
}
finally
{
    // inputTensorX is a view over prevOutput's buffer, so the outputs are released
    // only after the result has been printed (or if an iteration failed).
    prevOutput?.Dispose();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment