@pranavsharma
Created May 17, 2023 07:59
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Buffers;
using System.IO;
using System.Linq; // needed for ElementAt, First, AsEnumerable below
var session = new InferenceSession("C:\\Users\\prs\\model.onnx", SessionOptions.MakeSessionOptionWithCudaProvider(0)); // return (x + y) * 2
int batch_size = 10000;
IDisposableReadOnlyCollection<DisposableNamedOnnxValue> prevOutput = null;
var ortGpuMemoryInfo = new OrtMemoryInfo(OrtMemoryInfo.allocatorCUDA, OrtAllocatorType.DeviceAllocator, 0, OrtMemType.Default);
var ortCPUMemoryInfo = new OrtMemoryInfo(OrtMemoryInfo.allocatorCUDA_PINNED, OrtAllocatorType.DeviceAllocator, 0, OrtMemType.CpuOutput);
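// ortGpuMemoryInfo describes CUDA device memory (outputs bound to it stay on the GPU);
// ortCPUMemoryInfo describes CUDA-pinned host memory (OrtMemType.CpuOutput), used for the
// final output so it can be read back on the CPU.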
var ioBinding = session.CreateIoBinding();
var runOptions = new RunOptions();
// Warm-up pass with IoBinding: bind the CPU-side input tensor and bind the output
// to GPU memory so subsequent iterations can consume the result without a host copy.
Tensor<float> inputTensorX = new DenseTensor<float>(new float[batch_size * 1000], new int[] { batch_size, 1000 });
inputTensorX.Fill(1.0f);
using (FixedBufferOnnxValue inputx = FixedBufferOnnxValue.CreateFromTensor(inputTensorX))
{
    ioBinding.BindInput("x", inputx);
    ioBinding.BindInput("y", inputx);
    ioBinding.BindOutputToDevice("output", ortGpuMemoryInfo);
    ioBinding.SynchronizeBoundInputs();
    var outputs = session.RunWithBindingAndNames(runOptions, ioBinding);
    ioBinding.ClearBoundInputs();
    prevOutput?.Dispose();
    prevOutput = outputs;
    inputTensorX = prevOutput.ElementAt(0).AsTensor<float>(); // output now backed by GPU memory
}
// start profiling
var dt = DateTime.Now;
var N = 10000;
var syncSecs = 0.0;
var runSecs = 0.0;
var disposeSecs = 0.0;
var bindSecs = 0.0;
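// Timed loop: each iteration wraps the previous run's output buffer as a GPU-resident
// OrtValue and re-binds it as both inputs, accumulating the time spent in bind,
// input synchronization, run, and output disposal separately.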
for (int i = 0; i < N; i++)
{
    // Describe the current tensor so its buffer can be re-bound as the next input.
    var tensorBase = inputTensorX as TensorBase;
    var typeInfo = tensorBase.GetTypeInfo();
    TensorElementType elementType = typeInfo.ElementType;
    DenseTensor<float> denseTensor = inputTensorX as DenseTensor<float>;
    var elementSize = typeInfo.TypeSize;
    var dataBufferLength = denseTensor.Buffer.Length * elementSize;
    var shape = new long[denseTensor.Dimensions.Length];
    for (int j = 0; j < denseTensor.Dimensions.Length; ++j)
    {
        shape[j] = denseTensor.Dimensions[j];
    }
    using (FixedBufferOnnxValue inputx = FixedBufferOnnxValue.CreateFromMemory<float>(ortGpuMemoryInfo, denseTensor.Buffer, elementType, shape, dataBufferLength))
    {
        ioBinding.ClearBoundInputs();
        ioBinding.ClearBoundOutputs();

        var start_bind = DateTime.Now;
        ioBinding.BindInput("x", inputx);
        ioBinding.BindInput("y", inputx);
        bindSecs += (DateTime.Now - start_bind).TotalSeconds;

        // Bind the final iteration's output to pinned host memory so it can be read
        // on the CPU; otherwise keep it on the GPU.
        if (i == N - 1)
        {
            ioBinding.BindOutputToDevice("output", ortCPUMemoryInfo);
        }
        else
        {
            ioBinding.BindOutputToDevice("output", ortGpuMemoryInfo);
        }

        var start_sync = DateTime.Now;
        ioBinding.SynchronizeBoundInputs();
        syncSecs += (DateTime.Now - start_sync).TotalSeconds;

        var start_run = DateTime.Now;
        var outputs = session.RunWithBindingAndNames(runOptions, ioBinding);
        runSecs += (DateTime.Now - start_run).TotalSeconds;

        var start_dispose = DateTime.Now;
        prevOutput?.Dispose();
        disposeSecs += (DateTime.Now - start_dispose).TotalSeconds;

        // Feed this run's output back in as the next iteration's input.
        prevOutput = outputs;
        inputTensorX = prevOutput.ElementAt(0).AsTensor<float>();
    }
}
Console.WriteLine($"Sync secs: {syncSecs}, BindSecs: {bindSecs}, RunSecs: {runSecs}, DisposeSecs: {disposeSecs}");
Console.WriteLine($"Time spent of w IoBinding: {(DateTime.Now - dt).TotalSeconds}");
Console.WriteLine($"Output: {inputTensorX.AsEnumerable().First()}");
prevOutput?.Dispose();
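// For reference, a minimal sketch (not part of the original gist) of the same workload
// without IoBinding, using the plain Run() API. Here each iteration pays the implicit
// host-to-device input copy and device-to-host output copy that the IoBinding timings
// above are meant to avoid. The names baselineInput, baselineStart and lastValue are
// illustrative additions.
var baselineInput = new DenseTensor<float>(new float[batch_size * 1000], new int[] { batch_size, 1000 });
baselineInput.Fill(1.0f);
var baselineStart = DateTime.Now;
float lastValue = 0.0f;
for (int i = 0; i < N; i++)
{
    var inputs = new[]
    {
        NamedOnnxValue.CreateFromTensor("x", baselineInput),
        NamedOnnxValue.CreateFromTensor("y", baselineInput)
    };
    using (var results = session.Run(inputs))
    {
        lastValue = results.First().AsTensor<float>().AsEnumerable().First();
    }
}
Console.WriteLine($"Time spent without IoBinding: {(DateTime.Now - baselineStart).TotalSeconds}, Output: {lastValue}");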