Learn ONNX with Real Code Examples
Updated Nov 24, 2025
Code Sample Descriptions
1
ONNX Model Inference Example
import onnxruntime as ort
import numpy as np
# Load ONNX model
session = ort.InferenceSession('model.onnx')
# Prepare input
input_name = session.get_inputs()[0].name
input_data = np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32)
# Run inference
outputs = session.run(None, {input_name: input_data})
print('Model output:', outputs)
A minimal example loading an ONNX model and performing inference using ONNX Runtime.
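Before preparing inputs, it helps to inspect the tensor names, shapes, and types the model expects. A minimal sketch, assuming the same model.onnx:
import onnxruntime as ort
session = ort.InferenceSession('model.onnx')
# Each entry exposes the tensor name, shape (symbolic dims appear as strings), and element type
for inp in session.get_inputs():
    print('Input:', inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print('Output:', out.name, out.shape, out.type)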
2
ONNX Image Classification Inference
import onnxruntime as ort
import numpy as np
from PIL import Image
# Load and preprocess the image
image = Image.open('image.jpg').convert('RGB').resize((224, 224))
input_data = np.array(image).astype(np.float32) / 255.0
# Most ResNet50 exports expect ImageNet mean/std normalization and NCHW layout
input_data = (input_data - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
input_data = np.transpose(input_data, (2, 0, 1)).astype(np.float32)
input_data = np.expand_dims(input_data, axis=0)  # shape (1, 3, 224, 224)
# Load model
session = ort.InferenceSession('resnet50.onnx')
input_name = session.get_inputs()[0].name
# Run inference
outputs = session.run(None, {input_name: input_data})
print('Predicted class:', np.argmax(outputs[0]))
Performing image classification with a pretrained ResNet50 ONNX model, including standard ImageNet preprocessing (RGB conversion, mean/std normalization, NCHW layout).
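To turn the predicted class index into a human-readable name, map it through a labels file. A sketch continuing the snippet above; imagenet_labels.txt is a hypothetical file with one class name per line, ordered by class index:
# Hypothetical labels file: one class name per line, ordered by class index
with open('imagenet_labels.txt') as f:
    labels = [line.strip() for line in f]
class_id = int(np.argmax(outputs[0]))
print('Predicted label:', labels[class_id])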
3
ONNX Batch Inference Example
import onnxruntime as ort
import numpy as np
# Batch input
batch_input = np.array([[1,2,3,4],[5,6,7,8]], dtype=np.float32)
# Load model
session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name
# Run batch inference
outputs = session.run(None, {input_name: batch_input})
print('Batch outputs:', outputs)
Performing inference on a batch of inputs using ONNX Runtime.
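In practice a batch is often assembled from individual samples. A sketch assuming the same model.onnx, stacking samples with np.stack and reading results back per sample:
import onnxruntime as ort
import numpy as np
samples = [np.array([1, 2, 3, 4], dtype=np.float32),
           np.array([5, 6, 7, 8], dtype=np.float32)]
batch_input = np.stack(samples)  # shape (2, 4)
session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name
outputs = session.run(None, {input_name: batch_input})
for i, row in enumerate(outputs[0]):  # row i corresponds to samples[i]
    print(f'Sample {i}:', row)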
4
ONNX Regression Model Inference
import onnxruntime as ort
import numpy as np
# Sample input
input_data = np.array([[10.0, 20.0, 30.0]], dtype=np.float32)
# Load model
session = ort.InferenceSession('regression_model.onnx')
input_name = session.get_inputs()[0].name
# Run inference
pred = session.run(None, {input_name: input_data})
print('Regression prediction:', pred)
Inference example for a regression ONNX model.
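To sanity-check a regression model, compare predictions against known targets. A sketch continuing the snippet above; y_true is a hypothetical ground-truth value:
# Hypothetical ground-truth target for the sample input above
y_true = np.array([[42.0]], dtype=np.float32)
mse = float(np.mean((pred[0] - y_true) ** 2))
print('MSE against ground truth:', mse)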
5
ONNX GPU Inference Example
import onnxruntime as ort
import numpy as np
# Load model on GPU
session = ort.InferenceSession('model.onnx', providers=['CUDAExecutionProvider'])
input_name = session.get_inputs()[0].name
input_data = np.random.rand(1,4).astype(np.float32)
# Run inference
outputs = session.run(None, {input_name: input_data})
print('GPU inference output:', outputs)
Running ONNX model inference on GPU using ONNX Runtime with CUDA provider.
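If the CUDA provider is not installed, requesting it alone can fail. A common pattern is to check availability and list a CPU fallback; a sketch with the same model.onnx:
import onnxruntime as ort
print('Available providers:', ort.get_available_providers())
# Prefer CUDA, fall back to CPU if the CUDA build or driver is missing
session = ort.InferenceSession(
    'model.onnx',
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
print('Active providers:', session.get_providers())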
6
ONNX Multiple Outputs Example
import onnxruntime as ort
import numpy as np
# Load model
session = ort.InferenceSession('multi_output_model.onnx')
input_name = session.get_inputs()[0].name
input_data = np.random.rand(1,10).astype(np.float32)
# Run inference
outputs = session.run(None, {input_name: input_data})
print('Output 1:', outputs[0])
print('Output 2:', outputs[1])
Running inference for an ONNX model with multiple outputs.
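You can also request specific outputs by name instead of passing None for all of them. A sketch with the same multi_output_model.onnx:
import onnxruntime as ort
import numpy as np
session = ort.InferenceSession('multi_output_model.onnx')
input_name = session.get_inputs()[0].name
output_names = [o.name for o in session.get_outputs()]
input_data = np.random.rand(1, 10).astype(np.float32)
# Fetch only the first declared output
first_only = session.run([output_names[0]], {input_name: input_data})
print(output_names[0], '->', first_only[0])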
7
ONNX Dynamic Input Shape Example
import onnxruntime as ort
import numpy as np
# Input with variable batch size
input_data = np.random.rand(5,4).astype(np.float32)
# Load model
session = ort.InferenceSession('dynamic_model.onnx')
input_name = session.get_inputs()[0].name
# Run inference
outputs = session.run(None, {input_name: input_data})
print('Dynamic input output:', outputs)
Inference with dynamic input shapes in ONNX Runtime.
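A model exported with a dynamic batch axis accepts any batch size through the same session. A sketch assuming dynamic_model.onnx has a dynamic first dimension:
import onnxruntime as ort
import numpy as np
session = ort.InferenceSession('dynamic_model.onnx')
input_name = session.get_inputs()[0].name
# The same session handles different batch sizes when the axis is dynamic
for batch_size in (1, 5, 32):
    x = np.random.rand(batch_size, 4).astype(np.float32)
    out = session.run(None, {input_name: x})
    print(f'batch {batch_size} -> output shape {out[0].shape}')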
8
ONNX Softmax Output Example
import onnxruntime as ort
import numpy as np
# Load model
session = ort.InferenceSession('classification_model.onnx')
input_name = session.get_inputs()[0].name
input_data = np.random.rand(1,10).astype(np.float32)
# Run inference
logits = session.run(None, {input_name: input_data})[0]
# Subtract the row max before exponentiating for numerical stability
shifted = np.exp(logits - np.max(logits, axis=1, keepdims=True))
softmax = shifted / np.sum(shifted, axis=1, keepdims=True)
print('Softmax probabilities:', softmax)
Performing inference and applying softmax to ONNX model outputs.
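From the probabilities you can read off the most likely classes. A sketch continuing the snippet above:
# Top-3 classes by probability (indices sorted descending)
top_indices = np.argsort(softmax[0])[::-1][:3]
for idx in top_indices:
    print(f'class {idx}: probability {softmax[0][idx]:.4f}')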
9
ONNX Text Model Inference Example
import onnxruntime as ort
import numpy as np
# Example input
input_data = np.random.rand(1, 128).astype(np.float32)  # e.g., a precomputed 128-dim text embedding
# Load model
session = ort.InferenceSession('text_model.onnx')
input_name = session.get_inputs()[0].name
# Run inference
outputs = session.run(None, {input_name: input_data})
print('Text classification output:', outputs)
Performing inference using an ONNX text classification model.
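Many exported text models take integer token IDs rather than float features. A sketch assuming a transformer-style export; the input names input_ids and attention_mask and the vocabulary size are assumptions, not guaranteed by any particular model:
import onnxruntime as ort
import numpy as np
session = ort.InferenceSession('text_model.onnx')
# Assumed input names; check session.get_inputs() for the real ones
input_ids = np.random.randint(0, 30000, size=(1, 128), dtype=np.int64)
attention_mask = np.ones((1, 128), dtype=np.int64)
outputs = session.run(None, {'input_ids': input_ids,
                             'attention_mask': attention_mask})
print('Logits shape:', outputs[0].shape)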
10
ONNX Model Warmup Example
import onnxruntime as ort
import numpy as np
# Load model
session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name
# Warmup pass
for _ in range(5):
    input_data = np.random.rand(1, 4).astype(np.float32)
    _ = session.run(None, {input_name: input_data})
print('ONNX model warmed up and ready for real inference.')
Performing a warmup pass for an ONNX model to optimize runtime performance.
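After warmup, later runs give a more representative latency measurement. A sketch timing the same model.onnx with time.perf_counter:
import time
import onnxruntime as ort
import numpy as np
session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name
input_data = np.random.rand(1, 4).astype(np.float32)
for _ in range(5):  # warmup passes, excluded from timing
    session.run(None, {input_name: input_data})
n_runs = 100
start = time.perf_counter()
for _ in range(n_runs):  # timed runs
    session.run(None, {input_name: input_data})
elapsed = time.perf_counter() - start
print(f'Average latency: {elapsed / n_runs * 1000:.3f} ms')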