Learn CATBOOST with Real Code Examples
Updated Nov 24, 2025
Code Sample Descriptions
1
CatBoost Simple Classification Example
from catboost import CatBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Fetch the Iris data and hold out 20% of the rows for evaluation
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
# Configure a small, silent classifier and fit it on the training split
classifier_params = {
    'iterations': 100,
    'learning_rate': 0.1,
    'depth': 3,
    'verbose': 0,
}
classifier = CatBoostClassifier(**classifier_params)
classifier.fit(X_train, y_train)
# Score the predictions made on the held-out rows
predicted = classifier.predict(X_test)
accuracy = accuracy_score(y_test, predicted)
print('Accuracy:', accuracy)
A minimal CatBoost example performing classification on the Iris dataset.
2
CatBoost Regression Example
from catboost import CatBoostRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Build a synthetic regression problem (200 rows, 5 features) and split it 80/20
features, targets = make_regression(
    n_samples=200, n_features=5, noise=0.1, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)
# Configure a silent regressor and fit it on the training split
regressor_params = {
    'iterations': 200,
    'learning_rate': 0.05,
    'depth': 4,
    'verbose': 0,
}
regressor = CatBoostRegressor(**regressor_params)
regressor.fit(X_train, y_train)
# Report the mean squared error on the held-out rows
predicted = regressor.predict(X_test)
mse = mean_squared_error(y_test, predicted)
print('MSE:', mse)
A simple regression using CatBoost on synthetic data.
3
CatBoost Multi-class Classification
from catboost import CatBoostClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Fetch the Wine data and hold out 30% of the rows for evaluation
wine = load_wine()
features, labels = wine.data, wine.target
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.3, random_state=42
)
# Configure a silent classifier with the multi-class objective set explicitly
classifier_params = {
    'iterations': 150,
    'learning_rate': 0.1,
    'depth': 5,
    'verbose': 0,
    'loss_function': 'MultiClass',
}
classifier = CatBoostClassifier(**classifier_params)
classifier.fit(X_train, y_train)
# Score the predictions made on the held-out rows
predicted = classifier.predict(X_test)
accuracy = accuracy_score(y_test, predicted)
print('Accuracy:', accuracy)
CatBoost handling multi-class classification on the Wine dataset.
4
CatBoost with Categorical Features
from catboost import CatBoostClassifier, Pool
import pandas as pd
# Tiny toy table: one numeric column, one string-valued categorical column, one binary label
frame = pd.DataFrame(
    {
        'feature_num': [1, 2, 3, 4, 5, 6],
        'feature_cat': ['A', 'B', 'A', 'B', 'C', 'C'],
        'label': [0, 1, 0, 1, 0, 1],
    }
)
feature_columns = ['feature_num', 'feature_cat']
X = frame[feature_columns]
y = frame['label']
# Wrap the data in a Pool, naming the column CatBoost should treat as categorical
categorical_columns = ['feature_cat']
train_pool = Pool(X, y, cat_features=categorical_columns)
# Configure a small, silent classifier and fit it on the pool
classifier = CatBoostClassifier(
    iterations=50,
    learning_rate=0.1,
    depth=3,
    verbose=0,
)
classifier.fit(train_pool)
# Predict on the same rows that were used for training
predicted = classifier.predict(X)
print('Predictions:', predicted)
Using CatBoost with categorical features in a classification task.
5
CatBoost with Early Stopping
from catboost import CatBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Fetch the Iris data and set aside 20% of the rows as a validation set
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_valid, y_train, y_valid = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
# Allow up to 500 iterations; early stopping is requested in the fit call below
classifier = CatBoostClassifier(
    iterations=500,
    learning_rate=0.05,
    depth=4,
    verbose=0,
)
# Fit with the validation set as eval_set and early_stopping_rounds=20
validation_set = (X_valid, y_valid)
classifier.fit(
    X_train,
    y_train,
    eval_set=validation_set,
    early_stopping_rounds=20,
)
# Predict on the validation rows
predicted = classifier.predict(X_valid)
print('Predictions:', predicted)
CatBoost training with early stopping based on validation set.
6
CatBoost Ranking Example
from catboost import CatBoostRanker
import numpy as np
# Sample ranking data: six rows with three features each.
# A seeded generator keeps the printed scores reproducible between runs,
# in line with the fixed random_state used by the other examples.
rng = np.random.default_rng(42)
X_train = rng.random((6, 3))
# One target value per row
y_train = np.array([1, 2, 3, 1, 2, 3])
# Group identifier per row: rows 0-2 form group 0, rows 3-5 form group 1
group_id = np.array([0, 0, 0, 1, 1, 1])
# Define model
model = CatBoostRanker(iterations=100, learning_rate=0.1, depth=3, verbose=0)
# Train model (group_id tells the ranker which rows belong together)
model.fit(X_train, y_train, group_id=group_id)
# Predict on the training rows
y_pred = model.predict(X_train)
print('Ranking Predictions:', y_pred)
Using CatBoost for a simple ranking problem.
7
CatBoost with Custom Loss Function
from catboost import CatBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load dataset (Iris has three target classes)
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
# Explicitly select a built-in loss function.
# 'Logloss' is a binary-classification objective and does not fit a
# three-class target, so the multi-class objective is used here instead.
model = CatBoostClassifier(loss_function='MultiClass', iterations=200, learning_rate=0.05, depth=4, verbose=0)
# Train model
model.fit(X_train, y_train)
# Predict
y_pred = model.predict(X_test)
print('Predictions:', y_pred)
Example showing how to explicitly select one of CatBoost's built-in loss functions (objectives) when creating a model.
8
CatBoost Feature Importance
from catboost import CatBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Fetch the Iris data and split it 80/20 (only the training part is used below)
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
# Configure a small, silent classifier and fit it on the training split
classifier_params = {
    'iterations': 100,
    'learning_rate': 0.1,
    'depth': 3,
    'verbose': 0,
}
classifier = CatBoostClassifier(**classifier_params)
classifier.fit(X_train, y_train)
# Ask the fitted model for its feature importance values and print them
feature_scores = classifier.get_feature_importance()
print('Feature Importance:', feature_scores)
Compute and display feature importance using CatBoost.
9
CatBoost with Grid Search
from catboost import CatBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
# Fetch the Iris data and split it 80/20 (the search runs on the training part)
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
# Base estimator; the tuned hyperparameters are supplied by the grid below
base_classifier = CatBoostClassifier(verbose=0)
# Candidate values for each hyperparameter
search_space = {
    'depth': [3, 4, 5],
    'learning_rate': [0.05, 0.1],
    'iterations': [100, 200],
}
# Search the grid with 3-fold cross-validation
searcher = GridSearchCV(base_classifier, search_space, cv=3)
searcher.fit(X_train, y_train)
best_params = searcher.best_params_
print('Best Params:', best_params)
Performing hyperparameter tuning using Grid Search with CatBoost.
10
CatBoost Save and Load Model
from catboost import CatBoostClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# Load dataset
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
# Define model
model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=3, verbose=0)
# Train model
model.fit(X_train, y_train)
# Save model to a file in the current working directory
model.save_model('catboost_model.cbm')
# Load the saved file into a fresh, untrained classifier instance
loaded_model = CatBoostClassifier()
loaded_model.load_model('catboost_model.cbm')
# Predict with the reloaded model and report the accuracy on the test split
# (the value printed under 'Accuracy:' is now a score, not the raw predictions)
y_pred = loaded_model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
Saving and loading a trained CatBoost model.