Learn LightGBM with Real Code Examples
Updated Nov 24, 2025
Code Sample Descriptions
1
LightGBM Simple Classification Example
import lightgbm as lgb
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Fetch the iris data and hold out 20% of the rows for evaluation.
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# LightGBM's native training API consumes its own Dataset wrapper.
train_data = lgb.Dataset(X_train, label=y_train)

# Three-class objective, evaluated with multi-class log loss.
params = dict(objective='multiclass', num_class=3, metric='multi_logloss')

# Train with num_boost_round=100.
model = lgb.train(params, train_data, num_boost_round=100)

# For the multiclass objective predict() gives one score per class for every
# row; the predicted label is the index of the largest score in each row.
class_scores = model.predict(X_test)
y_pred_labels = np.argmax(class_scores, axis=1)
print('Accuracy:', accuracy_score(y_test, y_pred_labels))
A minimal LightGBM example performing classification on a small dataset.
2
LightGBM Binary Classification Example
import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Build a synthetic two-class problem; random_state makes the generated data
# repeatable.
X, y = make_classification(n_samples=200, n_features=5, n_classes=2, random_state=42)
# Seed the split as well: without random_state the held-out rows, and
# therefore the printed accuracy, change on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Wrap the training rows in LightGBM's Dataset container.
train_data = lgb.Dataset(X_train, label=y_train)
# Binary objective, evaluated with binary log loss.
params = {'objective': 'binary', 'metric': 'binary_logloss'}
model = lgb.train(params, train_data, num_boost_round=50)
# With the binary objective predict() returns a score in [0, 1] per row;
# threshold it at 0.5 to obtain hard 0/1 labels.
y_pred = model.predict(X_test)
y_pred_labels = (y_pred > 0.5).astype(int)
print('Accuracy:', accuracy_score(y_test, y_pred_labels))
Binary classification using LightGBM on synthetic data.
3
LightGBM Regression Example
import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Build a small synthetic regression problem; random_state makes the
# generated data repeatable.
X, y = make_regression(n_samples=100, n_features=3, noise=0.1, random_state=42)
# Seed the split as well: without random_state the held-out rows, and
# therefore the printed RMSE, change on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Wrap the training rows in LightGBM's Dataset container.
train_data = lgb.Dataset(X_train, label=y_train)
# Regression objective, evaluated with root-mean-squared error.
params = {'objective': 'regression', 'metric': 'rmse'}
model = lgb.train(params, train_data, num_boost_round=100)
y_pred = model.predict(X_test)
# mean_squared_error gives the MSE; take the square root to report RMSE.
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))
Simple regression example with LightGBM.
4
LightGBM with Categorical Features
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Tiny illustrative frame: one numeric column, one string column and a binary
# label.
# NOTE(review): with only five rows the 80/20 split leaves four rows for
# training and one for testing, so the printed accuracy is not meaningful;
# consider a larger dataset when adapting this example.
data = pd.DataFrame({
    'feature1': [1, 2, 3, 4, 5],
    'feature2': ['A', 'B', 'A', 'B', 'C'],
    'label': [0, 1, 0, 1, 0],
})

# Store the string column with pandas' category dtype before handing it to
# LightGBM.
data['feature2'] = data['feature2'].astype('category')

feature_columns = ['feature1', 'feature2']
X = data[feature_columns]
y = data['label']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Name the categorical column explicitly when building the Dataset.
train_data = lgb.Dataset(
    X_train,
    label=y_train,
    categorical_feature=['feature2'],
)

# Binary objective, evaluated with binary log loss.
params = dict(objective='binary', metric='binary_logloss')
model = lgb.train(params, train_data, num_boost_round=50)

# Threshold the predicted scores at 0.5 to obtain hard 0/1 labels.
y_pred = model.predict(X_test)
y_pred_labels = (y_pred > 0.5).astype(int)
print('Accuracy:', accuracy_score(y_test, y_pred_labels))
Classification using LightGBM with categorical features.
5
LightGBM Early Stopping Example
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the breast-cancer data and hold out 20% of it as a validation set.
data = load_breast_cancer()
X_train, X_val, y_train, y_val = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
train_data = lgb.Dataset(X_train, label=y_train)
# The validation Dataset is built with the training Dataset as its reference.
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)
# Binary objective, evaluated with binary log loss on the validation set.
params = {'objective': 'binary', 'metric': 'binary_logloss'}
# Early stopping is requested through a callback. The early_stopping_rounds
# keyword of lgb.train() was removed in LightGBM 4.0 and raises TypeError
# there; the callback form works on both older and current releases.
# Training stops once the validation metric has not improved for 10 rounds.
model = lgb.train(
    params,
    train_data,
    num_boost_round=100,
    valid_sets=[val_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)
# Threshold the predicted scores at 0.5 to obtain hard 0/1 labels.
y_pred = model.predict(X_val)
y_pred_labels = (y_pred > 0.5).astype(int)
print('Accuracy:', accuracy_score(y_val, y_pred_labels))
Demonstrates early stopping during training.
6
LightGBM Feature Importance Example
import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Fetch the iris data and keep 80% of the rows for training.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)

# Wrap the training rows in LightGBM's Dataset container.
train_data = lgb.Dataset(X_train, label=y_train)

# Three-class objective, evaluated with multi-class log loss.
params = dict(objective='multiclass', num_class=3, metric='multi_logloss')
model = lgb.train(params, train_data, num_boost_round=100)

# Plot the trained model's feature importances, then display the figure.
lgb.plot_importance(model)
plt.show()
Display feature importance after training a model.
7
LightGBM Cross Validation Example
import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
import numpy as np
# Load the full iris feature matrix and label vector.
data = load_iris()
X = data.data
y = data.target
# Shuffle before splitting: the iris rows are ordered by class, so contiguous
# (unshuffled) folds would give each fold a skewed class mix. random_state
# keeps the fold assignment repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# Three-class objective, evaluated with multi-class log loss.
params = {'objective': 'multiclass', 'num_class': 3, 'metric': 'multi_logloss'}
# Train a fresh model on each fold's training rows and score it on the
# fold's held-out rows.
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    train_data = lgb.Dataset(X_train, label=y_train)
    model = lgb.train(params, train_data, num_boost_round=50)
    # predict() gives one score per class; pick the highest-scoring class.
    y_pred = np.argmax(model.predict(X_test), axis=1)
    # Fraction of held-out rows whose predicted class matches the label.
    print('Fold accuracy:', np.mean(y_pred == y_test))
Performing k-fold cross-validation using LightGBM.
8
LightGBM Regression with Validation
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np  # np.sqrt is used below but this example never imported numpy

# Build a synthetic regression problem; random_state makes the generated data
# repeatable.
X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=42)
# Seed the split as well so the validation rows, and therefore the printed
# RMSE, are the same on every run.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
train_data = lgb.Dataset(X_train, label=y_train)
# The validation Dataset is built with the training Dataset as its reference.
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)
# Regression objective, evaluated with root-mean-squared error.
params = {'objective': 'regression', 'metric': 'rmse'}
# Early stopping is requested through a callback. The early_stopping_rounds
# keyword of lgb.train() was removed in LightGBM 4.0 and raises TypeError
# there; the callback form works on both older and current releases.
# Training stops once the validation RMSE has not improved for 10 rounds.
model = lgb.train(
    params,
    train_data,
    num_boost_round=100,
    valid_sets=[val_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)
y_pred = model.predict(X_val)
# mean_squared_error gives the MSE; take the square root to report RMSE.
print('RMSE:', np.sqrt(mean_squared_error(y_val, y_pred)))
Regression with LightGBM using a validation dataset to monitor RMSE.
9
LightGBM Multi-class Classification Example
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Use a seeded generator so the synthetic data is identical on every run.
rng = np.random.default_rng(42)
# 150 rows of 4 uniform features in [0, 1) and integer labels drawn from
# {0, 1, 2}.
# NOTE: the labels are drawn independently of the features, so there is no
# signal to learn and the accuracy is expected to stay near chance level;
# this example only demonstrates the multiclass API.
X = rng.random((150, 4))
y = rng.integers(0, 3, 150)
# Seed the split as well so the held-out rows do not change between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Wrap the training rows in LightGBM's Dataset container.
train_data = lgb.Dataset(X_train, label=y_train)
# Three-class objective, evaluated with multi-class log loss.
params = {'objective': 'multiclass', 'num_class': 3, 'metric': 'multi_logloss'}
model = lgb.train(params, train_data, num_boost_round=100)
# predict() gives one score per class; pick the highest-scoring class.
y_pred = np.argmax(model.predict(X_test), axis=1)
print('Accuracy:', accuracy_score(y_test, y_pred))
Train a multi-class classifier on synthetic data.