Learn XGBoost with Real Code Examples
Updated Nov 24, 2025
Code Sample Descriptions
1. XGBoost Simple Classification Example
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
# The scikit-learn wrapper infers the number of classes from y, so num_class is not needed here
model = xgb.XGBClassifier(objective='multi:softprob', eval_metric='mlogloss')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
A minimal XGBoost example performing classification on the Iris dataset.
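For comparison, a minimal sketch of the same task using XGBoost's lower-level native API, where the training data goes into a DMatrix and num_class must be set explicitly because the booster cannot infer it from the labels:
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
params = {'objective': 'multi:softprob', 'num_class': 3, 'eval_metric': 'mlogloss'}
booster = xgb.train(params, dtrain, num_boost_round=50)
# multi:softprob yields an (n_samples, n_classes) probability array; argmax recovers class labels
y_pred = np.argmax(booster.predict(dtest), axis=1)
print('Accuracy:', (y_pred == y_test).mean())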
2. XGBoost Binary Classification
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X, y = make_classification(n_samples=100, n_features=5, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
Performs binary classification using XGBoost on synthetic data.
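Accuracy can be misleading when classes are imbalanced; a minimal sketch of scoring the same kind of model with ROC AUC, computed from the predicted probability of the positive class:
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
X, y = make_classification(n_samples=100, n_features=5, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)
# Column 1 of predict_proba holds the probability of the positive class
pos_probs = model.predict_proba(X_test)[:, 1]
print('ROC AUC:', roc_auc_score(y_test, pos_probs))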
3. XGBoost Regression Example
import xgboost as xgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = xgb.XGBRegressor(objective='reg:squarederror')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('MSE:', mean_squared_error(y_test, y_pred))
Performs regression using XGBoost on synthetic data.
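Since MSE is in squared target units, a common variant reports RMSE instead, which is on the same scale as the target; a minimal sketch of the same task:
import numpy as np
import xgboost as xgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = xgb.XGBRegressor(objective='reg:squarederror')
model.fit(X_train, y_train)
# Taking the square root puts the error back on the target's own scale
print('RMSE:', np.sqrt(mean_squared_error(y_test, model.predict(X_test))))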
4. XGBoost Feature Importance
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)
print('Feature Importances:', model.feature_importances_)
Trains a classifier on the breast-cancer dataset and prints its feature importances.
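The raw feature_importances_ array carries no column names; a minimal sketch that pairs each score with its feature name and prints the top ten:
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(data.data, data.target)
# Pair each importance score with its feature name, highest first
ranked = sorted(zip(data.feature_names, model.feature_importances_), key=lambda p: p[1], reverse=True)
for name, score in ranked[:10]:
    print(f'{name}: {score:.4f}')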
5. XGBoost Early Stopping
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()
X_train, X_val, y_train, y_val = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)
params = {'objective': 'multi:softprob', 'num_class': 3, 'eval_metric': 'mlogloss'}
model = xgb.train(params, dtrain, num_boost_round=100, evals=[(dval, 'eval')], early_stopping_rounds=10)
Uses early stopping on a held-out validation set to halt boosting once the evaluation metric stops improving.
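Early stopping also works through the scikit-learn wrapper; a minimal sketch, assuming a recent XGBoost release (1.6+) where early_stopping_rounds is passed to the constructor rather than to fit:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()
X_train, X_val, y_train, y_val = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
model = xgb.XGBClassifier(eval_metric='mlogloss', early_stopping_rounds=10, n_estimators=100)
# eval_set provides the validation data that early stopping monitors
model.fit(X_train, y_train, eval_set=[(X_val, y_val)])
print('Best iteration:', model.best_iteration)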
6. XGBoost Cross-Validation
import xgboost as xgb
from sklearn.datasets import load_diabetes
from sklearn.model_selection import KFold
# load_boston was removed from scikit-learn; load_diabetes is a drop-in regression dataset
data = load_diabetes()
kf = KFold(n_splits=5, shuffle=True, random_state=42)
results = []
for train_index, test_index in kf.split(data.data):
    X_train, X_test = data.data[train_index], data.data[test_index]
    y_train, y_test = data.target[train_index], data.target[test_index]
    model = xgb.XGBRegressor(objective='reg:squarederror')
    model.fit(X_train, y_train)
    # score() returns the R^2 of the predictions on the held-out fold
    results.append(model.score(X_test, y_test))
print('CV Scores:', results)
Performs 5-fold cross-validation with XGBoost using scikit-learn's KFold.
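XGBoost also ships its own cross-validation helper, xgb.cv, which operates on a DMatrix and reports per-round evaluation results; a minimal sketch on the same dataset:
import xgboost as xgb
from sklearn.datasets import load_diabetes
data = load_diabetes()
dtrain = xgb.DMatrix(data.data, label=data.target)
params = {'objective': 'reg:squarederror'}
# Returns a pandas DataFrame with train/test metric means and stds per boosting round
cv_results = xgb.cv(params, dtrain, num_boost_round=50, nfold=5, seed=42)
print(cv_results.tail())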
7. XGBoost Grid Search Example
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
data = load_iris()
param_grid = {'max_depth': [2, 3], 'n_estimators': [50, 100]}
model = xgb.XGBClassifier(eval_metric='mlogloss')
grid = GridSearchCV(model, param_grid, cv=3)
grid.fit(data.data, data.target)
print('Best Params:', grid.best_params_)
Performs hyperparameter tuning with GridSearchCV.
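When the grid grows large, an exhaustive search becomes expensive; a minimal sketch of the same tuning with scikit-learn's RandomizedSearchCV, which samples a fixed number of parameter combinations instead of trying them all:
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV
data = load_iris()
param_dist = {'max_depth': [2, 3, 4, 5], 'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.1, 0.3]}
model = xgb.XGBClassifier(eval_metric='mlogloss')
# n_iter caps how many parameter combinations are sampled and evaluated
search = RandomizedSearchCV(model, param_dist, n_iter=5, cv=3, random_state=42)
search.fit(data.data, data.target)
print('Best Params:', search.best_params_)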
8. XGBoost Predict Probabilities
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
# The scikit-learn wrapper infers the number of classes from y, so num_class is not needed
model = xgb.XGBClassifier(objective='multi:softprob', eval_metric='mlogloss')
model.fit(X_train, y_train)
# Returns one probability per class for each test row
probs = model.predict_proba(X_test)
print('Predicted Probabilities:', probs)
Predicts class probabilities with XGBoost.
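Each row of predict_proba sums to one, and predict is simply the argmax over these probabilities; a minimal sketch verifying both properties:
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
model = xgb.XGBClassifier(objective='multi:softprob', eval_metric='mlogloss')
model.fit(X_train, y_train)
probs = model.predict_proba(X_test)
# The hard predictions are the column index of the largest probability in each row
assert (model.predict(X_test) == np.argmax(probs, axis=1)).all()
print('Rows sum to 1:', np.allclose(probs.sum(axis=1), 1.0))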
9. XGBoost Save and Load Model
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
model = xgb.XGBClassifier(eval_metric='mlogloss')
model.fit(X_train, y_train)
model.save_model('xgb_model.json')
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('xgb_model.json')
print('Loaded model accuracy:', loaded_model.score(X_test, y_test))
Saves an XGBoost model to a JSON file and loads it back.
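save_model infers the format from the file extension; a minimal sketch, assuming XGBoost 1.6+ where a .ubj extension selects the more compact binary UBJSON format:
import xgboost as xgb
from sklearn.datasets import load_iris
data = load_iris()
model = xgb.XGBClassifier(eval_metric='mlogloss')
model.fit(data.data, data.target)
# The .ubj extension selects the binary UBJSON format instead of JSON
model.save_model('xgb_model.ubj')
loaded = xgb.XGBClassifier()
loaded.load_model('xgb_model.ubj')
print('Loaded model accuracy:', loaded.score(data.data, data.target))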
10. XGBoost Feature Importance Plot
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
data = load_iris()
model = xgb.XGBClassifier(eval_metric='mlogloss')
model.fit(data.data, data.target)
xgb.plot_importance(model)
plt.show()
Plots feature importances using XGBoost and matplotlib.
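Note that plot_importance defaults to importance_type='weight' (split counts), which can rank features differently from the gain-based scores; a minimal sketch plotting gain instead:
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
data = load_iris()
model = xgb.XGBClassifier(eval_metric='mlogloss')
model.fit(data.data, data.target)
# 'gain' ranks features by average loss reduction rather than split count
xgb.plot_importance(model, importance_type='gain')
plt.show()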