"""
Cerebrum Forex - LightGBM Model
Light gradient boosting model for signal prediction.
"""
import logging
import pickle
from pathlib import Path
from typing import Tuple
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from config.settings import default_settings, IS_FROZEN
try:
import lightgbm as lgb
LIGHTGBM_AVAILABLE = True
except ImportError:
LIGHTGBM_AVAILABLE = False
from .base_model import BaseModel
logger = logging.getLogger(__name__)
class LightGBMModel(BaseModel):
    """LightGBM gradient-boosting classifier for forex signal prediction.

    Wraps ``lgb.LGBMClassifier`` behind the :class:`BaseModel` interface
    (train / predict / load). If the ``lightgbm`` package is not installed,
    construction logs an error and every operation degrades gracefully
    (``train`` returns 0.0, ``predict`` returns ``("NEUTRAL", 0.0)``).
    """

    def __init__(self, timeframe: str, model_dir: Path):
        """Initialize the model for one timeframe.

        Args:
            timeframe: Timeframe identifier (e.g. "H1") passed to BaseModel.
            model_dir: Directory where the pickled model is stored.
        """
        super().__init__(timeframe, model_dir)
        # Always define params so later references (e.g. load()'s
        # data.get('params', self.params)) never raise AttributeError
        # when lightgbm is missing.
        self.params: dict = {}
        if not LIGHTGBM_AVAILABLE:
            logger.error("LightGBM not installed")
            return
        self.params = {
            'boosting_type': 'gbdt',
            'num_leaves': 64,          # Increased from 31
            'max_depth': -1,
            'learning_rate': 0.05,     # Slower learning
            'n_estimators': 500,       # More trees
            'subsample': 0.7,
            'colsample_bytree': 0.7,
            'min_child_samples': 50,   # Prevent overfitting on noise
            'random_state': 42,
            'verbose': -1,
            'extra_trees': True,       # Extra randomization
            'n_jobs': 1 if IS_FROZEN else 2,  # Throttled
        }

    @property
    def name(self) -> str:
        """Model identifier used for registry/persistence."""
        return "lightgbm"

    def train(self, X: np.ndarray, y: np.ndarray,
              X_val: np.ndarray = None, y_val: np.ndarray = None,
              class_weights: dict = None) -> float:
        """Train LightGBM model with optional external validation set and class weights.

        Args:
            X: Training feature matrix.
            y: Training labels (flattened to 1-D; integer class labels assumed
               when ``class_weights`` is a dict — TODO confirm at callers).
            X_val: Optional external validation features; when omitted an
                internal stratified split is made.
            y_val: Optional external validation labels.
            class_weights: Either a ``{label: weight}`` dict (mapped to
                per-sample weights) or a per-sample weight array.

        Returns:
            Balanced accuracy on the validation set, or 0.0 on failure.
        """
        if not LIGHTGBM_AVAILABLE:
            logger.error("LightGBM not available")
            return 0.0
        try:
            # Ensure y is 1D array
            y = np.asarray(y).ravel()
            # Detect number of classes dynamically
            unique_classes = np.unique(y)
            num_classes = len(unique_classes)
            logger.info(f"[LightGBM {self.timeframe}] Detected {num_classes} classes: {unique_classes}")
            # Adjust objective based on class count; binary mode must not
            # carry a stale 'num_class' from a previous multiclass run.
            if num_classes == 2:
                self.params['objective'] = 'binary'
                if 'num_class' in self.params:
                    del self.params['num_class']
            else:
                self.params['objective'] = 'multiclass'
                self.params['num_class'] = num_classes
            # Use external val set if provided, else split internally
            if X_val is None or y_val is None:
                try:
                    X_train, X_val, y_train, y_val = train_test_split(
                        X, y, test_size=default_settings.validation_ratio, random_state=42, stratify=y
                    )
                except ValueError:
                    # Stratification fails when a class has too few samples;
                    # fall back to an unstratified split.
                    X_train, X_val, y_train, y_val = train_test_split(
                        X, y, test_size=default_settings.validation_ratio, random_state=42
                    )
            else:
                X_train, y_train = X, y
                y_val = np.asarray(y_val).ravel()
            # Apply class weights if provided: dict -> per-sample weights,
            # array-like -> used directly as sample weights.
            sample_weights = None
            if class_weights is not None and (hasattr(class_weights, '__len__') and len(class_weights) > 0):
                if isinstance(class_weights, dict):
                    sample_weights = np.array([class_weights.get(int(label), 1.0) for label in y_train])
                elif hasattr(class_weights, 'size') and class_weights.size > 0:
                    sample_weights = class_weights
            # Create and train model
            self.model = lgb.LGBMClassifier(**self.params)
            self.model.fit(
                X_train, y_train,
                eval_set=[(X_val, y_val)],
                sample_weight=sample_weights,
            )
            # Evaluate with balanced accuracy (robust to class imbalance)
            y_pred = self.model.predict(X_val)
            self.accuracy = balanced_accuracy_score(y_val.ravel(), y_pred.ravel())
            self.is_trained = True
            logger.info(f"[LightGBM {self.timeframe}] ✓ Balanced Accuracy: {self.accuracy:.2%}")
            # Save model
            self.save()
            return self.accuracy
        except Exception as e:
            import traceback
            logger.error(f"LightGBM training failed: {e}")
            logger.error(traceback.format_exc())
            return 0.0

    def predict(self, X: np.ndarray) -> Tuple[str, float]:
        """Make prediction with LightGBM.

        Args:
            X: Feature row(s) — a DataFrame (column names are validated
               against the trained feature schema) or an ndarray.

        Returns:
            ``(signal, confidence)`` where signal comes from
            ``signal_from_prediction``; ``("NEUTRAL", 0.0)`` when the model
            is untrained, features mismatch, or prediction fails.
        """
        if not self.is_trained and not self.load():
            logger.warning("LightGBM model not trained")
            return "NEUTRAL", 0.0
        try:
            # Handle Feature Mismatch (Train vs Predict)
            # 1. New System: feature names persisted alongside the pickle
            expected_features = self.feature_names
            # 2. Legacy Fallback: names recorded by LightGBM itself
            if not expected_features and hasattr(self.model, 'feature_name_'):
                expected_features = self.model.feature_name_
            if hasattr(X, 'columns') and expected_features:
                # Check if we have all required features
                missing = [f for f in expected_features if f not in X.columns]
                if missing:
                    logger.warning(f"LightGBM mismatch: Missing {len(missing)} features ({missing[:3]}...). Returning NEUTRAL.")
                    return "NEUTRAL", 0.0
                # Check for "Unnamed" features vs Named features mismatch (Legacy)
                if len(expected_features) > 0 and str(expected_features[0]).startswith("Column_") and not str(X.columns[0]).startswith("Column_"):
                    logger.warning(f"LightGBM schema mismatch: Model expects raw features (Column_X) but got named features. Returning NEUTRAL.")
                    return "NEUTRAL", 0.0
                # Select only expected columns in correct order
                X = X[expected_features]
            # Ensure 2D ndarray input
            if hasattr(X, 'values'):
                X = X.values
            X = np.array(X)
            if X.ndim == 1:
                X = X.reshape(1, -1)
            # Get probabilities; confidence is the winning class probability
            proba = self.model.predict_proba(X)
            pred_class = np.argmax(proba, axis=1)[0]
            confidence = proba[0][pred_class]
            signal = self.signal_from_prediction(pred_class)
            return signal, float(confidence)
        except Exception as e:
            logger.error(f"LightGBM prediction failed: {e}")
            return "NEUTRAL", 0.0

    def load(self) -> bool:
        """Load model state from the pickle file at ``self.model_path``.

        NOTE: ``pickle.load`` executes arbitrary code from the file — only
        load model files produced by this application.

        Returns:
            True when the model was loaded, False when the file is absent
            or unreadable.
        """
        if not self.model_path.exists():
            return False
        try:
            with open(self.model_path, 'rb') as f:
                data = pickle.load(f)
            self.model = data['model']
            self.accuracy = data.get('accuracy', 0.0)
            self.params = data.get('params', self.params)
            self.feature_names = data.get('feature_names', [])
            self.is_trained = True
            logger.info(f"LightGBM model loaded from {self.model_path} ({len(self.feature_names)} features)")
            return True
        except Exception as e:
            logger.error(f"Failed to load LightGBM model: {e}")
            return False