# Source code for models.lightgbm_model

"""
Cerebrum Forex - LightGBM Model
Light gradient boosting model for signal prediction.
"""

import logging
import pickle
from pathlib import Path
from typing import Tuple

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from config.settings import default_settings, IS_FROZEN

# Optional dependency guard: LightGBM may be missing in some deployments
# (e.g. minimal or frozen builds). Instead of failing at import time, the
# model class checks LIGHTGBM_AVAILABLE and degrades gracefully.
try:
    import lightgbm as lgb
    LIGHTGBM_AVAILABLE = True
except ImportError:
    LIGHTGBM_AVAILABLE = False

from .base_model import BaseModel

# Module-level logger, named after this module (standard logging practice).
logger = logging.getLogger(__name__)


class LightGBMModel(BaseModel):
    """LightGBM model for forex signal prediction.

    Wraps an ``lgb.LGBMClassifier`` behind the project's ``BaseModel``
    interface. The objective (binary vs. multiclass) is chosen at training
    time from the labels actually present. All heavy dependencies are
    optional: if LightGBM is not installed, construction logs an error and
    ``train``/``predict`` return neutral results instead of raising.
    """

    def __init__(self, timeframe: str, model_dir: Path):
        """Initialize the model for one timeframe.

        Args:
            timeframe: Timeframe label (e.g. "H1") this model is trained for.
            model_dir: Directory where the pickled model is stored.
        """
        super().__init__(timeframe, model_dir)
        if not LIGHTGBM_AVAILABLE:
            logger.error("LightGBM not installed")
            return
        # Hyperparameters tuned for noisy forex data: more capacity than the
        # defaults, but regularized via subsampling and min_child_samples.
        self.params = {
            'boosting_type': 'gbdt',
            'num_leaves': 64,                   # Increased from 31
            'max_depth': -1,
            'learning_rate': 0.05,              # Slower learning
            'n_estimators': 500,                # More trees
            'subsample': 0.7,
            'colsample_bytree': 0.7,
            'min_child_samples': 50,            # Prevent overfitting on noise
            'random_state': 42,
            'verbose': -1,
            'extra_trees': True,                # Extra randomization
            'n_jobs': 1 if IS_FROZEN else 2,    # Throttled
        }

    @property
    def name(self) -> str:
        """Stable identifier used for file naming and logging."""
        return "lightgbm"

    def train(self, X: np.ndarray, y: np.ndarray,
              X_val: np.ndarray = None, y_val: np.ndarray = None,
              class_weights: dict = None) -> float:
        """Train LightGBM model with optional external validation set and class weights.

        Args:
            X: Training feature matrix.
            y: Training labels (any shape; flattened to 1-D).
            X_val: Optional pre-built validation features. If omitted, an
                internal split is made from ``X``/``y``.
            y_val: Optional validation labels matching ``X_val``.
            class_weights: Either a ``{label: weight}`` dict (mapped to
                per-sample weights) or an array of per-sample weights.

        Returns:
            Balanced accuracy on the validation set, or 0.0 on any failure.
        """
        if not LIGHTGBM_AVAILABLE:
            logger.error("LightGBM not available")
            return 0.0
        try:
            # Ensure y is a 1-D array; downstream code indexes labels directly.
            y = np.asarray(y).ravel()

            # Detect number of classes dynamically from the data.
            unique_classes = np.unique(y)
            num_classes = len(unique_classes)
            logger.info(f"[LightGBM {self.timeframe}] Detected {num_classes} classes: {unique_classes}")

            # Pick the objective to match the class count; a stale
            # 'num_class' from a previous multiclass run must be removed
            # before a binary fit.
            if num_classes == 2:
                self.params['objective'] = 'binary'
                if 'num_class' in self.params:
                    del self.params['num_class']
            else:
                self.params['objective'] = 'multiclass'
                self.params['num_class'] = num_classes

            # Use the external validation set if provided, else split
            # internally. Stratification can fail on tiny/rare classes,
            # so fall back to an unstratified split.
            if X_val is None or y_val is None:
                try:
                    X_train, X_val, y_train, y_val = train_test_split(
                        X, y,
                        test_size=default_settings.validation_ratio,
                        random_state=42,
                        stratify=y
                    )
                except ValueError:
                    X_train, X_val, y_train, y_val = train_test_split(
                        X, y,
                        test_size=default_settings.validation_ratio,
                        random_state=42
                    )
            else:
                X_train, y_train = X, y
                y_val = np.asarray(y_val).ravel()

            # Translate class weights into per-sample weights when a dict is
            # given; pass arrays through untouched. Unknown labels get 1.0.
            sample_weights = None
            if class_weights is not None and (hasattr(class_weights, '__len__') and len(class_weights) > 0):
                if isinstance(class_weights, dict):
                    sample_weights = np.array([class_weights.get(int(label), 1.0) for label in y_train])
                elif hasattr(class_weights, 'size') and class_weights.size > 0:
                    sample_weights = class_weights

            # Create and train the model.
            self.model = lgb.LGBMClassifier(**self.params)
            self.model.fit(
                X_train, y_train,
                eval_set=[(X_val, y_val)],
                sample_weight=sample_weights,
            )

            # Evaluate with balanced accuracy so minority classes count.
            y_pred = self.model.predict(X_val)
            self.accuracy = balanced_accuracy_score(y_val.ravel(), y_pred.ravel())
            self.is_trained = True
            logger.info(f"[LightGBM {self.timeframe}] ✓ Balanced Accuracy: {self.accuracy:.2%}")

            # Persist the trained model (implemented in BaseModel).
            self.save()
            return self.accuracy
        except Exception as e:
            import traceback
            logger.error(f"LightGBM training failed: {e}")
            logger.error(traceback.format_exc())
            return 0.0

    def predict(self, X: np.ndarray) -> Tuple[str, float]:
        """Make prediction with LightGBM.

        Args:
            X: Feature row(s); a DataFrame (columns are validated/reordered
               against the trained schema) or a raw array.

        Returns:
            Tuple of (signal string, confidence in [0, 1]). Returns
            ("NEUTRAL", 0.0) when the model is unavailable or the input
            schema does not match the trained features.
        """
        # Lazily load a previously saved model if we have not trained yet.
        if not self.is_trained and not self.load():
            logger.warning("LightGBM model not trained")
            return "NEUTRAL", 0.0
        try:
            # Handle feature mismatch between training and prediction.
            # 1. New system: feature names captured at save time.
            expected_features = self.feature_names
            # 2. Legacy fallback: names recorded by LightGBM itself.
            if not expected_features and hasattr(self.model, 'feature_name_'):
                expected_features = self.model.feature_name_

            if hasattr(X, 'columns') and expected_features:
                # Check that every trained feature is present.
                missing = [f for f in expected_features if f not in X.columns]
                if missing:
                    logger.warning(f"LightGBM mismatch: Missing {len(missing)} features ({missing[:3]}...). Returning NEUTRAL.")
                    return "NEUTRAL", 0.0

                # Legacy models trained on positional ("Column_N") features
                # cannot consume named columns — bail out rather than
                # silently misalign features.
                if len(expected_features) > 0 and str(expected_features[0]).startswith("Column_") and not str(X.columns[0]).startswith("Column_"):
                    logger.warning("LightGBM schema mismatch: Model expects raw features (Column_X) but got named features. Returning NEUTRAL.")
                    return "NEUTRAL", 0.0

                # Select only the expected columns, in training order.
                X = X[expected_features]

            # Normalize to a 2-D numpy array (single row becomes (1, n)).
            if hasattr(X, 'values'):
                X = X.values
            X = np.array(X)
            if X.ndim == 1:
                X = X.reshape(1, -1)

            # Class with the highest probability wins; its probability is
            # reported as the confidence.
            proba = self.model.predict_proba(X)
            pred_class = np.argmax(proba, axis=1)[0]
            confidence = proba[0][pred_class]

            signal = self.signal_from_prediction(pred_class)
            return signal, float(confidence)
        except Exception as e:
            logger.error(f"LightGBM prediction failed: {e}")
            return "NEUTRAL", 0.0

    def load(self) -> bool:
        """Load model from pickle file.

        Restores the estimator plus saved metadata (accuracy, params,
        feature names). Missing metadata keys fall back to safe defaults
        so older pickles remain loadable.

        Returns:
            True if the model was loaded successfully, False otherwise.
        """
        if not self.model_path.exists():
            return False
        try:
            # NOTE: pickle is only safe for files this app wrote itself;
            # never load model files from untrusted sources.
            with open(self.model_path, 'rb') as f:
                data = pickle.load(f)
            self.model = data['model']
            self.accuracy = data.get('accuracy', 0.0)
            self.params = data.get('params', self.params)
            self.feature_names = data.get('feature_names', [])
            self.is_trained = True
            logger.info(f"LightGBM model loaded from {self.model_path} ({len(self.feature_names)} features)")
            return True
        except Exception as e:
            logger.error(f"Failed to load LightGBM model: {e}")
            return False