gaitsetpy.classification.models.random_forest

Random Forest Classification Model

This module contains the RandomForestModel class which inherits from BaseClassificationModel and provides Random Forest classification functionality.

Maintainer: @aharshit123456

'''
Random Forest Classification Model

This module contains the RandomForestModel class which inherits from BaseClassificationModel
and provides Random Forest classification functionality.

Maintainer: @aharshit123456
'''
  9
 10import joblib
 11import numpy as np
 12from typing import List, Dict, Any, Optional, Union
 13from sklearn.ensemble import RandomForestClassifier
 14from sklearn.model_selection import train_test_split
 15from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
 16from ...core.base_classes import BaseClassificationModel
 17from ..utils.preprocess import preprocess_features
 18
 19
 20class RandomForestModel(BaseClassificationModel):
 21    """
 22    Random Forest classification model.
 23    
 24    This class provides Random Forest classification functionality with
 25    comprehensive training, prediction, and evaluation capabilities.
 26    """
 27    
 28    def __init__(self, n_estimators: int = 100, random_state: int = 42, max_depth: Optional[int] = None):
 29        super().__init__(
 30            name="random_forest",
 31            description="Random Forest classifier for gait data classification"
 32        )
 33        self.config = {
 34            'n_estimators': n_estimators,
 35            'random_state': random_state,
 36            'max_depth': max_depth
 37        }
 38        self.model = RandomForestClassifier(
 39            n_estimators=n_estimators,
 40            random_state=random_state,
 41            max_depth=max_depth
 42        )
 43        self.feature_names = []
 44        self.class_names = []
 45        
 46    def train(self, features: List[Dict], **kwargs):
 47        """
 48        Train the Random Forest model on the given features.
 49        
 50        Args:
 51            features: List of feature dictionaries
 52            **kwargs: Additional arguments including test_size, validation_split
 53        """
 54        # Preprocess features
 55        X, y = preprocess_features(features)
 56        
 57        # Store feature and class information
 58        self.feature_names = [f"feature_{i}" for i in range(X.shape[1])]
 59        self.class_names = list(set(y))
 60        
 61        # Split data if test_size is specified
 62        test_size = kwargs.get('test_size', 0.2)
 63        validation_split = kwargs.get('validation_split', True)
 64        
 65        if validation_split:
 66            X_train, X_test, y_train, y_test = train_test_split(
 67                X, y, test_size=test_size, random_state=self.config['random_state']
 68            )
 69            
 70            # Train model
 71            self.model.fit(X_train, y_train)
 72            
 73            # Store validation data for later evaluation
 74            self.X_test = X_test
 75            self.y_test = y_test
 76            
 77            # Print training accuracy
 78            train_accuracy = self.model.score(X_train, y_train)
 79            test_accuracy = self.model.score(X_test, y_test)
 80            
 81            print(f"Training accuracy: {train_accuracy:.4f}")
 82            print(f"Validation accuracy: {test_accuracy:.4f}")
 83        else:
 84            # Train on all data
 85            self.model.fit(X, y)
 86            train_accuracy = self.model.score(X, y)
 87            print(f"Training accuracy: {train_accuracy:.4f}")
 88        
 89        self.trained = True
 90        print("Random Forest model trained successfully.")
 91    
 92    def predict(self, features: List[Dict], **kwargs) -> Union[np.ndarray, Any]:
 93        """
 94        Make predictions using the trained Random Forest model.
 95        
 96        Args:
 97            features: List of feature dictionaries
 98            **kwargs: Additional arguments including return_probabilities
 99            
100        Returns:
101            Array of predictions or probabilities
102        """
103        if not self.trained:
104            raise ValueError("Model must be trained before making predictions")
105        
106        # Preprocess features
107        X, _ = preprocess_features(features)
108        
109        # Make predictions
110        return_probabilities = kwargs.get('return_probabilities', False)
111        
112        if return_probabilities:
113            return self.model.predict_proba(X)
114        else:
115            return self.model.predict(X)
116    
117    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
118        """
119        Evaluate the Random Forest model performance.
120        
121        Args:
122            features: List of feature dictionaries
123            **kwargs: Additional arguments including detailed_report
124            
125        Returns:
126            Dictionary containing evaluation metrics
127        """
128        if not self.trained:
129            raise ValueError("Model must be trained before evaluation")
130        
131        # Use validation data if available, otherwise use provided features
132        if hasattr(self, 'X_test') and hasattr(self, 'y_test'):
133            X_test, y_test = self.X_test, self.y_test
134        else:
135            X_test, y_test = preprocess_features(features)
136        
137        # Make predictions
138        y_pred = self.model.predict(X_test)
139        
140        # Calculate metrics
141        accuracy = accuracy_score(y_test, y_pred)
142        conf_matrix = confusion_matrix(y_test, y_pred)
143        
144        # Basic metrics
145        metrics = {
146            'accuracy': accuracy,
147            'confusion_matrix': conf_matrix.tolist()
148        }
149        
150        # Detailed report if requested
151        detailed_report = kwargs.get('detailed_report', False)
152        if detailed_report:
153            class_report = classification_report(y_test, y_pred, output_dict=True)
154            metrics['classification_report'] = class_report
155            
156            # Feature importance
157            if hasattr(self.model, 'feature_importances_'):
158                feature_importance = dict(zip(self.feature_names, self.model.feature_importances_))
159                metrics['feature_importance'] = feature_importance
160        
161        return metrics
162    
163    def save_model(self, filepath: str):
164        """
165        Save the trained Random Forest model to a file.
166        
167        Args:
168            filepath: Path to save the model
169        """
170        if not self.trained:
171            raise ValueError("Model must be trained before saving")
172        
173        # Save model with additional metadata
174        model_data = {
175            'model': self.model,
176            'config': self.config,
177            'feature_names': self.feature_names,
178            'class_names': self.class_names,
179            'trained': self.trained
180        }
181        
182        joblib.dump(model_data, filepath)
183        print(f"Random Forest model saved to {filepath}")
184    
185    def load_model(self, filepath: str):
186        """
187        Load a trained Random Forest model from a file.
188        
189        Args:
190            filepath: Path to the saved model
191        """
192        try:
193            model_data = joblib.load(filepath)
194            
195            # Handle legacy model format
196            if isinstance(model_data, dict):
197                self.model = model_data['model']
198                self.config = model_data.get('config', self.config)
199                self.feature_names = model_data.get('feature_names', [])
200                self.class_names = model_data.get('class_names', [])
201                self.trained = model_data.get('trained', True)
202            else:
203                # Legacy format - just the model
204                self.model = model_data
205                self.trained = True
206            
207            print(f"Random Forest model loaded from {filepath}")
208        except Exception as e:
209            print(f"Error loading model: {e}")
210            raise
211    
212    def get_feature_importance(self) -> Dict[str, float]:
213        """
214        Get feature importance scores.
215        
216        Returns:
217            Dictionary mapping feature names to importance scores
218        """
219        if not self.trained:
220            raise ValueError("Model must be trained to get feature importance")
221        
222        if hasattr(self.model, 'feature_importances_'):
223            return dict(zip(self.feature_names, self.model.feature_importances_))
224        else:
225            return {}
226    
227    def predict_single(self, single_features: Dict) -> int:
228        """
229        Make prediction for a single feature vector.
230        
231        Args:
232            single_features: Dictionary containing features for a single sample
233            
234        Returns:
235            Predicted class
236        """
237        if not self.trained:
238            raise ValueError("Model must be trained before making predictions")
239        
240        # Convert single feature dict to format expected by preprocess_features
241        features_list = [single_features]
242        X, _ = preprocess_features(features_list)
243        
244        return self.model.predict(X)[0]
245
246
247# Legacy function wrapper for backward compatibility
def create_random_forest_model(n_estimators=100, random_state=42, max_depth=None):
    """
    Factory for a RandomForestModel (legacy-compatible entry point).

    Args:
        n_estimators: Number of trees in the forest.
        random_state: Random state for reproducibility.
        max_depth: Maximum depth of each tree (None = unlimited).

    Returns:
        A freshly constructed RandomForestModel instance.
    """
    return RandomForestModel(
        n_estimators=n_estimators,
        random_state=random_state,
        max_depth=max_depth,
    )
class RandomForestModel(gaitsetpy.core.base_classes.BaseClassificationModel):
 21class RandomForestModel(BaseClassificationModel):
 22    """
 23    Random Forest classification model.
 24    
 25    This class provides Random Forest classification functionality with
 26    comprehensive training, prediction, and evaluation capabilities.
 27    """
 28    
 29    def __init__(self, n_estimators: int = 100, random_state: int = 42, max_depth: Optional[int] = None):
 30        super().__init__(
 31            name="random_forest",
 32            description="Random Forest classifier for gait data classification"
 33        )
 34        self.config = {
 35            'n_estimators': n_estimators,
 36            'random_state': random_state,
 37            'max_depth': max_depth
 38        }
 39        self.model = RandomForestClassifier(
 40            n_estimators=n_estimators,
 41            random_state=random_state,
 42            max_depth=max_depth
 43        )
 44        self.feature_names = []
 45        self.class_names = []
 46        
 47    def train(self, features: List[Dict], **kwargs):
 48        """
 49        Train the Random Forest model on the given features.
 50        
 51        Args:
 52            features: List of feature dictionaries
 53            **kwargs: Additional arguments including test_size, validation_split
 54        """
 55        # Preprocess features
 56        X, y = preprocess_features(features)
 57        
 58        # Store feature and class information
 59        self.feature_names = [f"feature_{i}" for i in range(X.shape[1])]
 60        self.class_names = list(set(y))
 61        
 62        # Split data if test_size is specified
 63        test_size = kwargs.get('test_size', 0.2)
 64        validation_split = kwargs.get('validation_split', True)
 65        
 66        if validation_split:
 67            X_train, X_test, y_train, y_test = train_test_split(
 68                X, y, test_size=test_size, random_state=self.config['random_state']
 69            )
 70            
 71            # Train model
 72            self.model.fit(X_train, y_train)
 73            
 74            # Store validation data for later evaluation
 75            self.X_test = X_test
 76            self.y_test = y_test
 77            
 78            # Print training accuracy
 79            train_accuracy = self.model.score(X_train, y_train)
 80            test_accuracy = self.model.score(X_test, y_test)
 81            
 82            print(f"Training accuracy: {train_accuracy:.4f}")
 83            print(f"Validation accuracy: {test_accuracy:.4f}")
 84        else:
 85            # Train on all data
 86            self.model.fit(X, y)
 87            train_accuracy = self.model.score(X, y)
 88            print(f"Training accuracy: {train_accuracy:.4f}")
 89        
 90        self.trained = True
 91        print("Random Forest model trained successfully.")
 92    
 93    def predict(self, features: List[Dict], **kwargs) -> Union[np.ndarray, Any]:
 94        """
 95        Make predictions using the trained Random Forest model.
 96        
 97        Args:
 98            features: List of feature dictionaries
 99            **kwargs: Additional arguments including return_probabilities
100            
101        Returns:
102            Array of predictions or probabilities
103        """
104        if not self.trained:
105            raise ValueError("Model must be trained before making predictions")
106        
107        # Preprocess features
108        X, _ = preprocess_features(features)
109        
110        # Make predictions
111        return_probabilities = kwargs.get('return_probabilities', False)
112        
113        if return_probabilities:
114            return self.model.predict_proba(X)
115        else:
116            return self.model.predict(X)
117    
118    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
119        """
120        Evaluate the Random Forest model performance.
121        
122        Args:
123            features: List of feature dictionaries
124            **kwargs: Additional arguments including detailed_report
125            
126        Returns:
127            Dictionary containing evaluation metrics
128        """
129        if not self.trained:
130            raise ValueError("Model must be trained before evaluation")
131        
132        # Use validation data if available, otherwise use provided features
133        if hasattr(self, 'X_test') and hasattr(self, 'y_test'):
134            X_test, y_test = self.X_test, self.y_test
135        else:
136            X_test, y_test = preprocess_features(features)
137        
138        # Make predictions
139        y_pred = self.model.predict(X_test)
140        
141        # Calculate metrics
142        accuracy = accuracy_score(y_test, y_pred)
143        conf_matrix = confusion_matrix(y_test, y_pred)
144        
145        # Basic metrics
146        metrics = {
147            'accuracy': accuracy,
148            'confusion_matrix': conf_matrix.tolist()
149        }
150        
151        # Detailed report if requested
152        detailed_report = kwargs.get('detailed_report', False)
153        if detailed_report:
154            class_report = classification_report(y_test, y_pred, output_dict=True)
155            metrics['classification_report'] = class_report
156            
157            # Feature importance
158            if hasattr(self.model, 'feature_importances_'):
159                feature_importance = dict(zip(self.feature_names, self.model.feature_importances_))
160                metrics['feature_importance'] = feature_importance
161        
162        return metrics
163    
164    def save_model(self, filepath: str):
165        """
166        Save the trained Random Forest model to a file.
167        
168        Args:
169            filepath: Path to save the model
170        """
171        if not self.trained:
172            raise ValueError("Model must be trained before saving")
173        
174        # Save model with additional metadata
175        model_data = {
176            'model': self.model,
177            'config': self.config,
178            'feature_names': self.feature_names,
179            'class_names': self.class_names,
180            'trained': self.trained
181        }
182        
183        joblib.dump(model_data, filepath)
184        print(f"Random Forest model saved to {filepath}")
185    
186    def load_model(self, filepath: str):
187        """
188        Load a trained Random Forest model from a file.
189        
190        Args:
191            filepath: Path to the saved model
192        """
193        try:
194            model_data = joblib.load(filepath)
195            
196            # Handle legacy model format
197            if isinstance(model_data, dict):
198                self.model = model_data['model']
199                self.config = model_data.get('config', self.config)
200                self.feature_names = model_data.get('feature_names', [])
201                self.class_names = model_data.get('class_names', [])
202                self.trained = model_data.get('trained', True)
203            else:
204                # Legacy format - just the model
205                self.model = model_data
206                self.trained = True
207            
208            print(f"Random Forest model loaded from {filepath}")
209        except Exception as e:
210            print(f"Error loading model: {e}")
211            raise
212    
213    def get_feature_importance(self) -> Dict[str, float]:
214        """
215        Get feature importance scores.
216        
217        Returns:
218            Dictionary mapping feature names to importance scores
219        """
220        if not self.trained:
221            raise ValueError("Model must be trained to get feature importance")
222        
223        if hasattr(self.model, 'feature_importances_'):
224            return dict(zip(self.feature_names, self.model.feature_importances_))
225        else:
226            return {}
227    
228    def predict_single(self, single_features: Dict) -> int:
229        """
230        Make prediction for a single feature vector.
231        
232        Args:
233            single_features: Dictionary containing features for a single sample
234            
235        Returns:
236            Predicted class
237        """
238        if not self.trained:
239            raise ValueError("Model must be trained before making predictions")
240        
241        # Convert single feature dict to format expected by preprocess_features
242        features_list = [single_features]
243        X, _ = preprocess_features(features_list)
244        
245        return self.model.predict(X)[0]

Random Forest classification model.

This class provides Random Forest classification functionality with comprehensive training, prediction, and evaluation capabilities.

RandomForestModel( n_estimators: int = 100, random_state: int = 42, max_depth: Optional[int] = None)
29    def __init__(self, n_estimators: int = 100, random_state: int = 42, max_depth: Optional[int] = None):
30        super().__init__(
31            name="random_forest",
32            description="Random Forest classifier for gait data classification"
33        )
34        self.config = {
35            'n_estimators': n_estimators,
36            'random_state': random_state,
37            'max_depth': max_depth
38        }
39        self.model = RandomForestClassifier(
40            n_estimators=n_estimators,
41            random_state=random_state,
42            max_depth=max_depth
43        )
44        self.feature_names = []
45        self.class_names = []

Initialize the classification model.

Args: name: Name of the classification model description: Description of the classification model

config
model
feature_names
class_names
def train(self, features: List[Dict], **kwargs):
47    def train(self, features: List[Dict], **kwargs):
48        """
49        Train the Random Forest model on the given features.
50        
51        Args:
52            features: List of feature dictionaries
53            **kwargs: Additional arguments including test_size, validation_split
54        """
55        # Preprocess features
56        X, y = preprocess_features(features)
57        
58        # Store feature and class information
59        self.feature_names = [f"feature_{i}" for i in range(X.shape[1])]
60        self.class_names = list(set(y))
61        
62        # Split data if test_size is specified
63        test_size = kwargs.get('test_size', 0.2)
64        validation_split = kwargs.get('validation_split', True)
65        
66        if validation_split:
67            X_train, X_test, y_train, y_test = train_test_split(
68                X, y, test_size=test_size, random_state=self.config['random_state']
69            )
70            
71            # Train model
72            self.model.fit(X_train, y_train)
73            
74            # Store validation data for later evaluation
75            self.X_test = X_test
76            self.y_test = y_test
77            
78            # Print training accuracy
79            train_accuracy = self.model.score(X_train, y_train)
80            test_accuracy = self.model.score(X_test, y_test)
81            
82            print(f"Training accuracy: {train_accuracy:.4f}")
83            print(f"Validation accuracy: {test_accuracy:.4f}")
84        else:
85            # Train on all data
86            self.model.fit(X, y)
87            train_accuracy = self.model.score(X, y)
88            print(f"Training accuracy: {train_accuracy:.4f}")
89        
90        self.trained = True
91        print("Random Forest model trained successfully.")

Train the Random Forest model on the given features.

Args: features: List of feature dictionaries **kwargs: Additional arguments including test_size, validation_split

def predict(self, features: List[Dict], **kwargs) -> Union[numpy.ndarray, Any]:
 93    def predict(self, features: List[Dict], **kwargs) -> Union[np.ndarray, Any]:
 94        """
 95        Make predictions using the trained Random Forest model.
 96        
 97        Args:
 98            features: List of feature dictionaries
 99            **kwargs: Additional arguments including return_probabilities
100            
101        Returns:
102            Array of predictions or probabilities
103        """
104        if not self.trained:
105            raise ValueError("Model must be trained before making predictions")
106        
107        # Preprocess features
108        X, _ = preprocess_features(features)
109        
110        # Make predictions
111        return_probabilities = kwargs.get('return_probabilities', False)
112        
113        if return_probabilities:
114            return self.model.predict_proba(X)
115        else:
116            return self.model.predict(X)

Make predictions using the trained Random Forest model.

Args: features: List of feature dictionaries **kwargs: Additional arguments including return_probabilities

Returns: Array of predictions or probabilities

def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
118    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
119        """
120        Evaluate the Random Forest model performance.
121        
122        Args:
123            features: List of feature dictionaries
124            **kwargs: Additional arguments including detailed_report
125            
126        Returns:
127            Dictionary containing evaluation metrics
128        """
129        if not self.trained:
130            raise ValueError("Model must be trained before evaluation")
131        
132        # Use validation data if available, otherwise use provided features
133        if hasattr(self, 'X_test') and hasattr(self, 'y_test'):
134            X_test, y_test = self.X_test, self.y_test
135        else:
136            X_test, y_test = preprocess_features(features)
137        
138        # Make predictions
139        y_pred = self.model.predict(X_test)
140        
141        # Calculate metrics
142        accuracy = accuracy_score(y_test, y_pred)
143        conf_matrix = confusion_matrix(y_test, y_pred)
144        
145        # Basic metrics
146        metrics = {
147            'accuracy': accuracy,
148            'confusion_matrix': conf_matrix.tolist()
149        }
150        
151        # Detailed report if requested
152        detailed_report = kwargs.get('detailed_report', False)
153        if detailed_report:
154            class_report = classification_report(y_test, y_pred, output_dict=True)
155            metrics['classification_report'] = class_report
156            
157            # Feature importance
158            if hasattr(self.model, 'feature_importances_'):
159                feature_importance = dict(zip(self.feature_names, self.model.feature_importances_))
160                metrics['feature_importance'] = feature_importance
161        
162        return metrics

Evaluate the Random Forest model performance.

Args: features: List of feature dictionaries **kwargs: Additional arguments including detailed_report

Returns: Dictionary containing evaluation metrics

def save_model(self, filepath: str):
164    def save_model(self, filepath: str):
165        """
166        Save the trained Random Forest model to a file.
167        
168        Args:
169            filepath: Path to save the model
170        """
171        if not self.trained:
172            raise ValueError("Model must be trained before saving")
173        
174        # Save model with additional metadata
175        model_data = {
176            'model': self.model,
177            'config': self.config,
178            'feature_names': self.feature_names,
179            'class_names': self.class_names,
180            'trained': self.trained
181        }
182        
183        joblib.dump(model_data, filepath)
184        print(f"Random Forest model saved to {filepath}")

Save the trained Random Forest model to a file.

Args: filepath: Path to save the model

def load_model(self, filepath: str):
186    def load_model(self, filepath: str):
187        """
188        Load a trained Random Forest model from a file.
189        
190        Args:
191            filepath: Path to the saved model
192        """
193        try:
194            model_data = joblib.load(filepath)
195            
196            # Handle legacy model format
197            if isinstance(model_data, dict):
198                self.model = model_data['model']
199                self.config = model_data.get('config', self.config)
200                self.feature_names = model_data.get('feature_names', [])
201                self.class_names = model_data.get('class_names', [])
202                self.trained = model_data.get('trained', True)
203            else:
204                # Legacy format - just the model
205                self.model = model_data
206                self.trained = True
207            
208            print(f"Random Forest model loaded from {filepath}")
209        except Exception as e:
210            print(f"Error loading model: {e}")
211            raise

Load a trained Random Forest model from a file.

Args: filepath: Path to the saved model

def get_feature_importance(self) -> Dict[str, float]:
213    def get_feature_importance(self) -> Dict[str, float]:
214        """
215        Get feature importance scores.
216        
217        Returns:
218            Dictionary mapping feature names to importance scores
219        """
220        if not self.trained:
221            raise ValueError("Model must be trained to get feature importance")
222        
223        if hasattr(self.model, 'feature_importances_'):
224            return dict(zip(self.feature_names, self.model.feature_importances_))
225        else:
226            return {}

Get feature importance scores.

Returns: Dictionary mapping feature names to importance scores

def predict_single(self, single_features: Dict) -> int:
228    def predict_single(self, single_features: Dict) -> int:
229        """
230        Make prediction for a single feature vector.
231        
232        Args:
233            single_features: Dictionary containing features for a single sample
234            
235        Returns:
236            Predicted class
237        """
238        if not self.trained:
239            raise ValueError("Model must be trained before making predictions")
240        
241        # Convert single feature dict to format expected by preprocess_features
242        features_list = [single_features]
243        X, _ = preprocess_features(features_list)
244        
245        return self.model.predict(X)[0]

Make prediction for a single feature vector.

Args: single_features: Dictionary containing features for a single sample

Returns: Predicted class

def create_random_forest_model(n_estimators=100, random_state=42, max_depth=None):
249def create_random_forest_model(n_estimators=100, random_state=42, max_depth=None):
250    """
251    Create a Random Forest model with specified parameters.
252    
253    Args:
254        n_estimators: Number of trees in the forest
255        random_state: Random state for reproducibility
256        max_depth: Maximum depth of the tree
257        
258    Returns:
259        RandomForestModel instance
260    """
261    return RandomForestModel(n_estimators=n_estimators, random_state=random_state, max_depth=max_depth)

Create a Random Forest model with specified parameters.

Args: n_estimators: Number of trees in the forest random_state: Random state for reproducibility max_depth: Maximum depth of the tree

Returns: RandomForestModel instance