gaitsetpy.classification.models.random_forest
Random Forest Classification Model
This module contains the RandomForestModel class which inherits from BaseClassificationModel and provides Random Forest classification functionality.
Maintainer: @aharshit123456
'''
Random Forest Classification Model

This module contains the RandomForestModel class which inherits from BaseClassificationModel
and provides Random Forest classification functionality.

Maintainer: @aharshit123456
'''

import joblib
import numpy as np
from typing import List, Dict, Any, Optional, Union
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from ...core.base_classes import BaseClassificationModel
from ..utils.preprocess import preprocess_features


class RandomForestModel(BaseClassificationModel):
    """
    Random Forest classification model.

    This class provides Random Forest classification functionality with
    comprehensive training, prediction, and evaluation capabilities.
    """

    def __init__(self, n_estimators: int = 100, random_state: int = 42, max_depth: Optional[int] = None):
        """
        Initialize the Random Forest model.

        Args:
            n_estimators: Number of trees in the forest.
            random_state: Random state for reproducibility.
            max_depth: Maximum depth of each tree (None lets trees grow fully).
        """
        super().__init__(
            name="random_forest",
            description="Random Forest classifier for gait data classification"
        )
        # Keep the constructor arguments so save_model() can persist them.
        self.config = {
            'n_estimators': n_estimators,
            'random_state': random_state,
            'max_depth': max_depth
        }
        self.model = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=random_state,
            max_depth=max_depth
        )
        self.feature_names = []  # populated by train()
        self.class_names = []    # populated by train()

    def train(self, features: List[Dict], **kwargs):
        """
        Train the Random Forest model on the given features.

        Args:
            features: List of feature dictionaries.
            **kwargs: Additional arguments including test_size (default 0.2)
                and validation_split (default True).
        """
        X, y = preprocess_features(features)

        # Synthetic column names; preprocess_features does not expose originals.
        self.feature_names = [f"feature_{i}" for i in range(X.shape[1])]
        # sorted() makes the class list deterministic across runs; plain
        # list(set(y)) varies with hash randomization.
        self.class_names = sorted(set(y))

        test_size = kwargs.get('test_size', 0.2)
        validation_split = kwargs.get('validation_split', True)

        if validation_split:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=self.config['random_state']
            )
            self.model.fit(X_train, y_train)

            # Keep the hold-out split so evaluate() can reuse it.
            self.X_test = X_test
            self.y_test = y_test

            train_accuracy = self.model.score(X_train, y_train)
            test_accuracy = self.model.score(X_test, y_test)
            print(f"Training accuracy: {train_accuracy:.4f}")
            print(f"Validation accuracy: {test_accuracy:.4f}")
        else:
            # Train on all data.
            self.model.fit(X, y)
            train_accuracy = self.model.score(X, y)
            print(f"Training accuracy: {train_accuracy:.4f}")

        self.trained = True
        print("Random Forest model trained successfully.")

    def predict(self, features: List[Dict], **kwargs) -> Union[np.ndarray, Any]:
        """
        Make predictions using the trained Random Forest model.

        Args:
            features: List of feature dictionaries.
            **kwargs: Additional arguments including return_probabilities.

        Returns:
            Array of class predictions, or class probabilities when
            return_probabilities=True.

        Raises:
            ValueError: If the model has not been trained.
        """
        if not self.trained:
            raise ValueError("Model must be trained before making predictions")

        X, _ = preprocess_features(features)

        if kwargs.get('return_probabilities', False):
            return self.model.predict_proba(X)
        return self.model.predict(X)

    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
        """
        Evaluate the Random Forest model performance.

        Uses the validation split stored by train() when available,
        otherwise evaluates on the provided features.

        Args:
            features: List of feature dictionaries.
            **kwargs: Additional arguments including detailed_report.

        Returns:
            Dictionary with 'accuracy' and 'confusion_matrix'; when
            detailed_report=True, also 'classification_report' and
            'feature_importance'.

        Raises:
            ValueError: If the model has not been trained.
        """
        if not self.trained:
            raise ValueError("Model must be trained before evaluation")

        if hasattr(self, 'X_test') and hasattr(self, 'y_test'):
            X_test, y_test = self.X_test, self.y_test
        else:
            X_test, y_test = preprocess_features(features)

        y_pred = self.model.predict(X_test)

        metrics = {
            'accuracy': accuracy_score(y_test, y_pred),
            'confusion_matrix': confusion_matrix(y_test, y_pred).tolist()
        }

        # Detailed report if requested.
        if kwargs.get('detailed_report', False):
            metrics['classification_report'] = classification_report(
                y_test, y_pred, output_dict=True
            )
            # NOTE(review): source indentation was lost in extraction; feature
            # importance is treated as part of the detailed report — confirm.
            if hasattr(self.model, 'feature_importances_'):
                metrics['feature_importance'] = dict(
                    zip(self.feature_names, self.model.feature_importances_)
                )

        return metrics

    def save_model(self, filepath: str):
        """
        Save the trained Random Forest model to a file.

        Args:
            filepath: Path to save the model.

        Raises:
            ValueError: If the model has not been trained.
        """
        if not self.trained:
            raise ValueError("Model must be trained before saving")

        # Bundle metadata with the estimator so load_model() can restore state.
        model_data = {
            'model': self.model,
            'config': self.config,
            'feature_names': self.feature_names,
            'class_names': self.class_names,
            'trained': self.trained
        }

        joblib.dump(model_data, filepath)
        print(f"Random Forest model saved to {filepath}")

    def load_model(self, filepath: str):
        """
        Load a trained Random Forest model from a file.

        Supports both the current dict format written by save_model() and the
        legacy format where the file contains just the estimator.

        Args:
            filepath: Path to the saved model.
        """
        try:
            model_data = joblib.load(filepath)

            if isinstance(model_data, dict):
                self.model = model_data['model']
                self.config = model_data.get('config', self.config)
                self.feature_names = model_data.get('feature_names', [])
                self.class_names = model_data.get('class_names', [])
                self.trained = model_data.get('trained', True)
            else:
                # Legacy format - just the model
                self.model = model_data
                self.trained = True

            print(f"Random Forest model loaded from {filepath}")
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    def get_feature_importance(self) -> Dict[str, float]:
        """
        Get feature importance scores.

        Returns:
            Dictionary mapping feature names to importance scores; empty if
            the underlying estimator exposes no importances.

        Raises:
            ValueError: If the model has not been trained.
        """
        if not self.trained:
            raise ValueError("Model must be trained to get feature importance")

        if hasattr(self.model, 'feature_importances_'):
            return dict(zip(self.feature_names, self.model.feature_importances_))
        return {}

    def predict_single(self, single_features: Dict) -> int:
        """
        Make prediction for a single feature vector.

        Args:
            single_features: Dictionary containing features for a single sample.

        Returns:
            Predicted class for the sample.

        Raises:
            ValueError: If the model has not been trained.
        """
        if not self.trained:
            raise ValueError("Model must be trained before making predictions")

        # preprocess_features expects a list of feature dicts.
        X, _ = preprocess_features([single_features])
        return self.model.predict(X)[0]


# Legacy function wrapper for backward compatibility
def create_random_forest_model(n_estimators=100, random_state=42, max_depth=None):
    """
    Create a Random Forest model with specified parameters.

    Args:
        n_estimators: Number of trees in the forest
        random_state: Random state for reproducibility
        max_depth: Maximum depth of the tree

    Returns:
        RandomForestModel instance
    """
    return RandomForestModel(n_estimators=n_estimators, random_state=random_state, max_depth=max_depth)
class RandomForestModel(BaseClassificationModel):
    """Random Forest classifier for gait data.

    Wraps an sklearn RandomForestClassifier with training, prediction,
    evaluation, persistence, and feature-importance helpers.
    """

    def __init__(self, n_estimators: int = 100, random_state: int = 42, max_depth: Optional[int] = None):
        """Set up the underlying classifier and bookkeeping state."""
        super().__init__(
            name="random_forest",
            description="Random Forest classifier for gait data classification"
        )
        # Hyper-parameters are kept so save_model() can persist them.
        self.config = dict(
            n_estimators=n_estimators,
            random_state=random_state,
            max_depth=max_depth,
        )
        self.model = RandomForestClassifier(**self.config)
        self.feature_names = []
        self.class_names = []

    def train(self, features: List[Dict], **kwargs):
        """Fit the forest on preprocessed features.

        Args:
            features: List of feature dictionaries.
            **kwargs: test_size (default 0.2), validation_split (default True).
        """
        X, y = preprocess_features(features)

        self.feature_names = [f"feature_{i}" for i in range(X.shape[1])]
        self.class_names = list(set(y))

        test_size = kwargs.get('test_size', 0.2)
        if kwargs.get('validation_split', True):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=self.config['random_state']
            )
            self.model.fit(X_train, y_train)
            # Retain the hold-out data for evaluate().
            self.X_test, self.y_test = X_test, y_test
            print(f"Training accuracy: {self.model.score(X_train, y_train):.4f}")
            print(f"Validation accuracy: {self.model.score(X_test, y_test):.4f}")
        else:
            self.model.fit(X, y)
            print(f"Training accuracy: {self.model.score(X, y):.4f}")

        self.trained = True
        print("Random Forest model trained successfully.")

    def predict(self, features: List[Dict], **kwargs) -> Union[np.ndarray, Any]:
        """Predict classes (or probabilities) for the given feature dicts.

        Args:
            features: List of feature dictionaries.
            **kwargs: return_probabilities (default False).

        Returns:
            Predicted labels, or class probabilities when requested.
        """
        if not self.trained:
            raise ValueError("Model must be trained before making predictions")

        X, _ = preprocess_features(features)
        want_proba = kwargs.get('return_probabilities', False)
        return self.model.predict_proba(X) if want_proba else self.model.predict(X)

    def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
        """Compute accuracy/confusion matrix on held-out or provided data.

        Args:
            features: List of feature dictionaries (used only when no
                validation split was stored by train()).
            **kwargs: detailed_report (default False).

        Returns:
            Metrics dictionary.
        """
        if not self.trained:
            raise ValueError("Model must be trained before evaluation")

        if hasattr(self, 'X_test') and hasattr(self, 'y_test'):
            X_eval, y_eval = self.X_test, self.y_test
        else:
            X_eval, y_eval = preprocess_features(features)

        predicted = self.model.predict(X_eval)

        metrics = {
            'accuracy': accuracy_score(y_eval, predicted),
            'confusion_matrix': confusion_matrix(y_eval, predicted).tolist(),
        }

        if kwargs.get('detailed_report', False):
            metrics['classification_report'] = classification_report(
                y_eval, predicted, output_dict=True
            )
            if hasattr(self.model, 'feature_importances_'):
                metrics['feature_importance'] = dict(
                    zip(self.feature_names, self.model.feature_importances_)
                )

        return metrics

    def save_model(self, filepath: str):
        """Persist the trained estimator plus metadata via joblib."""
        if not self.trained:
            raise ValueError("Model must be trained before saving")

        payload = {
            'model': self.model,
            'config': self.config,
            'feature_names': self.feature_names,
            'class_names': self.class_names,
            'trained': self.trained,
        }
        joblib.dump(payload, filepath)
        print(f"Random Forest model saved to {filepath}")

    def load_model(self, filepath: str):
        """Restore a model saved by save_model (or a bare legacy estimator)."""
        try:
            payload = joblib.load(filepath)

            if not isinstance(payload, dict):
                # Legacy format - just the model
                self.model = payload
                self.trained = True
            else:
                self.model = payload['model']
                self.config = payload.get('config', self.config)
                self.feature_names = payload.get('feature_names', [])
                self.class_names = payload.get('class_names', [])
                self.trained = payload.get('trained', True)

            print(f"Random Forest model loaded from {filepath}")
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    def get_feature_importance(self) -> Dict[str, float]:
        """Return {feature_name: importance}; empty dict if unavailable."""
        if not self.trained:
            raise ValueError("Model must be trained to get feature importance")

        importances = getattr(self.model, 'feature_importances_', None)
        if importances is None:
            return {}
        return dict(zip(self.feature_names, importances))

    def predict_single(self, single_features: Dict) -> int:
        """Predict the class for one sample's feature dict."""
        if not self.trained:
            raise ValueError("Model must be trained before making predictions")

        # preprocess_features expects a list of feature dicts.
        X, _ = preprocess_features([single_features])
        return self.model.predict(X)[0]
Random Forest classification model.
This class provides Random Forest classification functionality with comprehensive training, prediction, and evaluation capabilities.
def __init__(self, n_estimators: int = 100, random_state: int = 42, max_depth: Optional[int] = None):
    """Set up the underlying RandomForestClassifier and bookkeeping state.

    Args:
        n_estimators: Number of trees in the forest.
        random_state: Seed for reproducibility.
        max_depth: Maximum tree depth; None grows trees fully.
    """
    super().__init__(
        name="random_forest",
        description="Random Forest classifier for gait data classification"
    )
    # Retain the hyper-parameters so save_model() can persist them.
    self.config = dict(
        n_estimators=n_estimators,
        random_state=random_state,
        max_depth=max_depth,
    )
    self.model = RandomForestClassifier(**self.config)
    self.feature_names = []
    self.class_names = []
Initialize the classification model.
Args:
    name: Name of the classification model.
    description: Description of the classification model.
def train(self, features: List[Dict], **kwargs):
    """Fit the classifier on preprocessed features.

    Args:
        features: List of feature dictionaries.
        **kwargs: test_size (default 0.2), validation_split (default True).
    """
    X, y = preprocess_features(features)

    # Synthetic names; the preprocessing step does not expose originals.
    self.feature_names = [f"feature_{i}" for i in range(X.shape[1])]
    self.class_names = list(set(y))

    test_size = kwargs.get('test_size', 0.2)
    if kwargs.get('validation_split', True):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=self.config['random_state']
        )
        self.model.fit(X_train, y_train)

        # Retain the hold-out split so evaluate() can reuse it.
        self.X_test, self.y_test = X_test, y_test

        print(f"Training accuracy: {self.model.score(X_train, y_train):.4f}")
        print(f"Validation accuracy: {self.model.score(X_test, y_test):.4f}")
    else:
        # No split requested: fit on everything.
        self.model.fit(X, y)
        print(f"Training accuracy: {self.model.score(X, y):.4f}")

    self.trained = True
    print("Random Forest model trained successfully.")
Train the Random Forest model on the given features.
Args: features: List of feature dictionaries **kwargs: Additional arguments including test_size, validation_split
def predict(self, features: List[Dict], **kwargs) -> Union[np.ndarray, Any]:
    """Predict classes (or class probabilities) for the given features.

    Args:
        features: List of feature dictionaries.
        **kwargs: return_probabilities (default False).

    Returns:
        Predicted labels, or probability matrix when requested.

    Raises:
        ValueError: If the model has not been trained.
    """
    if not self.trained:
        raise ValueError("Model must be trained before making predictions")

    X, _ = preprocess_features(features)

    want_proba = kwargs.get('return_probabilities', False)
    return self.model.predict_proba(X) if want_proba else self.model.predict(X)
Make predictions using the trained Random Forest model.
Args: features: List of feature dictionaries **kwargs: Additional arguments including return_probabilities
Returns: Array of predictions or probabilities
def evaluate(self, features: List[Dict], **kwargs) -> Dict[str, float]:
    """Evaluate on the stored validation split, or on `features` if none.

    Args:
        features: List of feature dictionaries (fallback evaluation data).
        **kwargs: detailed_report (default False).

    Returns:
        Dict with 'accuracy', 'confusion_matrix', and optionally
        'classification_report' / 'feature_importance'.

    Raises:
        ValueError: If the model has not been trained.
    """
    if not self.trained:
        raise ValueError("Model must be trained before evaluation")

    # Prefer the hold-out data saved by train().
    if hasattr(self, 'X_test') and hasattr(self, 'y_test'):
        X_eval, y_eval = self.X_test, self.y_test
    else:
        X_eval, y_eval = preprocess_features(features)

    predicted = self.model.predict(X_eval)

    metrics = {
        'accuracy': accuracy_score(y_eval, predicted),
        'confusion_matrix': confusion_matrix(y_eval, predicted).tolist(),
    }

    if kwargs.get('detailed_report', False):
        metrics['classification_report'] = classification_report(
            y_eval, predicted, output_dict=True
        )
        if hasattr(self.model, 'feature_importances_'):
            metrics['feature_importance'] = dict(
                zip(self.feature_names, self.model.feature_importances_)
            )

    return metrics
Evaluate the Random Forest model performance.
Args: features: List of feature dictionaries **kwargs: Additional arguments including detailed_report
Returns: Dictionary containing evaluation metrics
def save_model(self, filepath: str):
    """Persist the trained estimator plus metadata via joblib.

    Args:
        filepath: Destination path for the dump.

    Raises:
        ValueError: If the model has not been trained.
    """
    if not self.trained:
        raise ValueError("Model must be trained before saving")

    # Bundle metadata alongside the estimator so load_model() can restore it.
    payload = {
        'model': self.model,
        'config': self.config,
        'feature_names': self.feature_names,
        'class_names': self.class_names,
        'trained': self.trained,
    }
    joblib.dump(payload, filepath)
    print(f"Random Forest model saved to {filepath}")
Save the trained Random Forest model to a file.
Args: filepath: Path to save the model
def load_model(self, filepath: str):
    """Restore a model saved by save_model (or a bare legacy estimator).

    Args:
        filepath: Path to the saved model.

    Raises:
        Exception: Re-raises whatever joblib/unpacking raised, after logging.
    """
    try:
        payload = joblib.load(filepath)

        if not isinstance(payload, dict):
            # Legacy format - just the model
            self.model = payload
            self.trained = True
        else:
            self.model = payload['model']
            self.config = payload.get('config', self.config)
            self.feature_names = payload.get('feature_names', [])
            self.class_names = payload.get('class_names', [])
            self.trained = payload.get('trained', True)

        print(f"Random Forest model loaded from {filepath}")
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
Load a trained Random Forest model from a file.
Args: filepath: Path to the saved model
def get_feature_importance(self) -> Dict[str, float]:
    """Return a {feature_name: importance} mapping.

    Returns:
        Importance scores keyed by feature name; empty dict when the
        estimator exposes no `feature_importances_`.

    Raises:
        ValueError: If the model has not been trained.
    """
    if not self.trained:
        raise ValueError("Model must be trained to get feature importance")

    importances = getattr(self.model, 'feature_importances_', None)
    if importances is None:
        return {}
    return dict(zip(self.feature_names, importances))
Get feature importance scores.
Returns: Dictionary mapping feature names to importance scores
def predict_single(self, single_features: Dict) -> int:
    """Predict the class for a single sample's feature dict.

    Args:
        single_features: Feature dictionary for one sample.

    Returns:
        The predicted class for that sample.

    Raises:
        ValueError: If the model has not been trained.
    """
    if not self.trained:
        raise ValueError("Model must be trained before making predictions")

    # preprocess_features expects a list of feature dicts.
    X, _ = preprocess_features([single_features])
    predictions = self.model.predict(X)
    return predictions[0]
Make prediction for a single feature vector.
Args: single_features: Dictionary containing features for a single sample
Returns: Predicted class
Inherited Members
def create_random_forest_model(n_estimators=100, random_state=42, max_depth=None):
    """Factory for RandomForestModel (legacy wrapper, kept for compatibility).

    Args:
        n_estimators: Number of trees in the forest.
        random_state: Random state for reproducibility.
        max_depth: Maximum depth of the tree.

    Returns:
        A configured RandomForestModel instance.
    """
    return RandomForestModel(
        n_estimators=n_estimators,
        random_state=random_state,
        max_depth=max_depth,
    )
Create a Random Forest model with specified parameters.
Args:
    n_estimators: Number of trees in the forest.
    random_state: Random state for reproducibility.
    max_depth: Maximum depth of the tree.
Returns: RandomForestModel instance