"""
价格分布统计性能优化测试

测试价格分布统计的性能优化功能，包括向量化操作、并行处理、内存优化和性能监控
"""

import pytest
import asyncio
import pandas as pd
import numpy as np
import time
import logging
from unittest.mock import Mock, patch, MagicMock
from datetime import datetime, timedelta

from quickstock.utils.price_distribution_performance import (
    PriceDistributionPerformanceOptimizer,
    PriceDistributionPerformanceConfig,
    PriceDistributionVectorizedOperations,
    PriceDistributionParallelProcessor,
    PriceDistributionMemoryOptimizer
)
from quickstock.models.price_distribution_models import DistributionRange
from quickstock.core.price_distribution_errors import (
    DistributionCalculationError,
    StatisticsAggregationError
)


class TestPriceDistributionVectorizedOperations:
    """Tests for PriceDistributionVectorizedOperations."""

    @pytest.fixture
    def vectorized_ops(self):
        """Return a fresh vectorized-operations instance."""
        return PriceDistributionVectorizedOperations()

    @pytest.fixture
    def sample_stock_data(self):
        """Build a seeded, random 1000-row stock DataFrame.

        Codes alternate between the ``.SZ`` suffix (even index) and the
        ``.SH`` suffix (odd index); every tenth name carries a ``*ST`` prefix.
        """
        np.random.seed(42)
        n_stocks = 1000

        data = {
            'ts_code': [f'{i:06d}.SZ' if i % 2 == 0 else f'{i:06d}.SH' for i in range(n_stocks)],
            'name': [f'股票{i}' if i % 10 != 0 else f'*ST股票{i}' for i in range(n_stocks)],
            'close': np.random.uniform(5, 100, n_stocks),
            'pre_close': np.random.uniform(5, 100, n_stocks),
            'open': np.random.uniform(5, 100, n_stocks),
            'high': np.random.uniform(5, 100, n_stocks),
            'low': np.random.uniform(5, 100, n_stocks),
            'volume': np.random.randint(1000, 1000000, n_stocks),
            'amount': np.random.uniform(10000, 10000000, n_stocks)
        }

        return pd.DataFrame(data)

    @pytest.fixture
    def distribution_ranges(self):
        """Return ten ranges: five positive and five negative percentage bands."""
        return [
            DistributionRange(name="0-3%", min_value=0.0, max_value=3.0, is_positive=True, display_name="0-3%"),
            DistributionRange(name="3-5%", min_value=3.0, max_value=5.0, is_positive=True, display_name="3-5%"),
            DistributionRange(name="5-7%", min_value=5.0, max_value=7.0, is_positive=True, display_name="5-7%"),
            DistributionRange(name="7-10%", min_value=7.0, max_value=10.0, is_positive=True, display_name="7-10%"),
            DistributionRange(name=">=10%", min_value=10.0, max_value=float('inf'), is_positive=True, display_name=">=10%"),
            DistributionRange(name="0到-3%", min_value=-3.0, max_value=0.0, is_positive=False, display_name="0到-3%"),
            DistributionRange(name="-3到-5%", min_value=-5.0, max_value=-3.0, is_positive=False, display_name="-3到-5%"),
            DistributionRange(name="-5到-7%", min_value=-7.0, max_value=-5.0, is_positive=False, display_name="-5到-7%"),
            DistributionRange(name="-7到-10%", min_value=-10.0, max_value=-7.0, is_positive=False, display_name="-7到-10%"),
            DistributionRange(name="<=-10%", min_value=float('-inf'), max_value=-10.0, is_positive=False, display_name="<=-10%")
        ]

    def test_vectorized_price_change_calculation(self, vectorized_ops, sample_stock_data):
        """The calculation adds pct_change, market and is_st columns."""
        result = vectorized_ops.vectorized_price_change_calculation(sample_stock_data)

        # Shape and derived columns
        assert isinstance(result, pd.DataFrame)
        assert len(result) == len(sample_stock_data)
        assert 'pct_change' in result.columns
        assert 'market' in result.columns
        assert 'is_st' in result.columns

        # pct_change must equal (close - pre_close) / pre_close * 100
        expected_pct_change = (result['close'] - result['pre_close']) / result['pre_close'] * 100
        pd.testing.assert_series_equal(result['pct_change'], expected_pct_change, check_names=False)

        # ST detection: the fixture names every tenth stock '*ST...', so at
        # least one row must be flagged; without this guard the all() below
        # would pass vacuously if detection flagged nothing.
        st_stocks = result[result['is_st']]
        assert not st_stocks.empty
        assert all('ST' in name for name in st_stocks['name'])

    def test_vectorized_price_change_calculation_empty_data(self, vectorized_ops):
        """An empty DataFrame yields an empty DataFrame."""
        empty_df = pd.DataFrame()
        result = vectorized_ops.vectorized_price_change_calculation(empty_df)

        assert isinstance(result, pd.DataFrame)
        assert len(result) == 0

    def test_vectorized_price_change_calculation_missing_columns(self, vectorized_ops):
        """Without price columns, pct_change is expected to be 0.0 for every row."""
        # Only the ts_code column is present
        data = pd.DataFrame({'ts_code': ['000001.SZ', '000002.SZ']})
        result = vectorized_ops.vectorized_price_change_calculation(data)

        assert 'pct_change' in result.columns
        assert (result['pct_change'] == 0.0).all()

    def test_vectorized_distribution_classification(self, vectorized_ops, distribution_ranges):
        """Each sample percentage is flagged in the range that contains it."""
        pct_changes = pd.Series([2.5, 4.5, 6.5, 8.5, 12.0, -2.5, -4.5, -6.5, -8.5, -12.0])

        result = vectorized_ops.vectorized_distribution_classification(pct_changes, distribution_ranges)

        assert isinstance(result, dict)
        assert len(result) == len(distribution_ranges)

        # expected_by_position[i] is the range that pct_changes[i] must fall in
        expected_by_position = [
            "0-3%",       # 2.5
            "3-5%",       # 4.5
            "5-7%",       # 6.5
            "7-10%",      # 8.5
            ">=10%",      # 12.0
            "0到-3%",     # -2.5
            "-3到-5%",    # -4.5
            "-5到-7%",    # -6.5
            "-7到-10%",   # -8.5
            "<=-10%",     # -12.0
        ]
        for position, range_name in enumerate(expected_by_position):
            assert result[range_name].iloc[position], (
                f"{pct_changes.iloc[position]} should fall in range {range_name!r}"
            )

    def test_vectorized_market_statistics(self, vectorized_ops, sample_stock_data, distribution_ranges):
        """Market statistics expose count/percentage per range for every market key."""
        # Prepare inputs through the same pipeline the statistics step consumes
        processed_data = vectorized_ops.vectorized_price_change_calculation(sample_stock_data)
        classification_results = vectorized_ops.vectorized_distribution_classification(
            processed_data['pct_change'], distribution_ranges
        )

        result = vectorized_ops.vectorized_market_statistics(processed_data, classification_results)

        assert isinstance(result, dict)
        expected_markets = ['total', 'shanghai', 'shenzhen', 'star', 'beijing', 'st', 'non_st']

        for market in expected_markets:
            assert market in result
            assert 'total_stocks' in result[market]
            assert isinstance(result[market]['total_stocks'], int)

            # Every range must have both a count and a percentage entry
            for range_def in distribution_ranges:
                assert range_def.name in result[market]
                assert 'count' in result[market][range_def.name]
                assert 'percentage' in result[market][range_def.name]
                assert isinstance(result[market][range_def.name]['count'], int)
                assert isinstance(result[market][range_def.name]['percentage'], float)

    def test_vectorized_market_classification(self, vectorized_ops):
        """Stock codes map to the expected market labels."""
        ts_codes = pd.Series(['688001.SH', '600001.SH', '000001.SZ', '300001.SZ', '430001.BJ'])

        result = vectorized_ops._vectorized_market_classification(ts_codes)

        assert result.iloc[0] == 'star'      # 688 prefix -> STAR market
        assert result.iloc[1] == 'shanghai'  # 600 prefix -> Shanghai
        assert result.iloc[2] == 'shenzhen'  # 000 prefix -> Shenzhen
        assert result.iloc[3] == 'shenzhen'  # 300 prefix is also grouped under 'shenzhen'
        assert result.iloc[4] == 'beijing'   # 430 prefix -> Beijing exchange

    def test_vectorized_st_detection(self, vectorized_ops):
        """Names marking ST, delisting or suspension are flagged; plain names are not."""
        stock_names = pd.Series(['普通股票', '*ST股票', 'ST股票', '退市股票', '暂停股票'])

        result = vectorized_ops._vectorized_st_detection(stock_names)

        assert not result.iloc[0]  # ordinary stock
        assert result.iloc[1]      # '*ST' prefix
        assert result.iloc[2]      # 'ST' prefix
        assert result.iloc[3]      # delisting marker
        assert result.iloc[4]      # suspension marker


class TestPriceDistributionParallelProcessor:
    """Tests for PriceDistributionParallelProcessor."""

    @pytest.fixture
    def config(self):
        """Return a thread-based parallel config with two workers."""
        return PriceDistributionPerformanceConfig(
            enable_parallel_processing=True,
            max_workers=2,
            batch_size=100,
            chunk_size=50,
            use_multiprocessing=False
        )

    @pytest.fixture
    def parallel_processor(self, config):
        """Return a parallel processor built from the ``config`` fixture."""
        return PriceDistributionParallelProcessor(config)

    @pytest.fixture
    def large_stock_data(self):
        """Build a seeded, random 500-row stock DataFrame."""
        np.random.seed(42)
        n_stocks = 500

        data = {
            'ts_code': [f'{i:06d}.SZ' for i in range(n_stocks)],
            'name': [f'股票{i}' for i in range(n_stocks)],
            'close': np.random.uniform(5, 100, n_stocks),
            'pre_close': np.random.uniform(5, 100, n_stocks)
        }

        return pd.DataFrame(data)

    @pytest.fixture
    def distribution_ranges(self):
        """Return four ranges split at 0% and +/-3%."""
        return [
            DistributionRange(name="0-3%", min_value=0.0, max_value=3.0, is_positive=True, display_name="0-3%"),
            DistributionRange(name=">=3%", min_value=3.0, max_value=float('inf'), is_positive=True, display_name=">=3%"),
            DistributionRange(name="0到-3%", min_value=-3.0, max_value=0.0, is_positive=False, display_name="0到-3%"),
            DistributionRange(name="<-3%", min_value=float('-inf'), max_value=-3.0, is_positive=False, display_name="<-3%")
        ]

    @pytest.mark.asyncio
    async def test_parallel_distribution_analysis_small_data(self, parallel_processor, distribution_ranges):
        """A two-row dataset still produces a result with a 'total' entry.

        NOTE(review): such a small input is presumably handled on a
        single-threaded path; this test does not verify which path runs.
        """
        small_data = pd.DataFrame({
            'ts_code': ['000001.SZ', '000002.SZ'],
            'close': [10.0, 20.0],
            'pre_close': [9.0, 19.0]
        })

        result = await parallel_processor.parallel_distribution_analysis(small_data, distribution_ranges)

        assert isinstance(result, dict)
        assert 'total' in result

    @pytest.mark.asyncio
    async def test_parallel_distribution_analysis_large_data(self, parallel_processor, large_stock_data, distribution_ranges):
        """Analysing 500 rows accounts for every input stock in 'total'."""
        result = await parallel_processor.parallel_distribution_analysis(large_stock_data, distribution_ranges)

        assert isinstance(result, dict)
        assert 'total' in result
        assert result['total']['total_stocks'] == len(large_stock_data)

    @pytest.mark.asyncio
    async def test_parallel_distribution_analysis_empty_data(self, parallel_processor, distribution_ranges):
        """An empty DataFrame yields an empty result dict."""
        empty_data = pd.DataFrame()
        result = await parallel_processor.parallel_distribution_analysis(empty_data, distribution_ranges)

        assert result == {}

    def test_split_dataframe(self, parallel_processor, large_stock_data):
        """Splitting 500 rows by 100 gives five chunks that cover all rows."""
        chunks = parallel_processor._split_dataframe(large_stock_data, 100)

        assert len(chunks) == 5  # 500 / 100 = 5
        assert all(len(chunk) <= 100 for chunk in chunks)
        assert sum(len(chunk) for chunk in chunks) == len(large_stock_data)

    def test_merge_analysis_results(self, parallel_processor):
        """Merging chunk results sums counts and recomputes percentages."""
        # Simulated per-chunk results for two chunks
        results = [
            {
                'total': {
                    'total_stocks': 100,
                    '0-3%': {'count': 30},
                    '>=3%': {'count': 70}
                }
            },
            {
                'total': {
                    'total_stocks': 150,
                    '0-3%': {'count': 45},
                    '>=3%': {'count': 105}
                }
            }
        ]

        merged = parallel_processor._merge_analysis_results(results)

        assert merged['total']['total_stocks'] == 250
        assert merged['total']['0-3%']['count'] == 75
        assert merged['total']['>=3%']['count'] == 175
        # Percentages are computed floats: compare approximately so the test
        # does not depend on the order of the division and multiplication.
        assert merged['total']['0-3%']['percentage'] == pytest.approx(30.0)  # 75/250 * 100
        assert merged['total']['>=3%']['percentage'] == pytest.approx(70.0)  # 175/250 * 100


class TestPriceDistributionMemoryOptimizer:
    """Tests for PriceDistributionMemoryOptimizer."""

    @pytest.fixture
    def config(self):
        """Return a config with memory optimization and garbage collection enabled."""
        return PriceDistributionPerformanceConfig(
            enable_memory_optimization=True,
            memory_limit_mb=1024,
            enable_garbage_collection=True,
            gc_frequency=5
        )

    @pytest.fixture
    def memory_optimizer(self, config):
        """Return a memory optimizer built from the ``config`` fixture."""
        return PriceDistributionMemoryOptimizer(config)

    @pytest.fixture
    def sample_stock_data(self):
        """Return a three-row stock DataFrame with price, volume and is_st columns."""
        return pd.DataFrame({
            'ts_code': ['000001.SZ', '000002.SZ', '000003.SZ'],
            'name': ['股票1', '股票2', '股票3'],
            'open': [10.0, 20.0, 30.0],
            'high': [11.0, 21.0, 31.0],
            'low': [9.0, 19.0, 29.0],
            'close': [10.5, 20.5, 30.5],
            'pre_close': [10.0, 20.0, 30.0],
            'volume': [1000, 2000, 3000],
            'amount': [10000.0, 40000.0, 90000.0],
            'is_st': [False, True, False]
        })

    def test_optimize_stock_dataframe(self, memory_optimizer, sample_stock_data):
        """Optimization keeps rows and columns and downcasts numeric dtypes."""
        optimized_df = memory_optimizer.optimize_stock_dataframe(sample_stock_data)

        # Data integrity: same row count, same columns in the same order
        assert len(optimized_df) == len(sample_stock_data)
        assert list(optimized_df.columns) == list(sample_stock_data.columns)

        # Numeric columns must end up in one of the compact dtypes
        numeric_columns = ['open', 'high', 'low', 'close', 'pre_close', 'volume', 'amount']
        for col in numeric_columns:
            if col in optimized_df.columns:
                assert optimized_df[col].dtype in ['float32', 'int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32']

        # The boolean column stays boolean
        assert optimized_df['is_st'].dtype == 'bool'

    def test_optimize_stock_dataframe_empty(self, memory_optimizer):
        """Optimizing an empty DataFrame returns an empty DataFrame."""
        empty_df = pd.DataFrame()
        result = memory_optimizer.optimize_stock_dataframe(empty_df)

        assert isinstance(result, pd.DataFrame)
        assert len(result) == 0

    def test_get_memory_usage(self, memory_optimizer):
        """Memory usage is reported as a dict of floats with rss/vms/percent keys."""
        memory_usage = memory_optimizer.get_memory_usage()

        assert isinstance(memory_usage, dict)
        assert 'rss_mb' in memory_usage
        assert 'vms_mb' in memory_usage
        assert 'percent' in memory_usage
        assert all(isinstance(v, float) for v in memory_usage.values())

    def test_check_memory_limit(self, memory_optimizer):
        """The limit check returns a bool."""
        # The outcome depends on the host system, so only the type is checked
        result = memory_optimizer.check_memory_limit()
        assert isinstance(result, bool)

    def test_check_and_cleanup_memory(self, memory_optimizer):
        """Each call to check_and_cleanup_memory increments gc_counter."""
        # Call repeatedly; with gc_frequency=5 this presumably crosses the
        # garbage-collection threshold, though only the counter is verified.
        for _ in range(10):
            memory_optimizer.check_and_cleanup_memory()

        assert memory_optimizer.gc_counter == 10


class TestPriceDistributionPerformanceOptimizer:
    """Tests for the top-level PriceDistributionPerformanceOptimizer."""

    @pytest.fixture
    def config(self):
        """Return a config with all optimizations on and monitoring off."""
        return PriceDistributionPerformanceConfig(
            enable_vectorization=True,
            enable_parallel_processing=True,
            enable_memory_optimization=True,
            enable_performance_monitoring=False,  # avoid interference from global state
            max_workers=2,
            batch_size=100
        )

    @pytest.fixture
    def optimizer(self, config):
        """Return an optimizer built from the ``config`` fixture."""
        return PriceDistributionPerformanceOptimizer(config)

    @pytest.fixture
    def sample_stock_data(self):
        """Build a seeded, random 200-row stock DataFrame."""
        np.random.seed(42)
        row_count = 200
        indices = range(row_count)

        return pd.DataFrame({
            'ts_code': [f'{i:06d}.SZ' for i in indices],
            'name': [f'股票{i}' for i in indices],
            'close': np.random.uniform(5, 100, row_count),
            'pre_close': np.random.uniform(5, 100, row_count)
        })

    @pytest.fixture
    def distribution_ranges(self):
        """Return four ranges split at 0% and +/-5%."""
        # (name, min_value, max_value, is_positive); display_name mirrors name
        specs = [
            ("0-5%", 0.0, 5.0, True),
            (">=5%", 5.0, float('inf'), True),
            ("0到-5%", -5.0, 0.0, False),
            ("<-5%", float('-inf'), -5.0, False),
        ]
        return [
            DistributionRange(
                name=label,
                min_value=lower,
                max_value=upper,
                is_positive=positive,
                display_name=label,
            )
            for label, lower, upper, positive in specs
        ]

    @pytest.mark.asyncio
    async def test_optimize_distribution_analysis(self, optimizer, sample_stock_data, distribution_ranges):
        """A successful analysis returns a dict and updates the performance counters."""
        analysis = await optimizer.optimize_distribution_analysis(
            sample_stock_data, distribution_ranges, "test_analysis"
        )

        assert isinstance(analysis, dict)
        assert 'total' in analysis

        # One operation was run, it succeeded, and it took measurable time
        counters = optimizer.get_performance_stats()['performance_stats']
        assert counters['total_operations'] == 1
        assert counters['successful_operations'] == 1
        assert counters['average_processing_time'] > 0

    @pytest.mark.asyncio
    async def test_optimize_distribution_analysis_empty_data(self, optimizer, distribution_ranges):
        """An empty DataFrame yields an empty result dict."""
        outcome = await optimizer.optimize_distribution_analysis(pd.DataFrame(), distribution_ranges)

        assert outcome == {}

    @pytest.mark.asyncio
    async def test_optimize_distribution_analysis_error_handling(self, optimizer, distribution_ranges):
        """Invalid input raises and is counted as a failed operation."""
        # A frame without any of the expected stock columns
        bad_frame = pd.DataFrame({'invalid_column': [1, 2, 3]})

        with pytest.raises(Exception):
            await optimizer.optimize_distribution_analysis(bad_frame, distribution_ranges)

        counters = optimizer.get_performance_stats()['performance_stats']
        assert counters['failed_operations'] == 1

    def test_get_performance_stats(self, optimizer):
        """The stats report has the expected sections and reflects the config flags."""
        stats = optimizer.get_performance_stats()

        assert isinstance(stats, dict)
        for section in ('config', 'performance_stats', 'memory_usage', 'cpu_count'):
            assert section in stats

        # Every optimization enabled in the fixture must be reported as enabled
        reported = stats['config']
        for flag in (
            'vectorization_enabled',
            'parallel_processing_enabled',
            'memory_optimization_enabled',
        ):
            assert reported[flag] == True

    def test_reset_performance_stats(self, optimizer):
        """Resetting the stats zeroes previously set counters."""
        seeded = {'total_operations': 10, 'successful_operations': 8}
        for counter_name, value in seeded.items():
            optimizer.performance_stats[counter_name] = value

        optimizer.reset_performance_stats()

        for counter_name in seeded:
            assert optimizer.performance_stats[counter_name] == 0


class TestPerformanceIntegration:
    """Integration tests for the performance optimizer."""

    @pytest.mark.asyncio
    async def test_performance_comparison(self):
        """Run the same analysis with optimizations on and off and report timings.

        Only the result types are asserted; timings and statistics are printed
        for manual inspection.
        """
        np.random.seed(42)
        n_stocks = 1000

        stock_data = pd.DataFrame({
            'ts_code': [f'{i:06d}.SZ' for i in range(n_stocks)],
            'name': [f'股票{i}' for i in range(n_stocks)],
            'close': np.random.uniform(5, 100, n_stocks),
            'pre_close': np.random.uniform(5, 100, n_stocks)
        })

        ranges = [
            DistributionRange(name="0-5%", min_value=0.0, max_value=5.0, is_positive=True, display_name="0-5%"),
            DistributionRange(name=">=5%", min_value=5.0, max_value=float('inf'), is_positive=True, display_name=">=5%")
        ]

        # Optimized run: every optimization enabled
        optimized_config = PriceDistributionPerformanceConfig(
            enable_vectorization=True,
            enable_parallel_processing=True,
            enable_memory_optimization=True,
            enable_performance_monitoring=False
        )
        optimized_optimizer = PriceDistributionPerformanceOptimizer(optimized_config)

        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        start_time = time.perf_counter()
        optimized_result = await optimized_optimizer.optimize_distribution_analysis(stock_data, ranges)
        optimized_time = time.perf_counter() - start_time

        # Baseline run: every optimization disabled
        basic_config = PriceDistributionPerformanceConfig(
            enable_vectorization=False,
            enable_parallel_processing=False,
            enable_memory_optimization=False,
            enable_performance_monitoring=False
        )
        basic_optimizer = PriceDistributionPerformanceOptimizer(basic_config)

        start_time = time.perf_counter()
        basic_result = await basic_optimizer.optimize_distribution_analysis(stock_data, ranges)
        basic_time = time.perf_counter() - start_time

        # Both runs must at least produce a dict (contents are not compared)
        assert isinstance(optimized_result, dict)
        assert isinstance(basic_result, dict)

        # Report the timing difference
        print(f"优化版本耗时: {optimized_time:.3f}s")
        print(f"基础版本耗时: {basic_time:.3f}s")

        # Report the collected performance statistics
        optimized_stats = optimized_optimizer.get_performance_stats()
        basic_stats = basic_optimizer.get_performance_stats()

        print(f"优化版本统计: {optimized_stats['performance_stats']}")
        print(f"基础版本统计: {basic_stats['performance_stats']}")

    @pytest.mark.asyncio
    async def test_memory_usage_optimization(self):
        """Analyse 5000 rows with memory optimization on and report memory usage.

        Only the result type is asserted; memory figures are printed for
        manual inspection.
        """
        np.random.seed(42)
        n_stocks = 5000

        stock_data = pd.DataFrame({
            'ts_code': [f'{i:06d}.SZ' for i in range(n_stocks)],
            'name': [f'股票{i}' for i in range(n_stocks)],
            'open': np.random.uniform(5, 100, n_stocks),
            'high': np.random.uniform(5, 100, n_stocks),
            'low': np.random.uniform(5, 100, n_stocks),
            'close': np.random.uniform(5, 100, n_stocks),
            'pre_close': np.random.uniform(5, 100, n_stocks),
            'volume': np.random.randint(1000, 1000000, n_stocks),
            'amount': np.random.uniform(10000, 10000000, n_stocks)
        })

        ranges = [
            DistributionRange(name="positive", min_value=0.0, max_value=float('inf'), is_positive=True, display_name="positive"),
            DistributionRange(name="negative", min_value=float('-inf'), max_value=0.0, is_positive=False, display_name="negative")
        ]

        config = PriceDistributionPerformanceConfig(
            enable_memory_optimization=True,
            memory_limit_mb=512
        )
        optimizer = PriceDistributionPerformanceOptimizer(config)

        # Memory snapshot before the analysis
        initial_memory = optimizer.memory_optimizer.get_memory_usage()

        result = await optimizer.optimize_distribution_analysis(stock_data, ranges)

        # Memory snapshot after the analysis
        final_memory = optimizer.memory_optimizer.get_memory_usage()

        assert isinstance(result, dict)

        # Report memory usage
        print(f"初始内存: {initial_memory['rss_mb']:.2f}MB")
        print(f"最终内存: {final_memory['rss_mb']:.2f}MB")
        print(f"内存变化: {final_memory['rss_mb'] - initial_memory['rss_mb']:+.2f}MB")

        # Report how many memory optimizations were performed
        stats = optimizer.get_performance_stats()
        print(f"内存优化次数: {stats['performance_stats']['memory_optimizations']}")


if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file as a script
    # reports test failures to the calling shell or CI job.
    raise SystemExit(pytest.main([__file__, "-v"]))