"""
历史数据验证测试

使用历史数据验证涨停统计系统的准确性和一致性
"""

import pytest
import pytest_asyncio
import asyncio
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from unittest.mock import Mock, AsyncMock, patch
import tempfile
import os

from quickstock.services.limit_up_stats_service import LimitUpStatsService
from quickstock.models import LimitUpStatsRequest, LimitUpStats, StockDailyData
from quickstock.utils.stock_classifier import StockCodeClassifier
from quickstock.utils.limit_up_detector import LimitUpDetector
from quickstock.core.database import DatabaseManager
from quickstock.core.repository import LimitUpStatsRepository
from quickstock.core.data_manager import DataManager


class HistoricalDataValidator:
    """历史数据验证器"""
    
    def __init__(self, logger=None) -> None:
        """Create the validator together with its helper objects and reference data.

        Args:
            logger: Optional logger object; this method only stores it on the instance.
        """
        self.logger = logger
        self.classifier = StockCodeClassifier()
        self.detector = LimitUpDetector()
        
        # Known historical limit-up data that serves as the reference for validation
        self.known_limit_up_data = self._create_known_historical_data()
    
    def _create_known_historical_data(self) -> Dict[str, Dict[str, Any]]:
        """创建已知的历史涨停数据"""
        return {
            '20241015': {
                'expected_stats': {
                    'total': 150,
                    'non_st': 135,
                    'shanghai': 60,
                    'shenzhen': 55,
                    'star': 25,
                    'beijing': 10,
                    'st': 15
                },
                'sample_limit_up_stocks': [
                    {'ts_code': '600000.SH', 'name': '浦发银行', 'market': 'shanghai', 'is_st': False},
                    {'ts_code': '000001.SZ', 'name': '平安银行', 'market': 'shenzhen', 'is_st': False},
                    {'ts_code': '688001.SH', 'name': '华兴源创', 'market': 'star', 'is_st': False},
                    {'ts_code': '430001.BJ', 'name': '北证股票', 'market': 'beijing', 'is_st': False},
                    {'ts_code': '000002.SZ', 'name': 'ST万科', 'market': 'shenzhen', 'is_st': True}
                ]
            },
            '20241016': {
                'expected_stats': {
                    'total': 89,
                    'non_st': 82,
                    'shanghai': 35,
                    'shenzhen': 32,
                    'star': 15,
                    'beijing': 7,
                    'st': 7
                },
                'sample_limit_up_stocks': [
                    {'ts_code': '600036.SH', 'name': '招商银行', 'market': 'shanghai', 'is_st': False},
                    {'ts_code': '000858.SZ', 'name': '五粮液', 'market': 'shenzhen', 'is_st': False},
                    {'ts_code': '688099.SH', 'name': '晶晨股份', 'market': 'star', 'is_st': False}
                ]
            },
            '20241017': {
                'expected_stats': {
                    'total': 203,
                    'non_st': 185,
                    'shanghai': 85,
                    'shenzhen': 78,
                    'star': 22,
                    'beijing': 18,
                    'st': 18
                },
                'sample_limit_up_stocks': [
                    {'ts_code': '600519.SH', 'name': '贵州茅台', 'market': 'shanghai', 'is_st': False},
                    {'ts_code': '000858.SZ', 'name': '五粮液', 'market': 'shenzhen', 'is_st': False}
                ]
            }
        }
    
    def generate_historical_stock_data(self, trade_date: str, 
                                     expected_stats: Dict[str, int],
                                     sample_stocks: List[Dict[str, Any]]) -> pd.DataFrame:
        """
        根据已知统计数据生成历史股票数据
        
        Args:
            trade_date: 交易日期
            expected_stats: 预期统计数据
            sample_stocks: 样本股票列表
            
        Returns:
            生成的历史股票数据
        """
        stock_data = []
        stock_id = 0
        
        # 生成各市场的涨停股票
        markets = ['shanghai', 'shenzhen', 'star', 'beijing']
        for market in markets:
            market_count = expected_stats.get(market, 0)
            
            for i in range(market_count):
                # 生成股票代码
                if market == 'shanghai':
                    ts_code = f'60{stock_id:04d}.SH'
                elif market == 'shenzhen':
                    ts_code = f'00{stock_id:04d}.SZ'
                elif market == 'star':
                    ts_code = f'688{stock_id:03d}.SH'
                else:  # beijing
                    ts_code = f'43{stock_id:04d}.BJ'
                
                # 确定是否为ST股票
                is_st = i < expected_stats.get('st', 0) and market in ['shanghai', 'shenzhen']
                stock_name = f'ST股票{stock_id:04d}' if is_st else f'股票{stock_id:04d}'
                
                # 生成涨停价格数据
                base_price = 10.0 + (stock_id % 100) * 0.1
                
                if market == 'star':
                    limit_up_price = base_price * 1.20  # 科创板20%
                elif market == 'beijing':
                    limit_up_price = base_price * 1.30  # 北证30%
                elif is_st:
                    limit_up_price = base_price * 1.05  # ST股票5%
                else:
                    limit_up_price = base_price * 1.10  # 普通股票10%
                
                stock_data.append({
                    'ts_code': ts_code,
                    'trade_date': trade_date,
                    'open': base_price,
                    'high': limit_up_price,
                    'low': base_price * 0.98,
                    'close': limit_up_price,
                    'pre_close': base_price,
                    'change': limit_up_price - base_price,
                    'pct_chg': (limit_up_price - base_price) / base_price * 100,
                    'vol': (stock_id % 1000 + 1) * 1000,
                    'amount': limit_up_price * (stock_id % 1000 + 1) * 1000,
                    'name': stock_name
                })
                
                stock_id += 1
        
        # 生成一些非涨停股票
        non_limit_up_count = 1000  # 生成1000只非涨停股票
        for i in range(non_limit_up_count):
            market = markets[i % len(markets)]
            
            if market == 'shanghai':
                ts_code = f'60{stock_id:04d}.SH'
            elif market == 'shenzhen':
                ts_code = f'00{stock_id:04d}.SZ'
            elif market == 'star':
                ts_code = f'688{stock_id:03d}.SH'
            else:  # beijing
                ts_code = f'43{stock_id:04d}.BJ'
            
            base_price = 10.0 + (stock_id % 100) * 0.1
            # 非涨停股票，价格变动在-5%到+8%之间
            price_change_pct = (i % 13 - 5) / 100.0
            close_price = base_price * (1 + price_change_pct)
            high_price = close_price * 1.02
            
            stock_data.append({
                'ts_code': ts_code,
                'trade_date': trade_date,
                'open': base_price,
                'high': high_price,
                'low': base_price * 0.98,
                'close': close_price,
                'pre_close': base_price,
                'change': close_price - base_price,
                'pct_chg': price_change_pct * 100,
                'vol': (stock_id % 1000 + 1) * 1000,
                'amount': close_price * (stock_id % 1000 + 1) * 1000,
                'name': f'股票{stock_id:04d}'
            })
            
            stock_id += 1
        
        # 添加样本股票（如果提供）
        for sample_stock in sample_stocks:
            # 确保样本股票在数据中
            existing_stock = next((s for s in stock_data if s['ts_code'] == sample_stock['ts_code']), None)
            if not existing_stock:
                base_price = 20.0
                if sample_stock['market'] == 'star':
                    limit_up_price = base_price * 1.20
                elif sample_stock['market'] == 'beijing':
                    limit_up_price = base_price * 1.30
                elif sample_stock['is_st']:
                    limit_up_price = base_price * 1.05
                else:
                    limit_up_price = base_price * 1.10
                
                stock_data.append({
                    'ts_code': sample_stock['ts_code'],
                    'trade_date': trade_date,
                    'open': base_price,
                    'high': limit_up_price,
                    'low': base_price * 0.98,
                    'close': limit_up_price,
                    'pre_close': base_price,
                    'change': limit_up_price - base_price,
                    'pct_chg': (limit_up_price - base_price) / base_price * 100,
                    'vol': 1000000,
                    'amount': limit_up_price * 1000000,
                    'name': sample_stock['name']
                })
        
        return pd.DataFrame(stock_data)
    
    def validate_statistics_accuracy(self, actual_stats: LimitUpStats, 
                                   expected_stats: Dict[str, int],
                                   tolerance: float = 0.05) -> Dict[str, Any]:
        """
        验证统计数据准确性
        
        Args:
            actual_stats: 实际统计结果
            expected_stats: 预期统计结果
            tolerance: 容差比例
            
        Returns:
            验证结果
        """
        validation_result = {
            'is_accurate': True,
            'accuracy_issues': [],
            'accuracy_metrics': {},
            'detailed_comparison': {}
        }
        
        # 比较各项统计指标
        stats_to_compare = ['total', 'non_st', 'shanghai', 'shenzhen', 'star', 'beijing', 'st']
        
        for stat_name in stats_to_compare:
            actual_value = getattr(actual_stats, stat_name, 0)
            expected_value = expected_stats.get(stat_name, 0)
            
            # 计算准确率
            if expected_value > 0:
                accuracy = 1 - abs(actual_value - expected_value) / expected_value
                acceptable = accuracy >= (1 - tolerance)
            else:
                accuracy = 1.0 if actual_value == 0 else 0.0
                acceptable = actual_value == expected_value
            
            validation_result['accuracy_metrics'][stat_name] = accuracy
            validation_result['detailed_comparison'][stat_name] = {
                'actual': actual_value,
                'expected': expected_value,
                'difference': actual_value - expected_value,
                'accuracy': accuracy,
                'acceptable': acceptable
            }
            
            if not acceptable:
                validation_result['is_accurate'] = False
                validation_result['accuracy_issues'].append(
                    f"{stat_name}: 实际值 {actual_value}, 预期值 {expected_value}, "
                    f"准确率 {accuracy:.2%}"
                )
        
        # 计算总体准确率
        overall_accuracy = sum(validation_result['accuracy_metrics'].values()) / len(stats_to_compare)
        validation_result['overall_accuracy'] = overall_accuracy
        
        return validation_result
    
    def validate_data_consistency(self, stats: LimitUpStats) -> Dict[str, Any]:
        """
        验证数据一致性
        
        Args:
            stats: 统计结果
            
        Returns:
            一致性验证结果
        """
        consistency_result = {
            'is_consistent': True,
            'consistency_issues': [],
            'consistency_checks': {}
        }
        
        # 检查1: 总数 = 各市场之和
        market_sum = stats.shanghai + stats.shenzhen + stats.star + stats.beijing
        total_check = market_sum == stats.total
        consistency_result['consistency_checks']['market_sum_equals_total'] = {
            'passed': total_check,
            'market_sum': market_sum,
            'total': stats.total,
            'difference': market_sum - stats.total
        }
        
        if not total_check:
            consistency_result['is_consistent'] = False
            consistency_result['consistency_issues'].append(
                f"市场总和 ({market_sum}) != 总数 ({stats.total})"
            )
        
        # 检查2: 总数 = ST + 非ST
        st_sum = stats.st + stats.non_st
        st_check = st_sum <= stats.total  # 允许有未分类的股票
        consistency_result['consistency_checks']['st_sum_check'] = {
            'passed': st_check,
            'st_sum': st_sum,
            'total': stats.total,
            'difference': st_sum - stats.total
        }
        
        if not st_check:
            consistency_result['is_consistent'] = False
            consistency_result['consistency_issues'].append(
                f"ST分类总和 ({st_sum}) > 总数 ({stats.total})"
            )
        
        # 检查3: 涨停股票列表长度 = 总数
        stock_list_check = len(stats.limit_up_stocks) == stats.total
        consistency_result['consistency_checks']['stock_list_length'] = {
            'passed': stock_list_check,
            'list_length': len(stats.limit_up_stocks),
            'total': stats.total,
            'difference': len(stats.limit_up_stocks) - stats.total
        }
        
        if not stock_list_check:
            consistency_result['is_consistent'] = False
            consistency_result['consistency_issues'].append(
                f"股票列表长度 ({len(stats.limit_up_stocks)}) != 总数 ({stats.total})"
            )
        
        # 检查4: 市场分解数据一致性
        if stats.market_breakdown:
            breakdown_total = sum(len(stocks) for stocks in stats.market_breakdown.values())
            breakdown_check = breakdown_total <= stats.total
            consistency_result['consistency_checks']['market_breakdown'] = {
                'passed': breakdown_check,
                'breakdown_total': breakdown_total,
                'total': stats.total,
                'difference': breakdown_total - stats.total
            }
            
            if not breakdown_check:
                consistency_result['is_consistent'] = False
                consistency_result['consistency_issues'].append(
                    f"市场分解总数 ({breakdown_total}) > 总数 ({stats.total})"
                )
        
        return consistency_result
    
    def validate_edge_cases(self, service: LimitUpStatsService) -> Dict[str, Any]:
        """
        验证边界情况和特殊交易场景
        
        Args:
            service: 涨停统计服务
            
        Returns:
            边界情况验证结果
        """
        edge_case_results = {
            'all_passed': True,
            'test_results': {}
        }
        
        # 测试用例1: 无涨停股票的交易日
        no_limit_up_data = pd.DataFrame([
            {
                'ts_code': '600000.SH',
                'trade_date': '20241020',
                'open': 10.0,
                'high': 10.5,
                'low': 9.8,
                'close': 10.2,
                'pre_close': 10.0,
                'change': 0.2,
                'pct_chg': 2.0,
                'vol': 1000000,
                'amount': 10200000,
                'name': '测试股票'
            }
        ])
        
        try:
            result = service.vectorized_ops.vectorized_limit_up_detection(no_limit_up_data)
            limit_up_count = result['is_limit_up'].sum()
            
            edge_case_results['test_results']['no_limit_up_day'] = {
                'passed': limit_up_count == 0,
                'limit_up_count': limit_up_count,
                'expected': 0
            }
            
            if limit_up_count != 0:
                edge_case_results['all_passed'] = False
                
        except Exception as e:
            edge_case_results['test_results']['no_limit_up_day'] = {
                'passed': False,
                'error': str(e)
            }
            edge_case_results['all_passed'] = False
        
        # 测试用例2: 全部涨停的交易日
        all_limit_up_data = pd.DataFrame([
            {
                'ts_code': '600000.SH',
                'trade_date': '20241021',
                'open': 10.0,
                'high': 11.0,
                'low': 10.0,
                'close': 11.0,
                'pre_close': 10.0,
                'change': 1.0,
                'pct_chg': 10.0,
                'vol': 1000000,
                'amount': 11000000,
                'name': '测试股票1'
            },
            {
                'ts_code': '000001.SZ',
                'trade_date': '20241021',
                'open': 20.0,
                'high': 22.0,
                'low': 20.0,
                'close': 22.0,
                'pre_close': 20.0,
                'change': 2.0,
                'pct_chg': 10.0,
                'vol': 2000000,
                'amount': 44000000,
                'name': '测试股票2'
            }
        ])
        
        try:
            result = service.vectorized_ops.vectorized_limit_up_detection(all_limit_up_data)
            limit_up_count = result['is_limit_up'].sum()
            
            edge_case_results['test_results']['all_limit_up_day'] = {
                'passed': limit_up_count == len(all_limit_up_data),
                'limit_up_count': limit_up_count,
                'expected': len(all_limit_up_data)
            }
            
            if limit_up_count != len(all_limit_up_data):
                edge_case_results['all_passed'] = False
                
        except Exception as e:
            edge_case_results['test_results']['all_limit_up_day'] = {
                'passed': False,
                'error': str(e)
            }
            edge_case_results['all_passed'] = False
        
        # 测试用例3: ST股票5%涨停
        st_stock_data = pd.DataFrame([
            {
                'ts_code': '000002.SZ',
                'trade_date': '20241022',
                'open': 10.0,
                'high': 10.5,
                'low': 10.0,
                'close': 10.5,
                'pre_close': 10.0,
                'change': 0.5,
                'pct_chg': 5.0,
                'vol': 1000000,
                'amount': 10500000,
                'name': 'ST测试'
            }
        ])
        
        try:
            result = service.vectorized_ops.vectorized_limit_up_detection(st_stock_data)
            limit_up_count = result['is_limit_up'].sum()
            is_st_detected = result['is_st'].iloc[0] if len(result) > 0 else False
            
            edge_case_results['test_results']['st_stock_5_percent'] = {
                'passed': limit_up_count == 1 and is_st_detected,
                'limit_up_count': limit_up_count,
                'is_st_detected': is_st_detected,
                'expected_limit_up': 1,
                'expected_st': True
            }
            
            if not (limit_up_count == 1 and is_st_detected):
                edge_case_results['all_passed'] = False
                
        except Exception as e:
            edge_case_results['test_results']['st_stock_5_percent'] = {
                'passed': False,
                'error': str(e)
            }
            edge_case_results['all_passed'] = False
        
        # 测试用例4: 科创板20%涨停
        star_stock_data = pd.DataFrame([
            {
                'ts_code': '688001.SH',
                'trade_date': '20241023',
                'open': 50.0,
                'high': 60.0,
                'low': 50.0,
                'close': 60.0,
                'pre_close': 50.0,
                'change': 10.0,
                'pct_chg': 20.0,
                'vol': 500000,
                'amount': 30000000,
                'name': '科创板测试'
            }
        ])
        
        try:
            result = service.vectorized_ops.vectorized_limit_up_detection(star_stock_data)
            limit_up_count = result['is_limit_up'].sum()
            market_classification = result['market'].iloc[0] if len(result) > 0 else 'unknown'
            
            edge_case_results['test_results']['star_stock_20_percent'] = {
                'passed': limit_up_count == 1 and market_classification == 'star',
                'limit_up_count': limit_up_count,
                'market_classification': market_classification,
                'expected_limit_up': 1,
                'expected_market': 'star'
            }
            
            if not (limit_up_count == 1 and market_classification == 'star'):
                edge_case_results['all_passed'] = False
                
        except Exception as e:
            edge_case_results['test_results']['star_stock_20_percent'] = {
                'passed': False,
                'error': str(e)
            }
            edge_case_results['all_passed'] = False
        
        return edge_case_results


class TestHistoricalDataValidation:
    """历史数据验证测试类"""
    
    @pytest_asyncio.fixture
    async def temp_database(self):
        """Yield an initialized DatabaseManager that is backed by a temporary file.

        The database file and its "-wal" / "-shm" side files are removed again
        on teardown; files that cannot be removed are ignored.
        """
        # Only the path is needed: the handle is closed when the ``with`` block
        # ends, and delete=False keeps the file on disk for the database manager.
        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as handle:
            db_path = handle.name

        try:
            manager = DatabaseManager(db_path)
            await manager.initialize()
            yield manager
        finally:
            # Best-effort cleanup of the temporary files
            for suffix in ("", "-wal", "-shm"):
                candidate = f"{db_path}{suffix}"
                if not os.path.exists(candidate):
                    continue
                try:
                    os.unlink(candidate)
                except OSError:
                    pass
    
    @pytest.fixture
    def historical_validator(self) -> HistoricalDataValidator:
        """Provide a fresh HistoricalDataValidator (created without a logger)."""
        return HistoricalDataValidator()
    
    @pytest.fixture
    def mock_data_manager(self, historical_validator):
        """Provide a mock data manager whose async ``get_data`` serves generated data.

        For a request whose ``start_date`` is one of the validator's known trade
        dates, 'stock_daily' yields the generated quote table and 'stock_basic'
        yields its ts_code/name columns. Every other request gets an empty frame.
        """
        def fake_get_data(request):
            # Requests without a start date cannot be matched to reference data
            if not hasattr(request, 'start_date'):
                return pd.DataFrame()

            trade_date = request.start_date
            reference = historical_validator.known_limit_up_data
            if trade_date not in reference:
                return pd.DataFrame()

            entry = reference[trade_date]
            stock_data = historical_validator.generate_historical_stock_data(
                trade_date,
                entry['expected_stats'],
                entry['sample_limit_up_stocks']
            )

            if request.data_type == 'stock_daily':
                return stock_data
            if request.data_type == 'stock_basic':
                return pd.DataFrame([
                    {'ts_code': ts_code, 'name': name}
                    for ts_code, name in zip(stock_data['ts_code'], stock_data['name'])
                ])
            return pd.DataFrame()

        manager = Mock()
        manager.get_data = AsyncMock()
        manager.get_data.side_effect = fake_get_data
        return manager
    
    @pytest.mark.asyncio
    async def test_known_historical_data_accuracy(self, historical_validator, mock_data_manager):
        """Statistics for every known trade date must match the reference within 10%."""
        service = LimitUpStatsService(mock_data_manager)
        reference = historical_validator.known_limit_up_data

        for trade_date, entry in reference.items():
            # Compute the limit-up statistics for this date
            stats = await service.get_daily_limit_up_stats(
                LimitUpStatsRequest(trade_date=trade_date)
            )

            # Compare them with the expected figures (10% tolerance)
            outcome = historical_validator.validate_statistics_accuracy(
                stats,
                entry['expected_stats'],
                tolerance=0.1
            )
            overall = outcome['overall_accuracy']

            assert outcome['is_accurate'], \
                f"日期 {trade_date} 统计不准确: {outcome['accuracy_issues']}"

            assert overall >= 0.9, \
                f"日期 {trade_date} 总体准确率过低: {overall:.2%}"

            print(f"日期 {trade_date} 验证通过，总体准确率: {overall:.2%}")
    
    @pytest.mark.asyncio
    async def test_data_consistency_validation(self, historical_validator, mock_data_manager):
        """Statistics for every known trade date must be internally consistent."""
        service = LimitUpStatsService(mock_data_manager)

        for trade_date in historical_validator.known_limit_up_data:
            # Compute the limit-up statistics for this date
            stats = await service.get_daily_limit_up_stats(
                LimitUpStatsRequest(trade_date=trade_date)
            )

            # Run the consistency checks on the result
            outcome = historical_validator.validate_data_consistency(stats)

            assert outcome['is_consistent'], \
                f"日期 {trade_date} 数据不一致: {outcome['consistency_issues']}"

            # Every individual check has to pass as well
            for check_name, check_result in outcome['consistency_checks'].items():
                assert check_result['passed'], \
                    f"日期 {trade_date} 一致性检查 {check_name} 失败: {check_result}"

            print(f"日期 {trade_date} 一致性验证通过")
    
    @pytest.mark.asyncio
    async def test_edge_cases_validation(self, historical_validator, mock_data_manager):
        """All boundary scenarios of the validator must pass against the service."""
        service = LimitUpStatsService(mock_data_manager)

        # Run the boundary scenarios
        outcome = historical_validator.validate_edge_cases(service)
        scenario_results = outcome['test_results']

        assert outcome['all_passed'], \
            f"边界情况测试失败: {scenario_results}"

        # Each individual scenario has to pass as well
        for test_name, test_result in scenario_results.items():
            assert test_result['passed'], \
                f"边界情况测试 {test_name} 失败: {test_result}"

            print(f"边界情况测试 {test_name} 通过")
    
    @pytest.mark.asyncio
    async def test_different_market_conditions(self, historical_validator, mock_data_manager):
        """Statistics must stay accurate (5% tolerance) and consistent for several market regimes."""
        service = LimitUpStatsService(mock_data_manager)

        stat_keys = ('total', 'non_st', 'shanghai', 'shenzhen', 'star', 'beijing', 'st')
        # (name, trade date, counts in the order of stat_keys)
        scenarios = [
            ('牛市高涨停', '20241025', (500, 450, 200, 180, 80, 40, 50)),
            ('熊市低涨停', '20241026', (20, 18, 8, 7, 3, 2, 2)),
            ('震荡市中等涨停', '20241027', (120, 108, 48, 42, 18, 12, 12)),
        ]
        market_conditions = [
            {
                'name': name,
                'trade_date': trade_date,
                'expected_stats': dict(zip(stat_keys, counts)),
                'sample_stocks': []
            }
            for name, trade_date, counts in scenarios
        ]

        for condition in market_conditions:
            name = condition['name']
            expected = condition['expected_stats']

            # Generate the quotes for this market regime
            stock_data = historical_validator.generate_historical_stock_data(
                condition['trade_date'],
                expected,
                condition['sample_stocks']
            )

            def serve(req):
                # Daily quotes as generated; any other request gets the ts_code/name table
                if req.data_type == 'stock_daily':
                    return stock_data
                return pd.DataFrame([{'ts_code': row['ts_code'], 'name': row['name']}
                                     for _, row in stock_data.iterrows()])

            mock_data_manager.get_data.side_effect = serve

            # Compute the statistics
            stats = await service.get_daily_limit_up_stats(
                LimitUpStatsRequest(trade_date=condition['trade_date'])
            )

            # Accuracy against the expected counts (5% tolerance)
            accuracy = historical_validator.validate_statistics_accuracy(
                stats,
                expected,
                tolerance=0.05
            )
            assert accuracy['is_accurate'], \
                f"市场条件 {name} 统计不准确: {accuracy['accuracy_issues']}"

            # Internal consistency of the result
            consistency = historical_validator.validate_data_consistency(stats)
            assert consistency['is_consistent'], \
                f"市场条件 {name} 数据不一致: {consistency['consistency_issues']}"

            print(f"市场条件 {name} 验证通过，准确率: {accuracy['overall_accuracy']:.2%}")
    
    def test_stock_classifier_accuracy(self, historical_validator):
        """The classifier must map known codes to their market and spot ST names."""
        classifier = historical_validator.classifier

        # Market classification of well-known stock codes: (ts_code, expected market)
        market_cases = [
            ('600000.SH', 'shanghai'),
            ('600519.SH', 'shanghai'),
            ('000001.SZ', 'shenzhen'),
            ('000858.SZ', 'shenzhen'),
            ('300001.SZ', 'shenzhen'),
            ('688001.SH', 'star'),
            ('688099.SH', 'star'),
            ('430001.BJ', 'beijing'),
            ('430002.BJ', 'beijing'),
        ]
        for ts_code, expected_market in market_cases:
            actual_market = classifier.classify_market(ts_code)
            assert actual_market == expected_market, \
                f"股票 {ts_code} 分类错误: 实际 {actual_market}, 预期 {expected_market}"

        # ST detection by stock name: (name, expected ST flag)
        st_cases = [
            ('平安银行', False),
            ('ST万科', True),
            ('*ST海马', True),
            ('退市大控', True),
            ('暂停交易', True),
            ('贵州茅台', False),
        ]
        for name, expected_st in st_cases:
            actual_st = classifier.is_st_stock(name)
            assert actual_st == expected_st, \
                f"股票 {name} ST检测错误: 实际 {actual_st}, 预期 {expected_st}"

        print("股票分类器准确性验证通过")
    
    def test_limit_up_detector_accuracy(self, historical_validator):
        """The detector must give the expected verdict for each stock type and price pattern."""
        detector = historical_validator.detector

        # (label, open, close, high, previous close, stock type, expected verdict)
        scenarios = [
            ('普通股票10%涨停', 10.0, 11.0, 11.0, 10.0, 'normal', True),
            ('ST股票5%涨停', 10.0, 10.5, 10.5, 10.0, 'st', True),
            ('科创板20%涨停', 50.0, 60.0, 60.0, 50.0, 'star', True),
            ('北证30%涨停', 10.0, 13.0, 13.0, 10.0, 'beijing', True),
            ('普通股票非涨停', 10.0, 10.5, 10.8, 10.0, 'normal', False),
            ('收盘价不等于最高价', 10.0, 10.9, 11.0, 10.0, 'normal', False),
        ]

        for label, open_price, close_price, high_price, prev_close, stock_type, expected in scenarios:
            actual = detector.is_limit_up(
                open_price=open_price,
                close_price=close_price,
                high_price=high_price,
                prev_close=prev_close,
                stock_type=stock_type
            )

            assert actual == expected, \
                f"涨停检测错误 {label}: 实际 {actual}, 预期 {expected}"

        print("涨停检测器准确性验证通过")
    
    @pytest.mark.asyncio
    async def test_performance_under_different_data_sizes(self, historical_validator, mock_data_manager):
        """Check consistency and processing time of the statistics for several data sizes.

        For each nominal size a quote table is generated, served through the mock
        data manager, and the statistics call must finish within one second per
        1000 stocks of the nominal size and yield a consistent result.
        """
        # Imported once instead of on every loop iteration
        import time

        service = LimitUpStatsService(mock_data_manager)

        data_sizes = [100, 500, 1000, 5000]

        for data_size in data_sizes:
            # Counts used to generate the limit-up part of the data set
            expected_stats = {
                'total': data_size // 10,  # 10% limit-up
                'non_st': data_size // 10 - data_size // 100,  # 1% ST
                'shanghai': data_size // 40,
                'shenzhen': data_size // 40,
                'star': data_size // 50,
                'beijing': data_size // 100,
                'st': data_size // 100
            }

            stock_data = historical_validator.generate_historical_stock_data(
                '20241030',
                expected_stats,
                []
            )

            # Trim oversized frames to the nominal size.
            # NOTE(review): a frame that comes out smaller than data_size is not
            # padded, so the largest sizes may run on fewer rows than their name
            # suggests, while the time budget below still uses data_size.
            if len(stock_data) > data_size:
                stock_data = stock_data.head(data_size)

            # Serve the generated frame (or its ts_code/name table) through the mock
            mock_data_manager.get_data.side_effect = lambda req: (
                stock_data if req.data_type == 'stock_daily'
                else pd.DataFrame([{'ts_code': row['ts_code'], 'name': row['name']}
                                 for _, row in stock_data.iterrows()])
            )

            # Measure with the monotonic high-resolution clock; time.time() follows
            # the wall clock and can jump when the system time is adjusted
            start_time = time.perf_counter()

            request = LimitUpStatsRequest(trade_date='20241030')
            actual_stats = await service.get_daily_limit_up_stats(request)

            processing_time = time.perf_counter() - start_time

            # The result has to be internally consistent
            validation_result = historical_validator.validate_data_consistency(actual_stats)
            assert validation_result['is_consistent'], \
                f"数据大小 {data_size} 一致性验证失败: {validation_result['consistency_issues']}"

            # Performance budget: one second per 1000 stocks
            max_time = data_size / 1000
            assert processing_time < max_time, \
                f"数据大小 {data_size} 处理时间过长: {processing_time:.3f}s > {max_time:.3f}s"

            print(f"数据大小 {data_size} 验证通过，处理时间: {processing_time:.3f}s")


# Allow running this module directly: hand this file to pytest.
if __name__ == "__main__":
    pytest.main([__file__])