# -*- coding: utf-8 -*-
"""
表单数据提取器

直接从数据库获取表单字段配置，结合 POST 数据，生成 AI 指令
"""
import logging
from typing import Dict, List, Any, Optional

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class FormDataExtractor:
    """Turn submitted form data into generation parameters and AI instructions.

    All members are static; the class is only a namespace. The field
    configuration is a list of dicts read from a ``CategoryConfig`` row, and
    the keys consulted here are ``id``, ``label``, ``type``, ``hidden``,
    ``options`` and ``options_source``.
    """

    # Option values that mark a ``select`` field as the difficulty selector.
    _DIFFICULTY_VALUES = ('入门', '简单', '中等', '困难', 'beginner', 'easy', 'medium', 'hard')
    # Field types whose submitted value is a collection of choices.
    _MULTI_SELECT_TYPES = ('multi_select', 'multi_select_categorized')
    # Field types whose submitted value is a single choice.
    _SINGLE_SELECT_TYPES = ('select', 'select_with_sub')
    # Label keywords that mark a ``textarea`` as the "extra requirements" field.
    _EXTRA_REQUIREMENT_KEYWORDS = ('要求', '需求', '备注', '说明', '额外')

    @staticmethod
    def extract_generation_params(
        category_id: str,
        form_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Extract generation parameters from submitted form data.

        Walks the category's field configuration, pairs every visible field
        with its submitted value, and additionally fills the legacy keys
        (difficulty, language, vulnerabilities, scene, extra_requirements) so
        callers of the old interface keep working.

        Args:
            category_id: Primary key of the ``CategoryConfig`` row.
            form_data: Submitted form values keyed by field id. ``None`` is
                treated as an empty mapping.

        Returns:
            A dict with the keys:
            - form_fields_data: list of
              ``{id, label, value, raw_value, type}`` dicts, one per
              non-empty visible field, in configuration order.
            - difficulty: stripped string of the first difficulty field, or None.
            - language: stripped string of the first language field, or None.
            - vulnerabilities: list taken from the first multi-select field,
              or None.
            - scene: ``{'id': raw_value, 'name': formatted_value}`` of the
              first scene field, or None.
            - extra_requirements: stripped text of the first matching
              textarea, or ``''``.
            When the category does not exist, the same keys are returned with
            empty values.
        """
        # Kept as a function-scope import, as in the original
        # (presumably to avoid an import cycle — TODO confirm).
        from app.models.database.models import CategoryConfig

        category = CategoryConfig.query.get(category_id)
        if not category:
            logger.warning(f"方向配置不存在: {category_id}")
            return {
                'form_fields_data': [],
                'difficulty': None,
                'language': None,
                'vulnerabilities': None,
                'scene': None,
                'extra_requirements': ''
            }

        # A missing configuration is treated like an empty one instead of
        # crashing on ``len(None)`` / iteration.
        form_fields = category.get_form_fields() or []

        logger.info(f"原始表单数据: {form_data}")
        logger.info(f"表单字段配置数量: {len(form_fields)}")

        # Normalise after logging so the log still shows what was received.
        form_data = form_data or {}

        # New interface: one entry per submitted field.
        form_fields_data = []

        # Legacy interface: the first field of each semantic kind wins.
        difficulty = None
        language = None
        vulnerabilities = None
        scene = None
        extra_requirements = ''

        for field in form_fields:
            # Skip malformed entries and hidden fields.
            if not isinstance(field, dict) or field.get('hidden', False):
                continue

            field_id = field.get('id')
            field_value = form_data.get(field_id)
            if field_value is None or field_value == '':
                continue

            formatted_value = FormDataExtractor._format_value(field_value, field)
            if not formatted_value:
                continue

            form_fields_data.append({
                'id': field_id,
                # Fall back to the id when the label is missing, null or
                # empty, so the field is not dropped when the instruction
                # text is built.
                'label': field.get('label') or field_id,
                'value': formatted_value,
                'raw_value': field_value,  # unformatted value as submitted
                'type': field.get('type')
            })

            semantic = FormDataExtractor._identify_field_semantic(field, field_value)

            if semantic == 'difficulty' and not difficulty:
                difficulty = str(field_value).strip()
            elif semantic == 'language' and not language:
                language = str(field_value).strip()
            elif semantic == 'vulnerability' and not vulnerabilities:
                if isinstance(field_value, list):
                    vulnerabilities = field_value
                else:
                    vulnerabilities = [field_value]
            elif semantic == 'scene' and not scene:
                scene = {'id': field_value, 'name': formatted_value}
            elif semantic == 'extra_requirements' and not extra_requirements:
                extra_requirements = str(field_value).strip()

        logger.info(f"提取的表单数据: {len(form_fields_data)} 个字段")
        logger.info(f"difficulty={difficulty}, language={language}, scene={scene}")

        return {
            # New interface
            'form_fields_data': form_fields_data,
            # Legacy interface (backward compatibility)
            'difficulty': difficulty,
            'language': language,
            'vulnerabilities': vulnerabilities,
            'scene': scene,
            'extra_requirements': extra_requirements
        }

    @staticmethod
    def _text_or_fallback(candidate: Any, fallback: Any) -> str:
        """Return ``candidate`` as text, or ``fallback`` as text when it is None or ''."""
        if candidate is None or candidate == '':
            return str(fallback)
        return str(candidate)

    @staticmethod
    def _format_value(value: Any, field: Dict) -> Optional[str]:
        """Render a submitted field value as display text.

        Args:
            value: The raw submitted value.
            field: The field's configuration dict; ``type`` and ``options``
                are read.

        Returns:
            The display string, or None when there is nothing to show
            (None, empty or whitespace-only text, empty selection, or another
            falsy non-numeric value such as ``False``).
        """
        if value is None or value == '':
            return None

        field_type = field.get('type')

        # Multi-select: join the chosen items with ", ".
        if field_type in FormDataExtractor._MULTI_SELECT_TYPES:
            if not isinstance(value, (list, tuple)):
                return str(value)
            items = []
            for item in value:
                if isinstance(item, dict):
                    # Always coerce to str so a numeric or null name cannot
                    # break the join below.
                    items.append(
                        FormDataExtractor._text_or_fallback(item.get('name'), item)
                    )
                else:
                    items.append(str(item))
            return ', '.join(items) if items else None

        # Single select: prefer a human-readable name/label over the raw value.
        if field_type in FormDataExtractor._SINGLE_SELECT_TYPES:
            if isinstance(value, dict):
                return FormDataExtractor._text_or_fallback(value.get('name'), value)

            # ``options`` may be present but null in the stored configuration.
            for opt in field.get('options') or []:
                if isinstance(opt, dict) and opt.get('value') == value:
                    return FormDataExtractor._text_or_fallback(opt.get('label'), value)

            # Plain (non-dict) options and unmatched values display as-is.
            return str(value)

        # Any other type: numbers are kept even when zero; other falsy
        # values count as "not provided".
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if not value and not is_number:
            return None
        return str(value).strip() or None

    @staticmethod
    def _identify_field_semantic(field: Dict, value: Any) -> Optional[str]:
        """Classify a field for the legacy interface.

        Checks run in a fixed order and the first match wins: difficulty,
        language, vulnerability, scene, extra requirements.

        Args:
            field: The field's configuration dict.
            value: The submitted value. Unused; kept for interface
                compatibility.

        Returns:
            One of ``'difficulty'``, ``'language'``, ``'vulnerability'``,
            ``'scene'``, ``'extra_requirements'``, or None when no rule
            matches.
        """
        # ``id`` / ``label`` may be missing, null or non-string in the
        # configuration; normalise to lowercase text before matching.
        field_id = str(field.get('id') or '').lower()
        field_label = str(field.get('label') or '').lower()
        field_type = field.get('type')
        options = field.get('options') or []

        if field_type == 'select':
            # 1. Difficulty: any option value is a known difficulty name.
            for opt in options:
                opt_value = opt.get('value') if isinstance(opt, dict) else opt
                if opt_value in FormDataExtractor._DIFFICULTY_VALUES:
                    return 'difficulty'

            # 2. Language ('lang' also covers ids containing 'language').
            if ('lang' in field_id or '语言' in field_label or
                    field.get('options_source') == 'languages'):
                return 'language'

        # 3. Vulnerability / knowledge point: every multi-select field.
        if field_type in FormDataExtractor._MULTI_SELECT_TYPES:
            return 'vulnerability'

        # 4. Scene.
        if field_type in FormDataExtractor._SINGLE_SELECT_TYPES:
            if ('scene' in field_id or '场景' in field_label or
                    field.get('options_source') == 'scenes'):
                return 'scene'

        # 5. Extra requirements: a textarea whose label contains a keyword.
        if field_type == 'textarea':
            if any(kw in field_label
                   for kw in FormDataExtractor._EXTRA_REQUIREMENT_KEYWORDS):
                return 'extra_requirements'

        return None

    @staticmethod
    def build_instruction_from_form(form_fields_data: List[Dict]) -> str:
        """Build the AI instruction text from extracted form fields.

        Args:
            form_fields_data: Entries carrying ``label`` and ``value`` keys,
                as produced by ``extract_generation_params``.

        Returns:
            One ``- <label>：<value>`` line per entry that has both a label
            and a value, joined with newlines, for example::

                - 编程语言：Python
                - 难度级别：入门

            An empty string when there are no entries.
        """
        if not form_fields_data:
            return ''

        lines = []
        for field in form_fields_data:
            label = field.get('label', '')
            value = field.get('value', '')
            if label and value:
                lines.append(f"- {label}：{value}")

        return '\n'.join(lines)
