Coverage for aipyapp/plugins/p_image_tool.py: 0%
47 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-11 12:02 +0200
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-11 12:02 +0200
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
4import os
5import base64
6import mimetypes
7from typing import Union
9from openai import OpenAI
11from aipyapp import TaskPlugin, PluginInitError
class ImageToolPlugin(TaskPlugin):
    """Image recognition plugin backed by an OpenAI-compatible chat model.

    Configuration is read from ``self.config``: ``api_key`` (required),
    ``base_url``, ``model`` and ``max_tokens``.  The two public entry points,
    ``fn_recognize_image`` and ``fn_analyze_image``, both delegate to
    ``_recognize_image``.
    """

    name = "image_tool"
    version = "1.0.0"
    description = "使用大模型识别和分析图片内容"
    author = "AiPy Team"

    def init(self):
        """Create the OpenAI client from the plugin configuration.

        Sets ``self.client`` and ``self.model``.

        Raises:
            PluginInitError: If ``api_key`` is missing or empty.
        """
        api_key = self.config.get('api_key')
        base_url = self.config.get('base_url')
        # NOTE(review): 'gpt-4-vision-preview' appears to be deprecated by
        # OpenAI -- confirm the configured endpoint still serves this default.
        model = self.config.get('model', 'gpt-4-vision-preview')

        if not api_key:
            raise PluginInitError("未配置OpenAI API Key")

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )
        self.model = model
        self.logger.info(f"初始化OpenAI客户端,模型: {model}")

    def fn_recognize_image(self,
                           image_source: str,
                           prompt: str = "请描述这张图片的内容。",
                           return_json: bool = False
                           ) -> Union[str, dict]:
        """Ask the model to describe an image given as a local path or URL.

        Args:
            image_source: Local image path or remote ``http(s)`` image URL.
            prompt: Instruction text sent to the model alongside the image.
            return_json: When true, return the full response as a dict
                (``response.model_dump()``) instead of only the message text.

        Returns:
            The content of the first choice's message, or the dumped response
            when ``return_json`` is true.
        """
        return self._recognize_image(image_source, prompt, return_json)

    def fn_analyze_image(self, image_source: str, analysis_type: str = "general") -> str:
        """Analyze an image using one of several predefined prompts.

        Args:
            image_source: Local image path or remote ``http(s)`` image URL.
            analysis_type: One of ``general``, ``technical``, ``artistic`` or
                ``text``.  Any other value falls back to ``general``.

        Returns:
            The content of the first choice's message.
        """
        prompts = {
            "general": "请详细描述这张图片的内容,包括主要对象、场景、颜色、构图等。",
            "technical": "请从技术角度分析这张图片,包括拍摄参数、后期处理、技术特点等。",
            "artistic": "请从艺术角度欣赏这张图片,分析其构图、色彩搭配、艺术风格等。",
            "text": "请识别并提取图片中的所有文字内容,保持原有格式。"
        }

        prompt = prompts.get(analysis_type, prompts["general"])
        return self._recognize_image(image_source, prompt, False)

    @staticmethod
    def _build_image_part(image_source: str) -> dict:
        """Return the ``image_url`` content part for *image_source*.

        Remote URLs are passed through unchanged; local files are read and
        embedded as a base64 ``data:`` URL.

        Raises:
            FileNotFoundError: If *image_source* is not a URL and does not
                name an existing regular file.
        """
        # URL schemes are case-insensitive, so match on a lowered copy but
        # forward the caller's original string.
        if image_source.lower().startswith(("http://", "https://")):
            return {"type": "image_url", "image_url": {"url": image_source}}

        # isfile() rather than exists(): a directory would pass exists() and
        # then fail inside open() with a platform-dependent error.
        if not os.path.isfile(image_source):
            raise FileNotFoundError(f"文件不存在: {image_source}")

        mime_type, _ = mimetypes.guess_type(image_source)
        if mime_type is None:
            mime_type = "image/jpeg"  # fallback when the type cannot be guessed

        with open(image_source, "rb") as f:
            base64_image = base64.b64encode(f.read()).decode("utf-8")
        data_url = f"data:{mime_type};base64,{base64_image}"
        return {"type": "image_url", "image_url": {"url": data_url}}

    def _recognize_image(self, image_source: str, prompt: str, return_json: bool) -> Union[str, dict]:
        """Send *prompt* plus the image to the chat completions endpoint.

        Args:
            image_source: Local image path or remote ``http(s)`` image URL.
            prompt: Text part of the user message.
            return_json: Return the dumped response instead of the text.

        Returns:
            ``response.model_dump()`` when ``return_json`` is true, otherwise
            the first choice's message content.

        Raises:
            Exception: Any error from building the payload or calling the API
                is logged and re-raised unchanged.
        """
        try:
            image_url = self._build_image_part(image_source)

            # A single user message carrying the text prompt and the image.
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            image_url,
                        ],
                    }
                ],
                max_tokens=self.config.get('max_tokens', 1000),
            )

            if return_json:
                return response.model_dump()
            return response.choices[0].message.content

        except Exception as e:
            # Log for diagnostics, then let the caller handle the failure.
            self.logger.error(f"图片识别失败: {e}")
            raise