Coverage for aipyapp/llm/client_ollama.py: 30%
33 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-11 12:02 +0200
1#! /usr/bin/env python3
2# -*- coding: utf-8 -*-
4import json
5import requests
7from . import BaseClient, ChatMessage
9# https://github.com/ollama/ollama/blob/main/docs/api.md
class OllamaClient(BaseClient):
    """Client for a local Ollama server's /api/chat endpoint.

    Supports both streaming and non-streaming chat completions.
    API reference: https://github.com/ollama/ollama/blob/main/docs/api.md
    """

    def __init__(self, config):
        super().__init__(config)
        # One Session for the client's lifetime: reuses TCP connections
        # (connection pooling) across successive chat requests.
        self._session = requests.Session()

    def usable(self):
        """Return True when the base checks pass and a base URL is configured."""
        # bool() so callers get an actual boolean, not the URL string
        # (the original returned self._base_url itself when truthy).
        return super().usable() and bool(self._base_url)

    def _parse_usage(self, response):
        """Extract token usage from a final Ollama response dict.

        Uses .get(..., 0) because Ollama omits 'prompt_eval_count' when the
        prompt was served entirely from cache; direct indexing would raise
        KeyError in that case.
        """
        input_tokens = response.get('prompt_eval_count', 0)
        output_tokens = response.get('eval_count', 0)
        return {
            'input_tokens': input_tokens,
            'output_tokens': output_tokens,
            'total_tokens': input_tokens + output_tokens,
        }

    def _parse_stream_response(self, response, stream_processor):
        """Consume a streaming /api/chat response.

        Each non-empty line is a JSON object; content fragments are fed to
        *stream_processor* and the final message (``done`` true) carries the
        usage statistics.
        """
        # Fix: initialize usage so a stream that ends without a 'done'
        # message (dropped connection, truncated response) cannot raise
        # NameError at the return below.
        usage = None
        with stream_processor as lm:
            for chunk in response.iter_lines():
                # iter_lines() may yield empty keep-alive lines; json.loads
                # would raise on b"".
                if not chunk:
                    continue
                msg = json.loads(chunk.decode('utf-8'))
                if msg.get('done'):
                    usage = self._parse_usage(msg)
                    break
                content = msg.get('message', {}).get('content')
                if content:
                    lm.process_chunk(content)
        return ChatMessage(role="assistant", content=lm.content, usage=usage)

    def _parse_response(self, response):
        """Parse a non-streaming /api/chat HTTP response into a ChatMessage."""
        data = response.json()
        msg = data["message"]
        return ChatMessage(role=msg['role'], content=msg['content'],
                           usage=self._parse_usage(data))

    def get_completion(self, messages):
        """POST *messages* to /api/chat and return the raw HTTP response.

        The caller dispatches to ``_parse_response`` or
        ``_parse_stream_response`` depending on ``self._stream``.

        Raises:
            requests.HTTPError: on a non-2xx status code.
        """
        response = self._session.post(
            f"{self._base_url}/api/chat",
            json={
                "model": self._model,
                "messages": messages,
                "stream": self._stream,
                "options": {"num_predict": self.max_tokens,
                            "temperature": self._temperature},
            },
            timeout=self._timeout,
            **self._params,
        )
        response.raise_for_status()
        return response