Coverage for aipyapp/llm/client_ollama.py: 30%

33 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-11 12:02 +0200

1#! /usr/bin/env python3 

2# -*- coding: utf-8 -*- 

3 

4import json 

5import requests 

6 

7from . import BaseClient, ChatMessage 

8 

9# https://github.com/ollama/ollama/blob/main/docs/api.md 

class OllamaClient(BaseClient):
    """LLM client for an Ollama server, speaking the ``/api/chat`` protocol.

    Supports both streaming (newline-delimited JSON objects) and
    non-streaming responses. See the reference comment above for the
    protocol description.
    """

    def __init__(self, config):
        super().__init__(config)
        # One pooled Session for all requests so TCP connections are reused.
        self._session = requests.Session()

    def usable(self):
        """Client is usable only when base checks pass and a base URL is set."""
        return super().usable() and self._base_url

    def _parse_usage(self, response):
        """Map Ollama's token counters onto the common usage dict.

        Ollama reports ``prompt_eval_count`` (input tokens) and
        ``eval_count`` (output tokens) on the final response object.
        BUG FIX: these keys can be absent (e.g. ``prompt_eval_count`` is
        omitted when the prompt is served entirely from cache), so default
        missing counters to 0 instead of raising KeyError.
        """
        ret = {
            'input_tokens': response.get('prompt_eval_count', 0),
            'output_tokens': response.get('eval_count', 0),
        }
        ret['total_tokens'] = ret['input_tokens'] + ret['output_tokens']
        return ret

    def _parse_stream_response(self, response, stream_processor):
        """Consume a streaming ``/api/chat`` response.

        Each non-empty line is a JSON object; intermediate objects carry
        content deltas under ``message.content``, and the final object
        (``done: true``) carries the token-usage counters.

        Returns a ChatMessage with the accumulated content and usage.
        """
        # BUG FIX: initialize usage so a stream that ends without a
        # ``done: true`` object (dropped/truncated connection) does not
        # raise UnboundLocalError at the return below.
        usage = {}
        with stream_processor as lm:
            for chunk in response.iter_lines():
                # BUG FIX: iter_lines() may yield empty keep-alive lines;
                # json.loads('') would raise JSONDecodeError.
                if not chunk:
                    continue
                msg = json.loads(chunk.decode(encoding='utf-8'))
                if msg['done']:
                    usage = self._parse_usage(msg)
                    break

                if 'message' in msg and 'content' in msg['message'] and msg['message']['content']:
                    lm.process_chunk(msg['message']['content'])

        # lm.content holds the full accumulated text after the with-block.
        return ChatMessage(role="assistant", content=lm.content, usage=usage)

    def _parse_response(self, response):
        """Parse a non-streaming ``/api/chat`` response into a ChatMessage."""
        data = response.json()
        msg = data["message"]
        return ChatMessage(role=msg['role'], content=msg['content'], usage=self._parse_usage(data))

    def get_completion(self, messages):
        """POST the chat request to Ollama and return the raw HTTP response.

        Raises requests.HTTPError (via raise_for_status) on a non-2xx
        status; the caller then parses the body with one of the
        ``_parse_*`` helpers depending on ``self._stream``.
        """
        response = self._session.post(
            f"{self._base_url}/api/chat",
            json={
                "model": self._model,
                "messages": messages,
                "stream": self._stream,
                "options": {"num_predict": self.max_tokens, "temperature": self._temperature},
            },
            timeout=self._timeout,
            **self._params,
        )
        response.raise_for_status()
        return response