76 lines
1.5 KiB
Go
76 lines
1.5 KiB
Go
package llm
|
|
|
|
import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
)
|
|
|
|
// ollamaGenerateRequest is the JSON body that Stream posts to the
// "/api/generate" endpoint. It carries the model name, the prompt text and
// the flag that asks the server to stream its reply.
type ollamaGenerateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Stream bool   `json:"stream"`
}
|
|
|
|
// OllamaGenerateResponse is one JSON message decoded from the body of a
// "/api/generate" reply. Stream decodes a sequence of these and hands the
// message marked Done to its callback as the final metadata.
type OllamaGenerateResponse struct {
	// Model is decoded from the "model" key.
	Model string `json:"model"`

	// CreatedAt is decoded from the "created_at" key and kept as the raw
	// string sent by the server; it is not parsed into a time value here.
	CreatedAt string `json:"created_at"`

	// Response is the text carried by this message. Stream forwards it to the
	// callback whenever it is non-empty.
	Response string `json:"response"`

	// Done marks the last message of a stream; Stream stops reading after it.
	Done bool `json:"done"`

	// PromptEvalCount is decoded from the "prompt_eval_count" key.
	// NOTE(review): presumably the number of prompt tokens evaluated; confirm
	// against the Ollama API documentation.
	PromptEvalCount int `json:"prompt_eval_count"`

	// EvalCount is decoded from the "eval_count" key.
	// NOTE(review): presumably the number of generated tokens; confirm
	// against the Ollama API documentation.
	EvalCount int `json:"eval_count"`
}
|
|
|
|
// OllamaClient holds the connection settings used by Stream.
//
// The zero value is usable: Stream fills an empty Host from ActiveHost and an
// empty Model from ActiveModel, and stores those values on the client.
type OllamaClient struct {
	// Host is the base URL of the server; Stream appends "/api/generate" to it.
	Host string

	// Model is the model name sent in the request body.
	Model string
}
|
|
|
|
// Stream — стриминг ответа модели по частям
|
|
func (c *OllamaClient) Stream(prompt string, callback func(chunk string, meta *OllamaGenerateResponse)) error {
|
|
if c.Host == "" {
|
|
c.Host = ActiveHost
|
|
}
|
|
if c.Model == "" {
|
|
c.Model = ActiveModel
|
|
}
|
|
|
|
reqBody := ollamaGenerateRequest{
|
|
Model: c.Model,
|
|
Prompt: prompt,
|
|
Stream: true,
|
|
}
|
|
|
|
var buf bytes.Buffer
|
|
if err := json.NewEncoder(&buf).Encode(&reqBody); err != nil {
|
|
return fmt.Errorf("ошибка кодирования запроса: %w", err)
|
|
}
|
|
|
|
resp, err := http.Post(c.Host+"/api/generate", "application/json", &buf)
|
|
if err != nil {
|
|
return fmt.Errorf("ошибка запроса к %s: %w", c.Host, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
dec := json.NewDecoder(resp.Body)
|
|
|
|
for {
|
|
var msg OllamaGenerateResponse
|
|
if err := dec.Decode(&msg); err != nil {
|
|
break // конец стрима
|
|
}
|
|
|
|
if msg.Response != "" {
|
|
callback(msg.Response, nil)
|
|
}
|
|
|
|
if msg.Done {
|
|
callback("", &msg)
|
|
break
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|