gskaro-v1/internal/llm/stream.go

76 lines
1.5 KiB
Go

package llm
import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
)
// ollamaGenerateRequest is the JSON body that Stream posts to the
// "/api/generate" endpoint.
type ollamaGenerateRequest struct {
	// Model is the name of the model that should produce the reply.
	Model string `json:"model"`
	// Prompt is the text handed to the model.
	Prompt string `json:"prompt"`
	// Stream is encoded as the "stream" flag of the request; Stream always
	// sends it as true.
	Stream bool `json:"stream"`
}
// OllamaGenerateResponse is one JSON message decoded from the body of a
// "/api/generate" reply. Stream decodes a sequence of these messages.
type OllamaGenerateResponse struct {
	// Model is read from the "model" key of the message.
	Model string `json:"model"`
	// CreatedAt is the raw "created_at" string; it is not parsed here.
	// NOTE(review): presumably a timestamp — confirm the format against the
	// Ollama API documentation.
	CreatedAt string `json:"created_at"`
	// Response is the piece of generated text carried by this message.
	Response string `json:"response"`
	// Done marks the final message; Stream stops reading once it is true.
	Done bool `json:"done"`
	// PromptEvalCount is read from "prompt_eval_count".
	// NOTE(review): presumably the number of prompt tokens — confirm.
	PromptEvalCount int `json:"prompt_eval_count"`
	// EvalCount is read from "eval_count".
	// NOTE(review): presumably the number of generated tokens — confirm.
	EvalCount int `json:"eval_count"`
}
// OllamaClient holds the connection settings used by Stream.
type OllamaClient struct {
	// Host is the base URL of the server; Stream appends "/api/generate" to
	// it. When empty, Stream replaces it with ActiveHost.
	Host string
	// Model is the model name sent with each request. When empty, Stream
	// replaces it with ActiveModel.
	Model string
}
// Stream sends prompt to the "/api/generate" endpoint of c.Host with
// streaming enabled and forwards the model's reply to callback piece by piece.
//
// For every decoded message whose Response is non-empty, callback is invoked
// as callback(chunk, nil). When a message with Done set arrives, callback is
// invoked one last time as callback("", meta), where meta points to that final
// message, and Stream returns nil. A body that ends cleanly before a Done
// message is also treated as the end of the stream and yields nil.
//
// If c.Host or c.Model is empty it is filled in from ActiveHost or
// ActiveModel; the value is stored on the receiver.
//
// Stream returns an error when callback is nil, when the request cannot be
// encoded or sent, when the server replies with a non-2xx status, when a
// message in the stream carries an "error" field, or when the body cannot be
// decoded as JSON (including a body truncated in the middle of a message).
//
// NOTE(review): the request is issued with http.Post, i.e. through
// http.DefaultClient, so there is no timeout and no way to cancel it from
// here; adding that would require a new entry point that accepts a context.
func (c *OllamaClient) Stream(prompt string, callback func(chunk string, meta *OllamaGenerateResponse)) error {
	if callback == nil {
		// Fail up front instead of panicking on the first chunk.
		return errors.New("не задан callback для стриминга")
	}
	if c.Host == "" {
		c.Host = ActiveHost
	}
	if c.Model == "" {
		c.Model = ActiveModel
	}

	reqBody := ollamaGenerateRequest{
		Model:  c.Model,
		Prompt: prompt,
		Stream: true,
	}
	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(&reqBody); err != nil {
		return fmt.Errorf("ошибка кодирования запроса: %w", err)
	}

	resp, err := http.Post(c.Host+"/api/generate", "application/json", &buf)
	if err != nil {
		return fmt.Errorf("ошибка запроса к %s: %w", c.Host, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		// An error reply does not carry "response"/"done", so decoding it as
		// a stream would silently produce nothing. Report the status and a
		// bounded part of the body instead. The read is best effort: if it
		// fails, the status alone is still reported.
		raw, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		if detail := bytes.TrimSpace(raw); len(detail) > 0 {
			return fmt.Errorf("ошибка запроса к %s: статус %s: %s", c.Host, resp.Status, detail)
		}
		return fmt.Errorf("ошибка запроса к %s: статус %s", c.Host, resp.Status)
	}

	dec := json.NewDecoder(resp.Body)
	for {
		// The extra Error field catches an error object delivered inside an
		// otherwise successful (2xx) stream.
		var msg struct {
			OllamaGenerateResponse
			Error string `json:"error"`
		}
		if err := dec.Decode(&msg); err != nil {
			if errors.Is(err, io.EOF) {
				return nil // clean end of the stream
			}
			return fmt.Errorf("ошибка чтения ответа от %s: %w", c.Host, err)
		}
		if msg.Error != "" {
			return fmt.Errorf("ошибка в ответе от %s: %s", c.Host, msg.Error)
		}
		if msg.Response != "" {
			callback(msg.Response, nil)
		}
		if msg.Done {
			callback("", &msg.OllamaGenerateResponse)
			return nil
		}
	}
}