// Token Counter
//
// Calls Ollama, limits output tokens with num_predict, and prints the
// token usage reported in the response.
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// main sends one chat request to a local Ollama server, capping the reply
// at 100 generated tokens via num_predict, then prints the response text
// and the token counts (prompt_eval_count / eval_count) Ollama reports.
func main() {
	body, err := json.Marshal(map[string]any{
		"model": "llama3.2",
		"messages": []map[string]string{
			{"role": "user", "content": "What is Go? One sentence."},
		},
		"stream": false, // request a single JSON object, not a chunk stream
		"options": map[string]any{
			"num_predict": 100, // hard cap on output (generated) tokens
		},
	})
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	// The zero-value http.Client has no timeout; a hung server would block
	// forever. Two minutes is generous for a local model generating 100 tokens.
	client := &http.Client{Timeout: 2 * time.Minute}
	resp, err := client.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	defer resp.Body.Close()

	// A non-200 body is an error payload, not a chat response — don't parse it as one.
	if resp.StatusCode != http.StatusOK {
		msg, _ := io.ReadAll(resp.Body) // best effort: include the server's error text
		fmt.Printf("Error: unexpected status %s: %s\n", resp.Status, msg)
		return
	}

	var result struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
		PromptEvalCount int `json:"prompt_eval_count"` // input (prompt) tokens
		EvalCount       int `json:"eval_count"`        // output (generated) tokens
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println("Response:", result.Message.Content)
	fmt.Printf("Input tokens: %d\n", result.PromptEvalCount)
	fmt.Printf("Output tokens: %d\n", result.EvalCount)
	fmt.Printf("Total tokens: %d\n", result.PromptEvalCount+result.EvalCount)
	// Response: Go is a statically typed, compiled programming language...
	// Input tokens: 32
	// Output tokens: 28
	// Total tokens: 60
}