Token Counter

Calls the Ollama chat API, caps the response length with the num_predict option, and prints the token usage counts reported in the response.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)

// main sends one chat request to a local Ollama server, limiting the
// generated response with num_predict, and prints the model's reply
// together with the input/output token counts reported by the API.
func main() {
	body, err := json.Marshal(map[string]any{
		"model": "llama3.2",
		"messages": []map[string]string{
			{"role": "user", "content": "What is Go? One sentence."},
		},
		// stream=false makes Ollama return a single JSON object
		// instead of a stream of newline-delimited chunks.
		"stream": false,
		"options": map[string]any{
			// Cap the number of tokens the model may generate.
			"num_predict": 100,
		},
	})
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	// Use a client with a timeout so a hung server can't block forever;
	// the default http.Client has no timeout at all.
	client := &http.Client{Timeout: 60 * time.Second}
	resp, err := client.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// A non-200 body is an error payload, not a chat response — don't
	// try to decode it as one.
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("Error: unexpected status %d: %s\n", resp.StatusCode, data)
		return
	}

	// prompt_eval_count / eval_count are Ollama's names for the
	// input and output token counts of this request.
	var result struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
		PromptEvalCount int `json:"prompt_eval_count"`
		EvalCount       int `json:"eval_count"`
	}
	if err := json.Unmarshal(data, &result); err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println("Response:", result.Message.Content)
	fmt.Printf("Input tokens:  %d\n", result.PromptEvalCount)
	fmt.Printf("Output tokens: %d\n", result.EvalCount)
	fmt.Printf("Total tokens:  %d\n", result.PromptEvalCount+result.EvalCount)
	// Example output:
	// Response: Go is a statically typed, compiled programming language...
	// Input tokens:  32
	// Output tokens: 28
	// Total tokens:  60
}

💻 Run locally

Copy the code above and run it on your machine

© 2026 ByteLearn.dev. Free courses for developers. · Privacy