RAG Pipeline

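A complete, working RAG (retrieval-augmented generation) pipeline. It reads markdown files from a docs/ directory, chunks them, embeds the chunks with Ollama, and answers questions from the content. The program expects an Ollama server listening on localhost:11434 with the `nomic-embed-text` and `llama3.2` models available.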

Sample Documents

Create a docs/ folder with these files before running:

docs/goroutines.md

# Goroutines

A goroutine is a lightweight thread managed by the Go runtime. You start one with the `go` keyword before a function call.

Goroutines are cheap. You can spawn thousands without significant memory overhead. Each starts with a small stack (a few KB) that grows as needed.

The Go scheduler multiplexes goroutines onto OS threads. You don't control which thread a goroutine runs on. The runtime handles it.

Goroutines communicate through channels. Shared memory with mutexes works too, but channels are the idiomatic approach in Go.

docs/channels.md

# Channels

Channels are typed conduits for sending and receiving values between goroutines. They provide synchronization without explicit locks.

An unbuffered channel blocks the sender until the receiver is ready, and vice versa. This makes it a synchronization point.

A buffered channel has capacity. Sends only block when the buffer is full. Receives only block when the buffer is empty.

Use `select` to wait on multiple channel operations. It picks whichever is ready first. Add a `default` case to make it non-blocking.

Close a channel with `close(ch)` to signal that no more values will be sent. Receivers can detect this with the two-value form: `v, ok := <-ch`.

docs/error-handling.md

# Error Handling in Go

Go uses explicit error returns instead of exceptions. Functions that can fail return an error as the last value.

The `error` interface has one method: `Error() string`. Any type implementing it can be used as an error.

Wrap errors with `fmt.Errorf("context: %w", err)` to add context while preserving the original. Inspect the wrapped chain with `errors.Is()` and `errors.As()`.

Sentinel errors like `io.EOF` are compared with `errors.Is()`. Custom error types are matched with `errors.As()`.

Don't ignore errors. If a function returns an error, handle it or propagate it. Silent failures are the hardest bugs to find.

Source Code

package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strings"
)

// Message is one entry of the conversation sent to the chat endpoint.
// It is serialized to JSON with the keys "role" and "content".
type Message struct {
	// Role names the speaker; this program sends "system" and "user".
	Role    string `json:"role"`
	// Content is the text of the message.
	Content string `json:"content"`
}

// Chunk is one indexed piece of a document together with its embedding.
type Chunk struct {
	// Text is the chunk's content as produced by chunk.
	Text      string
	// Source is the name of the file the chunk was read from.
	Source    string
	// Embedding is the vector returned by embed for Text.
	Embedding []float64
}

// --- Chunking ---

// chunk splits text into pieces of at most size bytes without ever breaking
// a paragraph. Paragraphs are separated by a blank line; both "\n\n" and
// Windows-style "\r\n\r\n" are recognized. Consecutive paragraphs are packed
// into one chunk, joined by "\n\n", as long as the joined result (separator
// included) fits in size. A single paragraph longer than size is kept whole
// and becomes its own oversized chunk. Empty paragraphs are dropped, and an
// input with no content yields a nil slice.
func chunk(text string, size int) []string {
	const sep = "\n\n"

	// Normalize CRLF so that files written on Windows split the same way.
	text = strings.ReplaceAll(text, "\r\n", "\n")

	var chunks []string
	var current strings.Builder

	for _, p := range strings.Split(text, sep) {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		if current.Len() > 0 {
			// Count the separator too; otherwise a packed chunk could end
			// up larger than size even though every paragraph fits.
			if current.Len()+len(sep)+len(p) > size {
				chunks = append(chunks, current.String())
				current.Reset()
			} else {
				current.WriteString(sep)
			}
		}
		current.WriteString(p)
	}
	if current.Len() > 0 {
		chunks = append(chunks, current.String())
	}
	return chunks
}

// --- Embedding ---

// embed returns the embedding vector for text by POSTing it to the Ollama
// embed endpoint at http://localhost:11434/api/embed using the
// "nomic-embed-text" model.
//
// It returns an error when the request cannot be encoded or sent, when the
// response body cannot be read, when the server answers with a status other
// than 200, when the body is not valid JSON, or when the response contains
// no embedding.
func embed(text string) ([]float64, error) {
	body, err := json.Marshal(map[string]any{
		"model": "nomic-embed-text",
		"input": text,
	})
	if err != nil {
		return nil, fmt.Errorf("encode embed request: %w", err)
	}
	resp, err := http.Post("http://localhost:11434/api/embed", "application/json", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read embed response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		// Include the body: it usually carries the server's error message.
		return nil, fmt.Errorf("embed request: status %s: %s", resp.Status, bytes.TrimSpace(data))
	}

	var result struct {
		Embeddings [][]float64 `json:"embeddings"`
	}
	if err := json.Unmarshal(data, &result); err != nil {
		return nil, fmt.Errorf("decode embed response: %w", err)
	}
	// Guard the index below; an empty list would otherwise panic.
	if len(result.Embeddings) == 0 {
		return nil, fmt.Errorf("embed response contains no embeddings")
	}
	return result.Embeddings[0], nil
}

// cosineSimilarity returns the cosine of the angle between vectors a and b:
// their dot product divided by the product of their Euclidean norms.
//
// It returns 0 when the vectors have different lengths (they are not
// comparable, and indexing the shorter one would panic) and when either
// vector has zero norm, which includes empty and nil vectors.
func cosineSimilarity(a, b []float64) float64 {
	if len(a) != len(b) {
		return 0
	}
	var dot, normA, normB float64
	for i := range a {
		dot += a[i] * b[i]
		normA += a[i] * a[i]
		normB += b[i] * b[i]
	}
	if normA == 0 || normB == 0 {
		return 0
	}
	return dot / (math.Sqrt(normA) * math.Sqrt(normB))
}

// --- Indexing ---

// indexDocuments reads every regular entry in dir whose name ends in ".md",
// splits its content into chunks of up to 600 bytes, embeds each chunk, and
// returns the resulting Chunk values. Entries are visited in the order
// os.ReadDir returns them, and a per-file chunk count is printed to stdout.
//
// Subdirectories are skipped, including ones whose name ends in ".md".
// Indexing stops at the first failure: an unreadable directory, an
// unreadable file, or an embedding error each produce a non-nil error and
// a nil slice.
func indexDocuments(dir string) ([]Chunk, error) {
	files, err := os.ReadDir(dir)
	if err != nil {
		return nil, err
	}
	var chunks []Chunk
	for _, f := range files {
		if f.IsDir() || !strings.HasSuffix(f.Name(), ".md") {
			continue
		}
		content, err := os.ReadFile(filepath.Join(dir, f.Name()))
		if err != nil {
			// Do not index a file as empty just because it could not be read.
			return nil, fmt.Errorf("read %s: %w", f.Name(), err)
		}
		parts := chunk(string(content), 600)
		for _, part := range parts {
			vec, err := embed(part)
			if err != nil {
				return nil, fmt.Errorf("embed %s: %w", f.Name(), err)
			}
			chunks = append(chunks, Chunk{Text: part, Source: f.Name(), Embedding: vec})
		}
		fmt.Printf("  %s: %d chunks\n", f.Name(), len(parts))
	}
	return chunks, nil
}

// --- Retrieval ---

// retrieve returns up to topK chunks from docs, ordered from most to least
// similar to query. Similarity is the cosine similarity between the query's
// embedding and each chunk's stored embedding; chunks with equal scores keep
// their relative order from docs.
//
// It returns nil when topK is not positive or docs is empty. If the query
// cannot be embedded, the error is reported on stderr and nil is returned,
// since without a query vector every score would be 0 and the selection
// would be arbitrary.
func retrieve(docs []Chunk, query string, topK int) []Chunk {
	if topK <= 0 || len(docs) == 0 {
		return nil
	}
	queryVec, err := embed(query)
	if err != nil {
		fmt.Fprintln(os.Stderr, "retrieve: embed query:", err)
		return nil
	}

	type scored struct {
		chunk Chunk
		score float64
	}
	results := make([]scored, len(docs))
	for i, doc := range docs {
		results[i] = scored{doc, cosineSimilarity(queryVec, doc.Embedding)}
	}
	// Stable sort keeps ties in input order, so results are deterministic.
	sort.SliceStable(results, func(i, j int) bool {
		return results[i].score > results[j].score
	})

	if topK > len(results) {
		topK = len(results)
	}
	top := make([]Chunk, 0, topK)
	for _, r := range results[:topK] {
		top = append(top, r.chunk)
	}
	return top
}

// --- Generation ---

// chat sends messages to the Ollama chat endpoint at
// http://localhost:11434/api/chat using the "llama3.2" model with streaming
// disabled, and returns the content of the reply message.
//
// It returns an error when the request cannot be encoded or sent, when the
// response body cannot be read, when the server answers with a status other
// than 200, or when the body is not valid JSON.
func chat(messages []Message) (string, error) {
	body, err := json.Marshal(map[string]any{
		"model":    "llama3.2",
		"messages": messages,
		"stream":   false,
	})
	if err != nil {
		return "", fmt.Errorf("encode chat request: %w", err)
	}
	resp, err := http.Post("http://localhost:11434/api/chat", "application/json", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("read chat response: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		// Without this check a failed request would look like an empty answer.
		return "", fmt.Errorf("chat request: status %s: %s", resp.Status, bytes.TrimSpace(data))
	}

	var result struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
	}
	if err := json.Unmarshal(data, &result); err != nil {
		return "", fmt.Errorf("decode chat response: %w", err)
	}
	return result.Message.Content, nil
}

// rag answers question from the indexed docs. It retrieves the three most
// relevant chunks, renders them as a numbered context with their source in
// parentheses, and asks the chat model to answer from that context only,
// citing the numbers. The model's reply, or the error from chat, is returned.
func rag(docs []Chunk, question string) (string, error) {
	const systemPrompt = `Answer using ONLY the provided context.
If the context doesn't have the answer, say "I don't have that information."
Cite sources using [1], [2], etc.`

	hits := retrieve(docs, question, 3)

	// One numbered section per retrieved chunk; numbering starts at 1 so it
	// matches the citation style requested in the system prompt.
	sections := make([]string, len(hits))
	for n := range hits {
		sections[n] = fmt.Sprintf("[%d] (%s)\n%s\n\n", n+1, hits[n].Source, hits[n].Text)
	}
	prompt := fmt.Sprintf("Context:\n%s\nQuestion: %s",
		strings.Join(sections, ""), question)

	return chat([]Message{
		{Role: "system", Content: systemPrompt},
		{Role: "user", Content: prompt},
	})
}

// --- Main ---

// main indexes the markdown files under ./docs and then answers questions
// read line by line from standard input until input ends.
//
// Answers and progress go to stdout; errors go to stderr. The process exits
// with status 1 if indexing fails or if reading standard input fails. A
// failure while answering a single question is reported and the loop
// continues with the next question.
func main() {
	fmt.Println("Indexing docs/...")
	docs, err := indexDocuments("./docs")
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		os.Exit(1)
	}
	fmt.Printf("Indexed %d chunks\n", len(docs))

	scanner := bufio.NewScanner(os.Stdin)
	for {
		fmt.Print("\nQuestion: ")
		if !scanner.Scan() {
			break
		}
		// Treat whitespace-only lines like empty ones instead of querying the model.
		q := strings.TrimSpace(scanner.Text())
		if q == "" {
			continue
		}
		answer, err := rag(docs, q)
		if err != nil {
			fmt.Fprintln(os.Stderr, "Error:", err)
			continue
		}
		fmt.Println("\n" + answer)
	}
	// Scan returns false both at EOF and on failure; only the latter sets Err.
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		os.Exit(1)
	}
}

Sample Output

Your answers will differ since LLMs are non-deterministic, but here's what a session looks like:

Indexing docs/...
  channels.md: 2 chunks
  error-handling.md: 2 chunks
  goroutines.md: 1 chunks
Indexed 5 chunks

Question: How do goroutines communicate?

Goroutines communicate through channels [1]. Channels are typed conduits
for sending and receiving values between goroutines, providing
synchronization without explicit locks [2]. Shared memory with mutexes
works too, but channels are the idiomatic approach in Go [1].

Question: What happens if I ignore an error?

Don't ignore errors. If a function returns an error, handle it or
propagate it. Silent failures are the hardest bugs to find [1].

Question: What is Kubernetes?

I don't have that information.

💻 Run locally

Copy the code above and run it on your machine

© 2026 ByteLearn.dev. Free courses for developers. · Privacy