diff options
Diffstat (limited to 'internal/processor')
| -rw-r--r-- | internal/processor/audio.go | 49 | ||||
| -rw-r--r-- | internal/processor/image.go | 116 | ||||
| -rw-r--r-- | internal/processor/markdown.go | 68 | ||||
| -rw-r--r-- | internal/processor/processor.go | 234 | ||||
| -rw-r--r-- | internal/processor/txt.go | 103 |
5 files changed, 570 insertions, 0 deletions
diff --git a/internal/processor/audio.go b/internal/processor/audio.go new file mode 100644 index 0000000..98aedcf --- /dev/null +++ b/internal/processor/audio.go @@ -0,0 +1,49 @@ +package processor + +import ( + "fmt" + "io" + "os" + "path/filepath" +) + +// processAudio copies an .mp3 file into destDir and returns an HTML <audio> snippet. +// The audio element has controls enabled so visitors can play it inline. +func processAudio(srcPath, destDir, postID string) (filename, htmlContent string, err error) { + outName := filepath.Base(srcPath) + outPath := filepath.Join(destDir, outName) + + if err := copyFile(srcPath, outPath); err != nil { + return "", "", err + } + + // The src attribute is relative to the site root. + src := fmt.Sprintf("posts/%s/%s", postID, outName) + html := fmt.Sprintf( + `<audio controls class="post-audio"><source src="%s" type="audio/mpeg">Your browser does not support audio.</audio>`, + src, + ) + + return outName, html, nil +} + +// copyFile copies the file at src to dst, creating dst if it does not exist. +func copyFile(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return fmt.Errorf("open source %s: %w", src, err) + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return fmt.Errorf("create dest %s: %w", dst, err) + } + defer out.Close() + + if _, err := io.Copy(out, in); err != nil { + return fmt.Errorf("copy %s → %s: %w", src, dst, err) + } + + return nil +} diff --git a/internal/processor/image.go b/internal/processor/image.go new file mode 100644 index 0000000..9a7d769 --- /dev/null +++ b/internal/processor/image.go @@ -0,0 +1,116 @@ +package processor + +import ( + "fmt" + "image" + "image/gif" + "image/jpeg" + "image/png" + "os" + "path/filepath" + + "golang.org/x/image/draw" +) + +const ( + maxImageWidth = 1024 + jpegQuality = 80 +) + +// processImage reads the source image, resizes it if wider than maxImageWidth, +// encodes it as JPEG at jpegQuality, and writes the result to destDir. +// Returns the output filename (always a .jpg) and an HTML <img> snippet. +func processImage(srcPath, destDir, postID string) (filename, htmlContent string, err error) { + img, err := decodeImage(srcPath) + if err != nil { + return "", "", err + } + + img = resizeIfNeeded(img) + + outName := "image.jpg" + outPath := filepath.Join(destDir, outName) + + if err := writeJPEG(img, outPath); err != nil { + return "", "", err + } + + // The <img> src is relative to the site root, pointing into the posts dir. + src := fmt.Sprintf("posts/%s/%s", postID, outName) + html := fmt.Sprintf(`<img src="%s" alt="" class="post-image">`, src) + + return outName, html, nil +} + +// decodeImage decodes a JPEG, PNG, or GIF (first frame) from srcPath. +func decodeImage(srcPath string) (image.Image, error) { + f, err := os.Open(srcPath) + if err != nil { + return nil, fmt.Errorf("open image %s: %w", srcPath, err) + } + defer f.Close() + + ext := filepath.Ext(srcPath) + switch ext { + case ".jpg", ".jpeg": + img, err := jpeg.Decode(f) + if err != nil { + return nil, fmt.Errorf("decode JPEG %s: %w", srcPath, err) + } + return img, nil + + case ".png": + img, err := png.Decode(f) + if err != nil { + return nil, fmt.Errorf("decode PNG %s: %w", srcPath, err) + } + return img, nil + + case ".gif": + // Use only the first frame of animated GIFs. + g, err := gif.Decode(f) + if err != nil { + return nil, fmt.Errorf("decode GIF %s: %w", srcPath, err) + } + return g, nil + + default: + return nil, fmt.Errorf("unsupported image format: %s", ext) + } +} + +// resizeIfNeeded returns a resized copy of img if its width exceeds maxImageWidth, +// preserving aspect ratio. Otherwise the original is returned unchanged. +func resizeIfNeeded(img image.Image) image.Image { + bounds := img.Bounds() + w := bounds.Dx() + + if w <= maxImageWidth { + return img + } + + h := bounds.Dy() + newW := maxImageWidth + newH := (h * newW) / w + + dst := image.NewRGBA(image.Rect(0, 0, newW, newH)) + draw.BiLinear.Scale(dst, dst.Bounds(), img, bounds, draw.Over, nil) + + return dst +} + +// writeJPEG encodes img as JPEG at the configured quality level and writes to path. +func writeJPEG(img image.Image, path string) error { + f, err := os.Create(path) + if err != nil { + return fmt.Errorf("create JPEG %s: %w", path, err) + } + defer f.Close() + + opts := &jpeg.Options{Quality: jpegQuality} + if err := jpeg.Encode(f, img, opts); err != nil { + return fmt.Errorf("encode JPEG %s: %w", path, err) + } + + return nil +} diff --git a/internal/processor/markdown.go b/internal/processor/markdown.go new file mode 100644 index 0000000..8d69bfe --- /dev/null +++ b/internal/processor/markdown.go @@ -0,0 +1,68 @@ +package processor + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/renderer/html" +) + +// imageRefPattern matches Markdown image syntax:  +// We use it to discover local asset references that must be copied. +var imageRefPattern = regexp.MustCompile(`!\[[^\]]*\]\(([^)]+)\)`) + +// processMd converts a Markdown file to an HTML snippet. +// Returns the HTML and a list of local image filenames referenced in the document. +// Referenced images that exist alongside the source file are returned so the +// caller can copy them into the post asset directory. +func processMd(path string) (htmlContent string, localImages []string, err error) { + data, err := os.ReadFile(path) + if err != nil { + return "", nil, fmt.Errorf("read markdown %s: %w", path, err) + } + + // Collect local image references so the caller can copy them as assets. + localImages = findLocalImages(string(data), filepath.Dir(path)) + + md := goldmark.New( + goldmark.WithExtensions(extension.GFM), + goldmark.WithRendererOptions( + html.WithUnsafe(), // Allow raw HTML in markdown (user-controlled content). + ), + ) + + var buf bytes.Buffer + if err := md.Convert(data, &buf); err != nil { + return "", nil, fmt.Errorf("convert markdown %s: %w", path, err) + } + + return buf.String(), localImages, nil +} + +// findLocalImages returns image filenames referenced in markdown that actually +// exist in sourceDir. Remote URLs (http/https) are ignored. +func findLocalImages(mdContent, sourceDir string) []string { + matches := imageRefPattern.FindAllStringSubmatch(mdContent, -1) + var locals []string + + for _, m := range matches { + ref := m[1] + // Skip remote URLs. + if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") { + continue + } + + candidate := filepath.Join(sourceDir, ref) + if _, err := os.Stat(candidate); err == nil { + locals = append(locals, filepath.Base(ref)) + } + } + + return locals +} diff --git a/internal/processor/processor.go b/internal/processor/processor.go new file mode 100644 index 0000000..077cc5b --- /dev/null +++ b/internal/processor/processor.go @@ -0,0 +1,234 @@ +// Package processor scans the input directory for new source files and converts +// each one into a self-contained post directory under outdir/posts/. +// Supported formats: .txt, .md, .png, .jpg, .jpeg, .gif, .mp3. +// Each processed source file is deleted from the input directory afterward. +package processor + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "codeberg.org/snonux/snonux/internal/config" + "codeberg.org/snonux/snonux/internal/post" +) + +// Run scans cfg.InputDir and processes every eligible file into a post directory +// under cfg.OutputDir/posts/. Returns the number of posts created. +// +// Images referenced by a .md file in the same input directory are consumed by +// that markdown post and are not processed as independent image posts. +func Run(cfg *config.Config) (int, error) { + entries, err := os.ReadDir(cfg.InputDir) + if err != nil { + return 0, fmt.Errorf("read input dir %s: %w", cfg.InputDir, err) + } + + postsDir := filepath.Join(cfg.OutputDir, "posts") + if err := os.MkdirAll(postsDir, 0o755); err != nil { + return 0, fmt.Errorf("create posts dir: %w", err) + } + + // Pre-scan markdown files to discover which image filenames they claim. + // Claimed images are excluded from independent processing. + claimed := claimedByMarkdown(entries, cfg.InputDir) + + count := 0 + + for _, entry := range entries { + if entry.IsDir() || strings.HasPrefix(entry.Name(), ".") { + continue + } + if claimed[entry.Name()] { + continue // consumed by a .md post — skip independent processing + } + + srcPath := filepath.Join(cfg.InputDir, entry.Name()) + if err := processFile(srcPath, postsDir); err != nil { + return count, fmt.Errorf("process %s: %w", entry.Name(), err) + } + + count++ + } + + return count, nil +} + +// claimedByMarkdown scans all .md entries in inputDir and returns a set of +// image filenames that are referenced within those markdown files. +// Those images should be embedded in the markdown post, not processed alone. +func claimedByMarkdown(entries []os.DirEntry, inputDir string) map[string]bool { + claimed := make(map[string]bool) + + for _, entry := range entries { + if entry.IsDir() || strings.ToLower(filepath.Ext(entry.Name())) != ".md" { + continue + } + + mdPath := filepath.Join(inputDir, entry.Name()) + data, err := os.ReadFile(mdPath) + if err != nil { + continue + } + + for _, imgName := range findLocalImages(string(data), inputDir) { + claimed[imgName] = true + } + } + + return claimed +} + +// processFile processes a single input file into a new post directory. +// The source file is removed from the input dir on success. +func processFile(srcPath, postsDir string) error { + now := time.Now().UTC() + id := uniqueID(postsDir, now) + + postDir := filepath.Join(postsDir, id) + if err := os.MkdirAll(postDir, 0o755); err != nil { + return fmt.Errorf("create post dir %s: %w", id, err) + } + + p, err := buildPost(srcPath, postDir, id) + if err != nil { + // Clean up the half-created directory to avoid partial state. + _ = os.RemoveAll(postDir) + return err + } + + if err := p.Save(postDir); err != nil { + _ = os.RemoveAll(postDir) + return err + } + + // Delete the source file only after the post has been successfully persisted. + return os.Remove(srcPath) +} + +// buildPost dispatches to the appropriate sub-processor based on file extension +// and returns a populated Post ready to be saved. +func buildPost(srcPath, postDir, id string) (*post.Post, error) { + ext := strings.ToLower(filepath.Ext(srcPath)) + + switch ext { + case ".txt": + return buildTextPost(srcPath, id) + + case ".md": + return buildMarkdownPost(srcPath, postDir, id) + + case ".png", ".jpg", ".jpeg", ".gif": + return buildImagePost(srcPath, postDir, id) + + case ".mp3": + return buildAudioPost(srcPath, postDir, id) + + default: + return nil, fmt.Errorf("unsupported file type: %s", ext) + } +} + +func buildTextPost(srcPath, id string) (*post.Post, error) { + html, err := processTxt(srcPath) + if err != nil { + return nil, err + } + + return &post.Post{ + ID: id, + Timestamp: time.Now().UTC(), + PostType: post.TypeText, + Content: html, + }, nil +} + +func buildMarkdownPost(srcPath, postDir, id string) (*post.Post, error) { + html, localImages, err := processMd(srcPath) + if err != nil { + return nil, err + } + + sourceDir := filepath.Dir(srcPath) + + assets, err := copyLocalImages(localImages, sourceDir, postDir) + if err != nil { + return nil, err + } + + // Delete the referenced image files from the input dir so they are not + // processed again as independent posts. + for _, name := range localImages { + _ = os.Remove(filepath.Join(sourceDir, name)) + } + + return &post.Post{ + ID: id, + Timestamp: time.Now().UTC(), + PostType: post.TypeMarkdown, + Content: html, + Assets: assets, + }, nil +} + +func buildImagePost(srcPath, postDir, id string) (*post.Post, error) { + filename, html, err := processImage(srcPath, postDir, id) + if err != nil { + return nil, err + } + + return &post.Post{ + ID: id, + Timestamp: time.Now().UTC(), + PostType: post.TypeImage, + Content: html, + Assets: []string{filename}, + }, nil +} + +func buildAudioPost(srcPath, postDir, id string) (*post.Post, error) { + filename, html, err := processAudio(srcPath, postDir, id) + if err != nil { + return nil, err + } + + return &post.Post{ + ID: id, + Timestamp: time.Now().UTC(), + PostType: post.TypeAudio, + Content: html, + Assets: []string{filename}, + }, nil +} + +// copyLocalImages copies referenced image files from sourceDir into postDir. +// Returns the list of filenames that were successfully copied. +func copyLocalImages(filenames []string, sourceDir, postDir string) ([]string, error) { + var copied []string + + for _, name := range filenames { + src := filepath.Join(sourceDir, name) + dst := filepath.Join(postDir, name) + + if err := copyFile(src, dst); err != nil { + return nil, fmt.Errorf("copy image asset %s: %w", name, err) + } + + copied = append(copied, name) + } + + return copied, nil +} + +// uniqueID generates a post ID for the given time that does not already exist +// as a directory under postsDir. Appends a numeric suffix if needed. +func uniqueID(postsDir string, t time.Time) string { + for i := 0; ; i++ { + id := post.NewID(t, i) + if _, err := os.Stat(filepath.Join(postsDir, id)); os.IsNotExist(err) { + return id + } + } +} diff --git a/internal/processor/txt.go b/internal/processor/txt.go new file mode 100644 index 0000000..8381271 --- /dev/null +++ b/internal/processor/txt.go @@ -0,0 +1,103 @@ +package processor + +import ( + "fmt" + "html" + "os" + "regexp" + "strings" +) + +// urlPattern matches http/https URLs in plain text. +// Trailing sentence punctuation is stripped separately by stripURLTrailing. +var urlPattern = regexp.MustCompile(`https?://\S+`) + +// processTxt reads a plain-text file and wraps each non-empty paragraph in <p> tags. +// URLs are automatically converted to clickable <a> links. +// Non-URL text is HTML-escaped to prevent XSS. +func processTxt(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("read txt %s: %w", path, err) + } + + raw := strings.TrimSpace(string(data)) + if raw == "" { + return "<p></p>", nil + } + + // Split on blank lines to get logical paragraphs. + paragraphs := strings.Split(raw, "\n\n") + var sb strings.Builder + + for _, para := range paragraphs { + trimmed := strings.TrimSpace(para) + if trimmed == "" { + continue + } + fmt.Fprintf(&sb, "<p>%s</p>\n", formatParagraph(trimmed)) + } + + return sb.String(), nil +} + +// formatParagraph formats a single paragraph: auto-links URLs, escapes non-URL +// text, and converts single newlines to <br> line breaks. +func formatParagraph(para string) string { + lines := strings.Split(para, "\n") + formatted := make([]string, 0, len(lines)) + + for _, line := range lines { + if t := strings.TrimSpace(line); t != "" { + formatted = append(formatted, autolinkLine(t)) + } + } + + return strings.Join(formatted, "<br>\n") +} + +// autolinkLine escapes non-URL text and wraps detected URLs in <a> tags. +// Opens in a new tab with rel="noopener noreferrer" for security. +func autolinkLine(line string) string { + locs := urlPattern.FindAllStringIndex(line, -1) + if len(locs) == 0 { + return html.EscapeString(line) + } + + var sb strings.Builder + prev := 0 + + for _, loc := range locs { + sb.WriteString(html.EscapeString(line[prev:loc[0]])) + + rawURL := line[loc[0]:loc[1]] + cleanURL := stripURLTrailing(rawURL) + trailing := rawURL[len(cleanURL):] + + fmt.Fprintf(&sb, `<a href="%s" target="_blank" rel="noopener noreferrer">%s</a>`, + html.EscapeString(cleanURL), html.EscapeString(cleanURL)) + + if trailing != "" { + sb.WriteString(html.EscapeString(trailing)) + } + + prev = loc[1] + } + + sb.WriteString(html.EscapeString(line[prev:])) + + return sb.String() +} + +// stripURLTrailing removes common sentence-ending punctuation from the end of a +// URL match. These characters are valid in URLs but almost never appear there +// at the end in prose (e.g. "Visit https://foo.com." — the "." ends the sentence). +func stripURLTrailing(u string) string { + const cutset = ".,;:!?\"')>]}" + + for len(u) > 0 && strings.ContainsRune(cutset, rune(u[len(u)-1])) { + u = u[:len(u)-1] + } + + return u +} |
