summary refs log tree commit diff
path: root/internal/processor
diff options
context:
space:
mode:
Diffstat (limited to 'internal/processor')
-rw-r--r-- internal/processor/audio.go 49
-rw-r--r-- internal/processor/image.go 116
-rw-r--r-- internal/processor/markdown.go 68
-rw-r--r-- internal/processor/processor.go 234
-rw-r--r-- internal/processor/txt.go 103
5 files changed, 570 insertions, 0 deletions
diff --git a/internal/processor/audio.go b/internal/processor/audio.go
new file mode 100644
index 0000000..98aedcf
--- /dev/null
+++ b/internal/processor/audio.go
@@ -0,0 +1,49 @@
+package processor
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+)
+
+// processAudio copies an .mp3 file into destDir and returns an HTML <audio> snippet.
+// The audio element has controls enabled so visitors can play it inline.
+func processAudio(srcPath, destDir, postID string) (filename, htmlContent string, err error) {
+ outName := filepath.Base(srcPath)
+ outPath := filepath.Join(destDir, outName)
+
+ if err := copyFile(srcPath, outPath); err != nil {
+ return "", "", err
+ }
+
+ // The src attribute is relative to the site root.
+ src := fmt.Sprintf("posts/%s/%s", postID, outName)
+ html := fmt.Sprintf(
+ `<audio controls class="post-audio"><source src="%s" type="audio/mpeg">Your browser does not support audio.</audio>`,
+ src,
+ )
+
+ return outName, html, nil
+}
+
+// copyFile copies the contents of the file at src into dst. dst is created if
+// it does not exist and truncated if it does.
+//
+// The error from closing dst is reported too: data may only reach the disk
+// when the file is closed, so dropping that error could hide a truncated copy.
+func copyFile(src, dst string) (err error) {
+	in, err := os.Open(src)
+	if err != nil {
+		return fmt.Errorf("open source %s: %w", src, err)
+	}
+	defer in.Close() // read-only handle; a close failure cannot lose data
+
+	out, err := os.Create(dst)
+	if err != nil {
+		return fmt.Errorf("create dest %s: %w", dst, err)
+	}
+	defer func() {
+		// Keep the first error; only surface the close error on an otherwise
+		// successful copy.
+		if cerr := out.Close(); cerr != nil && err == nil {
+			err = fmt.Errorf("close dest %s: %w", dst, cerr)
+		}
+	}()
+
+	if _, err = io.Copy(out, in); err != nil {
+		return fmt.Errorf("copy %s → %s: %w", src, dst, err)
+	}
+
+	return nil
+}
diff --git a/internal/processor/image.go b/internal/processor/image.go
new file mode 100644
index 0000000..9a7d769
--- /dev/null
+++ b/internal/processor/image.go
@@ -0,0 +1,116 @@
+package processor
+
+import (
+	"fmt"
+	"image"
+	"image/gif"
+	"image/jpeg"
+	"image/png"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/image/draw"
+)
+
+const (
+ maxImageWidth = 1024 // widest output in pixels; resizeIfNeeded scales wider images down to this
+ jpegQuality = 80 // quality value handed to jpeg.Options in writeJPEG
+)
+
+// processImage reads the source image, resizes it if wider than maxImageWidth,
+// encodes it as JPEG at jpegQuality, and writes the result to destDir.
+// Returns the output filename (always a .jpg) and an HTML <img> snippet.
+func processImage(srcPath, destDir, postID string) (filename, htmlContent string, err error) {
+ img, err := decodeImage(srcPath)
+ if err != nil {
+ return "", "", err
+ }
+
+ img = resizeIfNeeded(img)
+
+ outName := "image.jpg"
+ outPath := filepath.Join(destDir, outName)
+
+ if err := writeJPEG(img, outPath); err != nil {
+ return "", "", err
+ }
+
+ // The <img> src is relative to the site root, pointing into the posts dir.
+ src := fmt.Sprintf("posts/%s/%s", postID, outName)
+ html := fmt.Sprintf(`<img src="%s" alt="" class="post-image">`, src)
+
+ return outName, html, nil
+}
+
+// decodeImage decodes a JPEG, PNG, or GIF (first frame) from srcPath.
+func decodeImage(srcPath string) (image.Image, error) {
+ f, err := os.Open(srcPath)
+ if err != nil {
+ return nil, fmt.Errorf("open image %s: %w", srcPath, err)
+ }
+ defer f.Close()
+
+ ext := filepath.Ext(srcPath)
+ switch ext {
+ case ".jpg", ".jpeg":
+ img, err := jpeg.Decode(f)
+ if err != nil {
+ return nil, fmt.Errorf("decode JPEG %s: %w", srcPath, err)
+ }
+ return img, nil
+
+ case ".png":
+ img, err := png.Decode(f)
+ if err != nil {
+ return nil, fmt.Errorf("decode PNG %s: %w", srcPath, err)
+ }
+ return img, nil
+
+ case ".gif":
+ // Use only the first frame of animated GIFs.
+ g, err := gif.Decode(f)
+ if err != nil {
+ return nil, fmt.Errorf("decode GIF %s: %w", srcPath, err)
+ }
+ return g, nil
+
+ default:
+ return nil, fmt.Errorf("unsupported image format: %s", ext)
+ }
+}
+
+// resizeIfNeeded returns img unchanged when it is at most maxImageWidth wide.
+// Wider images are scaled down to exactly maxImageWidth with bilinear
+// interpolation, keeping the aspect ratio, and returned as a new RGBA image.
+//
+// The scaled height is clamped to at least one pixel: for very wide, flat
+// images the integer division would otherwise round down to zero and leave an
+// empty destination image.
+func resizeIfNeeded(img image.Image) image.Image {
+	bounds := img.Bounds()
+	w, h := bounds.Dx(), bounds.Dy()
+
+	if w <= maxImageWidth {
+		return img
+	}
+
+	newW := maxImageWidth
+	newH := (h * newW) / w
+	if newH < 1 {
+		newH = 1
+	}
+
+	dst := image.NewRGBA(image.Rect(0, 0, newW, newH))
+	draw.BiLinear.Scale(dst, dst.Bounds(), img, bounds, draw.Over, nil)
+
+	return dst
+}
+
+// writeJPEG encodes img as JPEG at jpegQuality and writes it to path, creating
+// the file or truncating an existing one.
+//
+// A failure while closing the file is returned as well, because a write that
+// only fails at close time would otherwise leave a damaged JPEG unnoticed.
+func writeJPEG(img image.Image, path string) (err error) {
+	f, err := os.Create(path)
+	if err != nil {
+		return fmt.Errorf("create JPEG %s: %w", path, err)
+	}
+	defer func() {
+		// Keep the first error; report the close error only if encoding succeeded.
+		if cerr := f.Close(); cerr != nil && err == nil {
+			err = fmt.Errorf("close JPEG %s: %w", path, cerr)
+		}
+	}()
+
+	if err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpegQuality}); err != nil {
+		return fmt.Errorf("encode JPEG %s: %w", path, err)
+	}
+
+	return nil
+}
diff --git a/internal/processor/markdown.go b/internal/processor/markdown.go
new file mode 100644
index 0000000..8d69bfe
--- /dev/null
+++ b/internal/processor/markdown.go
@@ -0,0 +1,68 @@
+package processor
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/extension"
+ "github.com/yuin/goldmark/renderer/html"
+)
+
+// imageRefPattern matches Markdown image syntax ![alt](target); capture group 1 is the target.
+// We use it to discover local asset references that must be copied.
+var imageRefPattern = regexp.MustCompile(`!\[[^\]]*\]\(([^)]+)\)`)
+
+// processMd renders the Markdown file at path to an HTML snippet using goldmark
+// with the GFM extension; raw HTML embedded in the document is passed through.
+// Alongside the HTML it returns the names of referenced images that exist next
+// to the source file, so the caller can copy them into the post asset directory.
+func processMd(path string) (htmlContent string, localImages []string, err error) {
+	source, readErr := os.ReadFile(path)
+	if readErr != nil {
+		return "", nil, fmt.Errorf("read markdown %s: %w", path, readErr)
+	}
+
+	// html.WithUnsafe lets raw HTML through; the markdown is user-controlled content.
+	renderer := goldmark.New(
+		goldmark.WithExtensions(extension.GFM),
+		goldmark.WithRendererOptions(html.WithUnsafe()),
+	)
+
+	var rendered bytes.Buffer
+	if convErr := renderer.Convert(source, &rendered); convErr != nil {
+		return "", nil, fmt.Errorf("convert markdown %s: %w", path, convErr)
+	}
+
+	// Image references are resolved against the directory holding the source file.
+	images := findLocalImages(string(source), filepath.Dir(path))
+
+	return rendered.String(), images, nil
+}
+
+// findLocalImages returns the names of images referenced in mdContent that are
+// regular files located directly in sourceDir. Each name is returned once, in
+// order of first appearance.
+//
+// Skipped references:
+//   - remote URLs (http/https);
+//   - references that resolve outside sourceDir or into a subdirectory. Callers
+//     join the returned name onto sourceDir, so returning only the base name of
+//     such a reference would point them at a different (or missing) file;
+//   - anything that is missing or is not a regular file, e.g. a directory.
+func findLocalImages(mdContent, sourceDir string) []string {
+	var locals []string
+	seen := make(map[string]bool)
+
+	for _, m := range imageRefPattern.FindAllStringSubmatch(mdContent, -1) {
+		ref := m[1]
+		// Skip remote URLs.
+		if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") {
+			continue
+		}
+
+		name := filepath.Base(ref)
+		candidate := filepath.Join(sourceDir, name)
+
+		// Join cleans both paths, so "./a.png" still counts as a direct child
+		// while "sub/a.png" and "../a.png" do not.
+		if filepath.Join(sourceDir, ref) != candidate || seen[name] {
+			continue
+		}
+
+		info, err := os.Stat(candidate)
+		if err != nil || !info.Mode().IsRegular() {
+			continue
+		}
+
+		seen[name] = true
+		locals = append(locals, name)
+	}
+
+	return locals
+}
diff --git a/internal/processor/processor.go b/internal/processor/processor.go
new file mode 100644
index 0000000..077cc5b
--- /dev/null
+++ b/internal/processor/processor.go
@@ -0,0 +1,234 @@
+// Package processor scans the input directory for new source files and converts
+// each one into a self-contained post directory under outdir/posts/.
+// Supported formats: .txt, .md, .png, .jpg, .jpeg, .gif, .mp3.
+// Each processed source file is deleted from the input directory afterward.
+package processor
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "time"
+
+ "codeberg.org/snonux/snonux/internal/config"
+ "codeberg.org/snonux/snonux/internal/post"
+)
+
+// Run walks the entries of cfg.InputDir and converts each eligible file into a
+// post directory below cfg.OutputDir/posts/, creating that directory if needed.
+// It returns how many posts were created; on the first failing file it stops
+// and returns the count reached so far together with the error.
+//
+// Directories and dot-files are skipped. So are images that a .md file in the
+// same input directory references: they belong to that markdown post and are
+// not turned into image posts of their own.
+func Run(cfg *config.Config) (int, error) {
+	entries, err := os.ReadDir(cfg.InputDir)
+	if err != nil {
+		return 0, fmt.Errorf("read input dir %s: %w", cfg.InputDir, err)
+	}
+
+	postsDir := filepath.Join(cfg.OutputDir, "posts")
+	if mkErr := os.MkdirAll(postsDir, 0o755); mkErr != nil {
+		return 0, fmt.Errorf("create posts dir: %w", mkErr)
+	}
+
+	// Find out up front which image filenames the markdown files lay claim to.
+	claimed := claimedByMarkdown(entries, cfg.InputDir)
+
+	created := 0
+
+	for _, entry := range entries {
+		name := entry.Name()
+
+		switch {
+		case entry.IsDir(), strings.HasPrefix(name, "."):
+			continue
+		case claimed[name]:
+			continue // consumed by a .md post — skip independent processing
+		}
+
+		if procErr := processFile(filepath.Join(cfg.InputDir, name), postsDir); procErr != nil {
+			return created, fmt.Errorf("process %s: %w", name, procErr)
+		}
+
+		created++
+	}
+
+	return created, nil
+}
+
+// claimedByMarkdown reads every visible .md entry in inputDir and returns the
+// set of image filenames those markdown files reference.
+// Those images should be embedded in the markdown post, not processed alone.
+//
+// Directories and dot-files are ignored, matching the filter in Run. Run never
+// turns a hidden markdown file into a post, so letting one claim images would
+// leave those images unprocessed indefinitely.
+func claimedByMarkdown(entries []os.DirEntry, inputDir string) map[string]bool {
+	claimed := make(map[string]bool)
+
+	for _, entry := range entries {
+		name := entry.Name()
+		if entry.IsDir() || strings.HasPrefix(name, ".") {
+			continue
+		}
+		if strings.ToLower(filepath.Ext(name)) != ".md" {
+			continue
+		}
+
+		data, err := os.ReadFile(filepath.Join(inputDir, name))
+		if err != nil {
+			// An unreadable file claims nothing here; the read is attempted
+			// again, and its error reported, when the file itself is processed.
+			continue
+		}
+
+		for _, imgName := range findLocalImages(string(data), inputDir) {
+			claimed[imgName] = true
+		}
+	}
+
+	return claimed
+}
+
+// processFile turns one input file into a new post directory under postsDir.
+//
+// Nothing is removed from the input directory until the post has been saved.
+// On any failure the half-built post directory is deleted and the source file,
+// together with any images a markdown post references, stays in place so a
+// later run can retry. After a successful save the source file is removed,
+// along with the image files a markdown post copied in as assets.
+func processFile(srcPath, postsDir string) error {
+	now := time.Now().UTC()
+	id := uniqueID(postsDir, now)
+
+	postDir := filepath.Join(postsDir, id)
+	if err := os.MkdirAll(postDir, 0o755); err != nil {
+		return fmt.Errorf("create post dir %s: %w", id, err)
+	}
+
+	p, err := buildPost(srcPath, postDir, id)
+	if err != nil {
+		// Clean up the half-created directory to avoid partial state.
+		_ = os.RemoveAll(postDir)
+		return err
+	}
+
+	if err := p.Save(postDir); err != nil {
+		_ = os.RemoveAll(postDir)
+		return err
+	}
+
+	// The post is persisted, so the images a markdown post consumed can now be
+	// dropped from the input dir; otherwise they would be picked up again as
+	// independent posts. Best effort: a leftover image is not worth failing a
+	// post that has already been saved.
+	if p.PostType == post.TypeMarkdown {
+		sourceDir := filepath.Dir(srcPath)
+		for _, name := range p.Assets {
+			_ = os.Remove(filepath.Join(sourceDir, name))
+		}
+	}
+
+	// Delete the source file only after the post has been successfully persisted.
+	return os.Remove(srcPath)
+}
+
+// buildPost dispatches to the sub-processor matching the lower-cased file
+// extension of srcPath and returns a populated Post ready to be saved.
+// Extensions without a sub-processor yield an "unsupported file type" error.
+func buildPost(srcPath, postDir, id string) (*post.Post, error) {
+	ext := strings.ToLower(filepath.Ext(srcPath))
+
+	switch ext {
+	case ".txt":
+		return buildTextPost(srcPath, id)
+
+	case ".md":
+		return buildMarkdownPost(srcPath, postDir, id)
+
+	case ".png", ".jpg", ".jpeg", ".gif":
+		return buildImagePost(srcPath, postDir, id)
+
+	case ".mp3":
+		return buildAudioPost(srcPath, postDir, id)
+
+	default:
+		return nil, fmt.Errorf("unsupported file type: %s", ext)
+	}
+}
+
+// buildTextPost renders the plain-text file at srcPath with processTxt and
+// wraps the resulting HTML in a text-type Post stamped with the current UTC time.
+func buildTextPost(srcPath, id string) (*post.Post, error) {
+	html, err := processTxt(srcPath)
+	if err != nil {
+		return nil, err
+	}
+
+	return &post.Post{
+		ID:        id,
+		Timestamp: time.Now().UTC(),
+		PostType:  post.TypeText,
+		Content:   html,
+	}, nil
+}
+
+// buildMarkdownPost renders the markdown file at srcPath and copies the local
+// images it references from the file's directory into postDir. The copied
+// filenames become the Post's Assets.
+//
+// The referenced images are only copied here, never deleted: removing them
+// before the post is saved would lose them if saving failed. processFile
+// removes the originals once the post has been persisted.
+func buildMarkdownPost(srcPath, postDir, id string) (*post.Post, error) {
+	html, localImages, err := processMd(srcPath)
+	if err != nil {
+		return nil, err
+	}
+
+	assets, err := copyLocalImages(localImages, filepath.Dir(srcPath), postDir)
+	if err != nil {
+		return nil, err
+	}
+
+	return &post.Post{
+		ID:        id,
+		Timestamp: time.Now().UTC(),
+		PostType:  post.TypeMarkdown,
+		Content:   html,
+		Assets:    assets,
+	}, nil
+}
+
+// buildImagePost runs processImage on srcPath and wraps its HTML snippet in an
+// image-type Post, stamped with the current UTC time, whose single asset is
+// the filename processImage reported.
+func buildImagePost(srcPath, postDir, id string) (*post.Post, error) {
+	asset, snippet, err := processImage(srcPath, postDir, id)
+	if err != nil {
+		return nil, err
+	}
+
+	imagePost := &post.Post{
+		ID:        id,
+		Timestamp: time.Now().UTC(),
+		PostType:  post.TypeImage,
+		Content:   snippet,
+		Assets:    []string{asset},
+	}
+
+	return imagePost, nil
+}
+
+// buildAudioPost runs processAudio on srcPath and wraps its HTML snippet in an
+// audio-type Post, stamped with the current UTC time, whose single asset is
+// the filename processAudio reported.
+func buildAudioPost(srcPath, postDir, id string) (*post.Post, error) {
+	asset, snippet, err := processAudio(srcPath, postDir, id)
+	if err != nil {
+		return nil, err
+	}
+
+	audioPost := &post.Post{
+		ID:        id,
+		Timestamp: time.Now().UTC(),
+		PostType:  post.TypeAudio,
+		Content:   snippet,
+		Assets:    []string{asset},
+	}
+
+	return audioPost, nil
+}
+
+// copyLocalImages copies each named file from sourceDir to the same name in
+// postDir. On success it returns the names in the order they were copied; the
+// first copy failure aborts the loop and is returned with a nil list.
+func copyLocalImages(filenames []string, sourceDir, postDir string) ([]string, error) {
+	var done []string
+
+	for i := range filenames {
+		name := filenames[i]
+
+		err := copyFile(filepath.Join(sourceDir, name), filepath.Join(postDir, name))
+		if err != nil {
+			return nil, fmt.Errorf("copy image asset %s: %w", name, err)
+		}
+
+		done = append(done, name)
+	}
+
+	return done, nil
+}
+
+// uniqueID generates a post ID for the given time that is not already taken by
+// an entry under postsDir. It asks post.NewID for candidates with an increasing
+// counter (0, 1, 2, …) and returns the first one whose path cannot be stat'ed.
+//
+// Any Stat error ends the search, not just "does not exist". If postsDir is
+// unreadable, for example, every candidate fails the same way, and retrying
+// only on not-exist errors would loop forever. The underlying problem is
+// expected to surface when the caller tries to create the post directory.
+func uniqueID(postsDir string, t time.Time) string {
+	for i := 0; ; i++ {
+		id := post.NewID(t, i)
+		if _, err := os.Stat(filepath.Join(postsDir, id)); err != nil {
+			return id
+		}
+	}
+}
diff --git a/internal/processor/txt.go b/internal/processor/txt.go
new file mode 100644
index 0000000..8381271
--- /dev/null
+++ b/internal/processor/txt.go
@@ -0,0 +1,103 @@
+package processor
+
+import (
+ "fmt"
+ "html"
+ "os"
+ "regexp"
+ "strings"
+)
+
+// urlPattern matches http/https URLs in plain text; a match runs up to the next whitespace.
+// Trailing sentence punctuation is stripped separately by stripURLTrailing.
+var urlPattern = regexp.MustCompile(`https?://\S+`)
+
+// processTxt reads a plain-text file and wraps each non-empty paragraph in <p> tags.
+// Paragraphs are separated by blank lines. URLs are converted to clickable <a>
+// links and all other text is HTML-escaped (see formatParagraph).
+//
+// Windows line endings are normalized first: with CRLF input a blank line is
+// "\r\n\r\n", which contains no "\n\n", so the whole file would otherwise end
+// up as one paragraph.
+//
+// A file that is empty or whitespace-only yields "<p></p>".
+func processTxt(path string) (string, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return "", fmt.Errorf("read txt %s: %w", path, err)
+	}
+
+	raw := strings.TrimSpace(strings.ReplaceAll(string(data), "\r\n", "\n"))
+	if raw == "" {
+		return "<p></p>", nil
+	}
+
+	var sb strings.Builder
+
+	// Split on blank lines to get logical paragraphs.
+	for _, para := range strings.Split(raw, "\n\n") {
+		trimmed := strings.TrimSpace(para)
+		if trimmed == "" {
+			continue
+		}
+		fmt.Fprintf(&sb, "<p>%s</p>\n", formatParagraph(trimmed))
+	}
+
+	return sb.String(), nil
+}
+
+// formatParagraph formats a single paragraph: auto-links URLs, escapes non-URL
+// text, and converts single newlines to <br> line breaks.
+func formatParagraph(para string) string {
+ lines := strings.Split(para, "\n")
+ formatted := make([]string, 0, len(lines))
+
+ for _, line := range lines {
+ if t := strings.TrimSpace(line); t != "" {
+ formatted = append(formatted, autolinkLine(t))
+ }
+ }
+
+ return strings.Join(formatted, "<br>\n")
+}
+
+// autolinkLine HTML-escapes line and turns every URL found by urlPattern into
+// an <a> element. Links open in a new tab and carry rel="noopener noreferrer".
+// Punctuation that stripURLTrailing removes from the end of a match is kept as
+// ordinary escaped text right after the link.
+func autolinkLine(line string) string {
+	matches := urlPattern.FindAllStringIndex(line, -1)
+	if len(matches) == 0 {
+		return html.EscapeString(line)
+	}
+
+	var out strings.Builder
+	cursor := 0 // index of the first byte of line not yet written
+
+	for _, span := range matches {
+		start, end := span[0], span[1]
+
+		// Plain text between the previous match and this one.
+		out.WriteString(html.EscapeString(line[cursor:start]))
+
+		matched := line[start:end]
+		target := stripURLTrailing(matched)
+		leftover := matched[len(target):]
+		escaped := html.EscapeString(target)
+
+		fmt.Fprintf(&out, `<a href="%s" target="_blank" rel="noopener noreferrer">%s</a>`,
+			escaped, escaped)
+
+		// Writing an empty leftover is a no-op, so no emptiness check is needed.
+		out.WriteString(html.EscapeString(leftover))
+
+		cursor = end
+	}
+
+	out.WriteString(html.EscapeString(line[cursor:]))
+
+	return out.String()
+}
+
+// stripURLTrailing removes common sentence-ending punctuation from the end of a
+// URL match. These characters are valid in URLs but almost never appear there
+// at the end in prose (e.g. "Visit https://foo.com." — the "." ends the sentence).
+//
+// A closing bracket — ")", "]" or "}" — is only removed while the URL holds
+// more closers than openers of that kind. "(see https://foo.com)" loses its
+// ")", but "https://en.wikipedia.org/wiki/Go_(programming_language)" keeps
+// the one that belongs to the path.
+func stripURLTrailing(u string) string {
+	const punct = ".,;:!?\"'>"
+
+	// unmatched reports whether u has more closing than opening brackets.
+	unmatched := func(open, closing string) bool {
+		return strings.Count(u, closing) > strings.Count(u, open)
+	}
+
+	for len(u) > 0 {
+		var strip bool
+
+		switch last := u[len(u)-1]; last {
+		case ')':
+			strip = unmatched("(", ")")
+		case ']':
+			strip = unmatched("[", "]")
+		case '}':
+			strip = unmatched("{", "}")
+		default:
+			strip = strings.IndexByte(punct, last) >= 0
+		}
+
+		if !strip {
+			break
+		}
+		u = u[:len(u)-1]
+	}
+
+	return u
+}