1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
package processor
import (
"bytes"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)
// isSimpleImageRef reports whether ref is a filename-only reference (e.g.
// "img.png") that is safe to treat as a flat local file in the same directory
// as the markdown source. It rejects the empty string, ".", anything
// containing "..", subdirectories, absolute paths, and dot-slash prefixes, so
// stat and copy targets stay within the source directory and never resolve to
// the directory itself.
func isSimpleImageRef(ref string) bool {
	// filepath.Base returns "." for both "" and ".", so "." would satisfy the
	// Base comparison below even though it names the directory, not a file.
	if ref == "" || ref == "." {
		return false
	}
	// Conservatively refuse any reference containing "..", which covers the
	// bare parent directory and every traversal form.
	if strings.Contains(ref, "..") {
		return false
	}
	// A reference equal to its own base name has no directory component.
	return filepath.Base(ref) == ref
}
// imageRefPattern matches Markdown image syntax, ![alt](destination) or
// ![alt](destination "title"), and captures the destination in group 1.
// We use it to discover local asset references that must be copied.
//
// Whitespace around the destination and an optional trailing title are
// matched but kept out of the capture, so group 1 is the bare destination
// token. Destinations containing unescaped whitespace or ")" are not
// supported by this pattern.
var imageRefPattern = regexp.MustCompile(`!\[[^\]]*\]\(\s*([^)\s]+)(?:\s+[^)]*)?\)`)
// processMd renders the Markdown file at path into an HTML snippet for a
// trusted inbox source. The markdown, including any raw HTML blocks, is treated
// as author-controlled content rather than input from strangers (see the
// package comment), which is why raw HTML is passed through to the output.
//
// It returns the rendered HTML together with the local image filenames that
// findLocalImages reports for the document, so the caller can copy them into
// the post asset directory. On a read or conversion failure it returns an
// empty string, a nil slice, and a wrapped error naming the file.
func processMd(path string) (htmlContent string, localImages []string, err error) {
	src, readErr := os.ReadFile(path)
	if readErr != nil {
		return "", nil, fmt.Errorf("read markdown %s: %w", path, readErr)
	}

	// Gather the image references up front; they are looked up relative to
	// the directory that holds the markdown file.
	images := findLocalImages(string(src), filepath.Dir(path))

	// Trusted inbox: keep raw HTML from the markdown in the rendered output.
	rendererOpts := goldmark.WithRendererOptions(html.WithUnsafe())
	converter := goldmark.New(
		goldmark.WithExtensions(extension.GFM),
		rendererOpts,
	)

	var out bytes.Buffer
	if convErr := converter.Convert(src, &out); convErr != nil {
		return "", nil, fmt.Errorf("convert markdown %s: %w", path, convErr)
	}
	return out.String(), images, nil
}
// findLocalImages returns the image filenames referenced in mdContent that
// exist as regular files directly inside sourceDir. Each filename is returned
// at most once, in order of first appearance. Remote URLs (http/https) are
// ignored, as are references rejected by isSimpleImageRef.
//
// The lookup is best-effort: a reference whose file cannot be stat'ed for any
// reason is silently left out rather than reported as an error.
func findLocalImages(mdContent, sourceDir string) []string {
	matches := imageRefPattern.FindAllStringSubmatch(mdContent, -1)
	var locals []string
	// seen records every candidate already examined, found or not, so a
	// reference repeated in the document is stat'ed and returned only once.
	seen := make(map[string]struct{}, len(matches))
	for _, m := range matches {
		ref := m[1]
		// Skip remote URLs.
		if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") {
			continue
		}
		// Reject references that traverse directories or contain path
		// separators; only flat filenames next to the markdown are
		// supported. This prevents scans from succeeding on a file
		// deep in a subdirectory and then failing copy because the
		// basename is looked up in the wrong directory.
		if !isSimpleImageRef(ref) {
			continue
		}
		if _, dup := seen[ref]; dup {
			continue
		}
		seen[ref] = struct{}{}

		// os.Stat also succeeds for directories and other non-regular
		// entries; those cannot be copied as image assets, so require a
		// regular file (symlinks are followed by Stat).
		info, err := os.Stat(filepath.Join(sourceDir, ref))
		if err != nil || !info.Mode().IsRegular() {
			continue
		}
		locals = append(locals, ref)
	}
	return locals
}
|