1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
|
// Package processor scans the input directory for new source files and converts
// each one into a self-contained post directory under outdir/posts/.
// Supported formats: .txt, .md, .png, .jpg, .jpeg, .gif, .mp3.
// Each processed source file is deleted from the input directory afterward.
//
// Processing is sequential in directory listing order. If one file fails after
// earlier files succeeded, those earlier sources are already gone from the input
// directory (and their posts exist under posts/). The error is returned together
// with the count of posts created in that run; fix or remove the failing file and
// re-run to continue.
//
// Markdown trust boundary: .md files are expected only from a trusted personal
// inbox (the operator’s own email or equivalent). Goldmark is configured with
// html.WithUnsafe so raw HTML and GFM features in those files pass through to
// post HTML intentionally. This is not a multi-tenant or public-submission
// pipeline; do not point an untrusted drop folder at the same input directory
// without replacing that rendering path with sanitization or a stricter parser.
package processor
import (
"fmt"
"os"
"path/filepath"
"strings"
"time"
"codeberg.org/snonux/snonux/internal/config"
"codeberg.org/snonux/snonux/internal/post"
)
// Run walks cfg.InputDir once and turns every eligible file into its own post
// directory below cfg.OutputDir/posts/ (created here if missing).
//
// The returned int is the number of posts created during this call. When an
// error is returned, the count covers only the files handled before the
// failure; processFile has already removed those sources from the input
// directory.
//
// Directories and dot-prefixed entries are ignored. Image files that a .md file
// in the same directory references are left to that markdown post and are not
// turned into stand-alone image posts.
func Run(cfg *config.Config) (int, error) {
	entries, err := os.ReadDir(cfg.InputDir)
	if err != nil {
		return 0, fmt.Errorf("read input dir %s: %w", cfg.InputDir, err)
	}

	postsDir := filepath.Join(cfg.OutputDir, "posts")
	if mkErr := os.MkdirAll(postsDir, 0o755); mkErr != nil {
		return 0, fmt.Errorf("create posts dir: %w", mkErr)
	}

	// Work out up front which images belong to a markdown file so the main
	// loop can leave them alone.
	claimed, err := claimedByMarkdown(entries, cfg.InputDir)
	if err != nil {
		return 0, err
	}

	var created int
	for _, e := range entries {
		name := e.Name()
		switch {
		case e.IsDir(), strings.HasPrefix(name, "."):
			continue // not a candidate: sub-directory or hidden file
		case claimed[name]:
			continue // owned by a .md post; handled together with it
		}

		if procErr := processFile(filepath.Join(cfg.InputDir, name), postsDir); procErr != nil {
			return created, fmt.Errorf("process %s: %w", name, procErr)
		}
		created++
	}
	return created, nil
}
// claimedByMarkdown reads every visible .md entry in inputDir and returns the
// set of image filenames that findLocalImages reports for those files. Run uses
// the set to keep such images out of independent processing so they can be
// embedded in the markdown post instead.
//
// Directories and dot-prefixed entries are skipped, matching the filter Run
// applies before processing. A hidden markdown file is never turned into a
// post, so it must not be able to claim images (they would be skipped forever),
// trigger a conflict, or abort the run because it cannot be read (e.g. an
// editor lock or temp file).
//
// An error is returned if a markdown file cannot be read, or if two different
// markdown files reference the same image. Repeated references from within one
// file are fine.
func claimedByMarkdown(entries []os.DirEntry, inputDir string) (map[string]bool, error) {
	claimed := make(map[string]bool)
	// owners remembers which markdown file first claimed each image so that
	// conflicts are detected before any file is processed.
	owners := make(map[string]string)

	for _, entry := range entries {
		name := entry.Name()
		if entry.IsDir() || strings.HasPrefix(name, ".") {
			continue // Run never processes these, so they cannot own images
		}
		if strings.ToLower(filepath.Ext(name)) != ".md" {
			continue
		}

		data, err := os.ReadFile(filepath.Join(inputDir, name))
		if err != nil {
			return nil, fmt.Errorf("read markdown for image claims %s: %w", name, err)
		}

		for _, imgName := range findLocalImages(string(data), inputDir) {
			if owner, exists := owners[imgName]; exists && owner != name {
				return nil, fmt.Errorf("image %q claimed by both %q and %q", imgName, owner, name)
			}
			owners[imgName] = name
			claimed[imgName] = true
		}
	}
	return claimed, nil
}
// processFile converts one input file into a freshly created post directory
// under postsDir.
//
// Steps, in order: pick an unused ID for the current UTC time, create the post
// directory, build the post, save it, and only then delete the inputs. If
// building or saving fails the new directory is removed again and the source
// file is left in place. The returned error on the success path is whatever
// removing srcPath yields.
func processFile(srcPath, postsDir string) error {
	id, err := uniqueID(postsDir, time.Now().UTC())
	if err != nil {
		return fmt.Errorf("generate unique ID: %w", err)
	}

	postDir := filepath.Join(postsDir, id)
	if mkErr := os.MkdirAll(postDir, 0o755); mkErr != nil {
		return fmt.Errorf("create post dir %s: %w", id, mkErr)
	}

	// discard throws away the half-built post directory (best effort) and
	// hands the original failure back unchanged.
	discard := func(cause error) error {
		_ = os.RemoveAll(postDir)
		return cause
	}

	p, inboxExtras, err := buildPost(srcPath, postDir, id)
	if err != nil {
		return discard(err)
	}
	if saveErr := p.Save(postDir); saveErr != nil {
		return discard(saveErr)
	}

	// The post is on disk now. Drop the inbox images the markdown referenced
	// first (errors ignored), then the source file itself.
	for i := range inboxExtras {
		_ = os.Remove(inboxExtras[i])
	}
	return os.Remove(srcPath)
}
// buildPost picks the sub-processor that matches srcPath's lower-cased file
// extension and returns the resulting Post, ready to be saved.
//
// The second result lists paths in the input directory that the caller should
// delete once the post has been saved. Only the markdown branch supplies such
// paths; every other branch returns nil there. Extensions without a handler
// produce an "unsupported file type" error.
func buildPost(srcPath, postDir, id string) (*post.Post, []string, error) {
	var (
		p   *post.Post
		err error
	)

	switch ext := strings.ToLower(filepath.Ext(srcPath)); ext {
	case ".md":
		// Markdown is the only type that reports extra inbox files.
		return buildMarkdownPost(srcPath, postDir, id)
	case ".txt":
		p, err = buildTextPost(srcPath, id)
	case ".png", ".jpg", ".jpeg", ".gif":
		p, err = buildImagePost(srcPath, postDir, id)
	case ".mp3":
		p, err = buildAudioPost(srcPath, postDir, id)
	default:
		return nil, nil, fmt.Errorf("unsupported file type: %s", ext)
	}
	return p, nil, err
}
// buildTextPost runs processTxt on srcPath and wraps its output in a text Post
// carrying the given id and the current UTC time. Any processTxt error is
// returned unchanged with a nil Post.
func buildTextPost(srcPath, id string) (*post.Post, error) {
	content, err := processTxt(srcPath)
	if err != nil {
		return nil, err
	}

	p := &post.Post{}
	p.ID = id
	p.Timestamp = time.Now().UTC()
	p.PostType = post.TypeText
	p.Content = content
	return p, nil
}
// buildMarkdownPost renders the markdown file at srcPath via processMd, copies
// the local images it reports from the source's directory into postDir, and
// returns a markdown Post plus the inbox paths of those images.
//
// In the rendered output every `src="NAME"` attribute for a reported image is
// rewritten to `src="posts/ID/NAME"`. The returned path list is meant for the
// caller to delete after the post has been saved; nothing is removed here.
func buildMarkdownPost(srcPath, postDir, id string) (*post.Post, []string, error) {
	rendered, localImages, err := processMd(srcPath)
	if err != nil {
		return nil, nil, err
	}

	inboxDir := filepath.Dir(srcPath)
	assets, err := copyLocalImages(localImages, inboxDir, postDir)
	if err != nil {
		return nil, nil, err
	}

	extras := make([]string, 0, len(localImages))
	for _, img := range localImages {
		// Point the bare filename at its new home below the site root,
		// e.g. "img.png" becomes "posts/ID/img.png".
		rendered = strings.ReplaceAll(rendered,
			`src="`+img+`"`,
			`src="posts/`+id+`/`+img+`"`)
		extras = append(extras, filepath.Join(inboxDir, img))
	}

	p := &post.Post{
		ID:        id,
		Timestamp: time.Now().UTC(),
		PostType:  post.TypeMarkdown,
		Content:   rendered,
		Assets:    assets,
	}
	return p, extras, nil
}
// buildImagePost delegates to processImage and wraps its result in an image
// Post: the returned markup becomes the content and the returned filename is
// the post's single asset. The timestamp is the current UTC time. Errors from
// processImage are passed through with a nil Post.
func buildImagePost(srcPath, postDir, id string) (*post.Post, error) {
	asset, markup, err := processImage(srcPath, postDir, id)
	if err != nil {
		return nil, err
	}

	result := &post.Post{
		ID:        id,
		Timestamp: time.Now().UTC(),
		PostType:  post.TypeImage,
		Content:   markup,
		Assets:    []string{asset},
	}
	return result, nil
}
// buildAudioPost delegates to processAudio and wraps its result in an audio
// Post: the returned markup becomes the content and the returned filename is
// the post's single asset. The timestamp is the current UTC time. Errors from
// processAudio are passed through with a nil Post.
func buildAudioPost(srcPath, postDir, id string) (*post.Post, error) {
	asset, markup, err := processAudio(srcPath, postDir, id)
	if err != nil {
		return nil, err
	}

	var p post.Post
	p.ID = id
	p.Timestamp = time.Now().UTC()
	p.PostType = post.TypeAudio
	p.Content = markup
	p.Assets = []string{asset}
	return &p, nil
}
// copyLocalImages copies each named file from sourceDir to the same name in
// postDir using copyFile. It stops at the first failure and returns a wrapped
// error with a nil slice; otherwise it returns the names in the order they
// were copied (nil when filenames is empty).
func copyLocalImages(filenames []string, sourceDir, postDir string) ([]string, error) {
	var done []string
	for i := range filenames {
		name := filenames[i]
		from, to := filepath.Join(sourceDir, name), filepath.Join(postDir, name)
		if err := copyFile(from, to); err != nil {
			return nil, fmt.Errorf("copy image asset %s: %w", name, err)
		}
		done = append(done, name)
	}
	return done, nil
}
// uniqueID returns a post ID for time t whose directory does not yet exist
// under postsDir. It asks post.NewID for candidates with a counter starting at
// 0 and increasing by one until os.Stat reports that the candidate path is
// missing. Any other stat failure is returned as an error.
func uniqueID(postsDir string, t time.Time) (string, error) {
	attempt := 0
	for {
		candidate := post.NewID(t, attempt)
		_, statErr := os.Stat(filepath.Join(postsDir, candidate))
		switch {
		case statErr == nil:
			// Something already lives at this path; try the next counter.
			attempt++
		case os.IsNotExist(statErr):
			return candidate, nil
		default:
			return "", fmt.Errorf("stat post dir %s: %w", candidate, statErr)
		}
	}
}
|