summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2026-03-11 19:40:27 +0200
committer Paul Buetow <paul@buetow.org> 2026-03-11 19:40:27 +0200
commit 414d18f158df079d6526d88ae7c689c8212d4d65 (patch)
tree 6be824c73cfb89dc98f3272edcb32550b7642c07 /internal
parent cbc041d05a744d902f71d2fc126b7292620e46b2 (diff)
fix(showcase): sanitize malformed showcase summaries (tag: v0.15.1)
Diffstat (limited to 'internal')
-rw-r--r-- internal/showcase/rank_history.go 8
-rw-r--r-- internal/showcase/rank_history_test.go 4
-rw-r--r-- internal/showcase/showcase.go 342
-rw-r--r-- internal/showcase/showcase_test.go 71
-rw-r--r-- internal/version/version.go 2
5 files changed, 414 insertions, 13 deletions
diff --git a/internal/showcase/rank_history.go b/internal/showcase/rank_history.go
index 92c0c33..434332f 100644
--- a/internal/showcase/rank_history.go
+++ b/internal/showcase/rank_history.go
@@ -178,11 +178,11 @@ func formatRankHistoryForHeader(history []RepoRankHistory) string {
tokens := make([]string, 0, len(history))
for i, point := range history {
- spot := fmt.Sprintf("#%d", point.Spot)
if point.Spot <= 0 {
- spot = "n/a"
+ continue
}
+ spot := fmt.Sprintf("#%d", point.Spot)
if i == 0 {
tokens = append(tokens, fmt.Sprintf("%s(%s)", spot, point.Anchor))
continue
@@ -190,6 +190,10 @@ func formatRankHistoryForHeader(history []RepoRankHistory) string {
tokens = append(tokens, fmt.Sprintf("%s%s(%s)", point.Arrow, spot, point.Anchor))
}
+ if len(tokens) == 0 {
+ return ""
+ }
+
return " [" + strings.Join(tokens, " ") + "]"
}
diff --git a/internal/showcase/rank_history_test.go b/internal/showcase/rank_history_test.go
index 108ed68..f14d23a 100644
--- a/internal/showcase/rank_history_test.go
+++ b/internal/showcase/rank_history_test.go
@@ -147,7 +147,7 @@ func TestFormatRankHistoryForHeader(t *testing.T) {
if !strings.Contains(header, "↓#2(1w)") {
t.Fatalf("header missing down movement: %s", header)
}
- if !strings.Contains(header, "·n/a(3w)") {
- t.Fatalf("header missing n/a placeholder: %s", header)
+ if strings.Contains(header, "n/a") {
+ t.Fatalf("header should omit missing history points: %s", header)
}
}
diff --git a/internal/showcase/showcase.go b/internal/showcase/showcase.go
index dfc3a0d..d8b68ae 100644
--- a/internal/showcase/showcase.go
+++ b/internal/showcase/showcase.go
@@ -295,18 +295,341 @@ func runSummaryTool(selectedTool, prompt, repoPath, readmeFile string, readmeCon
// fallbackSummary builds a summary without the summary tool. When readmeFound
// is true it returns the first useful README paragraph as selected by
// extractUsefulSummary (limit 1); if the README yields nothing, or was not
// found, it returns the generic "<repoName>: source code repository." line.
func fallbackSummary(repoName string, readmeContent []byte, readmeFound bool) string {
if readmeFound {
- parts := strings.Split(strings.TrimSpace(string(readmeContent)), "\n\n")
- if len(parts) > 0 {
- summary := strings.TrimSpace(parts[0])
- if summary != "" {
- return summary
- }
+ if summary := extractUsefulSummary(string(readmeContent), 1); summary != "" { // skips headings, images, lists, ...
+ return summary
}
}
return fmt.Sprintf("%s: source code repository.", repoName)
}
+// extractUsefulSummary returns the first maxParagraphs paragraphs of text that
+// survive normalizeSummaryParagraph, joined by a blank line. A non-positive
+// maxParagraphs is treated as 1. The result is "" when no paragraph qualifies.
+func extractUsefulSummary(text string, maxParagraphs int) string {
+	limit := maxParagraphs
+	if limit < 1 {
+		limit = 1
+	}
+
+	picked := make([]string, 0, limit)
+	for _, candidate := range splitSummaryParagraphs(text) {
+		if len(picked) == limit {
+			break
+		}
+		if cleaned := normalizeSummaryParagraph(candidate); cleaned != "" {
+			picked = append(picked, cleaned)
+		}
+	}
+
+	return strings.Join(picked, "\n\n")
+}
+
+// normalizeSummaryParagraph turns one raw README paragraph into summary text.
+// It returns "" for paragraphs that are not prose (heading, image, HTML, TOC,
+// list, badge or label only) and otherwise the sanitized paragraph, with a
+// man page style "NAME" section collapsed onto a single line.
+func normalizeSummaryParagraph(paragraph string) string {
+	raw := strings.TrimSpace(paragraph)
+
+	// These checks run on the raw paragraph, before sanitizing removes
+	// heading markers, HTML lines and image lines.
+	rejects := []func(string) bool{
+		isHeadingOnlyParagraph,
+		isImageOnlyParagraph,
+		isHTMLOnlyParagraph,
+		isTOCParagraph,
+		isListOnlyParagraph,
+		isBadgeParagraph,
+	}
+	for _, reject := range rejects {
+		if reject(raw) {
+			return ""
+		}
+	}
+
+	cleaned := sanitizeSummaryForGemtext(paragraph)
+	if cleaned == "" {
+		return ""
+	}
+	if joined, ok := normalizeManpageParagraph(cleaned); ok {
+		cleaned = joined
+	}
+	if isLabelOnlyParagraph(cleaned) {
+		return ""
+	}
+
+	return cleaned
+}
+
+// splitSummaryParagraphs splits text into trimmed, non-empty paragraphs.
+//
+// CRLF line endings are normalized first. Paragraphs are separated by empty
+// lines, except inside a ``` fenced code block: the fence is kept together as
+// one paragraph so that a blank line within the code cannot detach its tail
+// from the opening fence (the tail would otherwise be mistaken for prose).
+// An unterminated fence extends to the end of the text. Returns nil when text
+// is empty or whitespace only.
+func splitSummaryParagraphs(text string) []string {
+	text = strings.TrimSpace(strings.ReplaceAll(text, "\r\n", "\n"))
+	if text == "" {
+		return nil
+	}
+
+	var (
+		parts   []string
+		current []string
+		inFence bool
+	)
+	// flush closes the paragraph collected so far, dropping blank ones.
+	flush := func() {
+		if part := strings.TrimSpace(strings.Join(current, "\n")); part != "" {
+			parts = append(parts, part)
+		}
+		current = current[:0]
+	}
+
+	for _, line := range strings.Split(text, "\n") {
+		// Same fence detection as sanitizeSummaryForGemtext uses.
+		if strings.HasPrefix(strings.TrimSpace(line), "```") {
+			inFence = !inFence
+		}
+		if line == "" && !inFence {
+			flush()
+			continue
+		}
+		current = append(current, line)
+	}
+	flush()
+
+	return parts
+}
+
+// sanitizeSummaryForGemtext strips Markdown/HTML structure from summary so the
+// text can be emitted as plain gemtext paragraphs. Line by line it drops
+// fenced code blocks, HTML-only lines, Markdown image lines and setext
+// underlines, reduces ATX headings to their title text, and removes trailing
+// spaces and tabs from all other lines. The result is trimmed.
+func sanitizeSummaryForGemtext(summary string) string {
+	summary = strings.TrimSpace(strings.ReplaceAll(summary, "\r\n", "\n"))
+	if summary == "" {
+		return ""
+	}
+
+	lines := strings.Split(summary, "\n")
+	kept := make([]string, 0, len(lines))
+	fenced := false
+
+	for _, raw := range lines {
+		text := strings.TrimSpace(raw)
+		switch {
+		case text == "":
+			// Blank lines are kept as paragraph breaks, but not inside a fence.
+			if !fenced {
+				kept = append(kept, "")
+			}
+		case strings.HasPrefix(text, "```"):
+			// The fence marker itself is dropped and toggles the state.
+			fenced = !fenced
+		case fenced:
+			// Code block content is dropped.
+		case isHTMLOnlyLine(text), isMarkdownImageLine(text):
+			// Markup-only lines are dropped.
+		case isSetextUnderline(text) && len(kept) > 0 && strings.TrimSpace(kept[len(kept)-1]) != "":
+			// Underline of a setext heading; its title line was already kept.
+		default:
+			heading, isHeading := trimMarkdownHeading(text)
+			switch {
+			case !isHeading:
+				kept = append(kept, strings.TrimRight(raw, " \t"))
+			case heading != "":
+				kept = append(kept, heading)
+			}
+		}
+	}
+
+	return strings.TrimSpace(strings.Join(kept, "\n"))
+}
+
+// isHeadingOnlyParagraph reports whether paragraph is just a heading: either a
+// single ATX heading line, or a non-empty line followed by a setext underline.
+func isHeadingOnlyParagraph(paragraph string) bool {
+	normalized := strings.ReplaceAll(paragraph, "\r\n", "\n")
+	lines := strings.Split(strings.TrimSpace(normalized), "\n")
+
+	switch len(lines) {
+	case 1:
+		_, isHeading := trimMarkdownHeading(strings.TrimSpace(lines[0]))
+		return isHeading
+	case 2:
+		title, underline := strings.TrimSpace(lines[0]), strings.TrimSpace(lines[1])
+		return title != "" && isSetextUnderline(underline)
+	default:
+		return false
+	}
+}
+
+// isImageOnlyParagraph reports whether paragraph is a single line that starts
+// with an HTML <img tag or a Markdown image marker.
+func isImageOnlyParagraph(paragraph string) bool {
+	single := strings.TrimSpace(paragraph)
+	if single == "" || strings.ContainsRune(single, '\n') {
+		return false
+	}
+	for _, prefix := range []string{"<img", "!["} {
+		if strings.HasPrefix(single, prefix) {
+			return true
+		}
+	}
+	return false
+}
+
+// isHTMLOnlyParagraph reports whether every non-blank line of paragraph is an
+// HTML-only line. A blank paragraph is not HTML-only.
+func isHTMLOnlyParagraph(paragraph string) bool {
+	body := strings.TrimSpace(paragraph)
+	if body == "" {
+		return false
+	}
+	for _, line := range strings.Split(body, "\n") {
+		if text := strings.TrimSpace(line); text != "" && !isHTMLOnlyLine(text) {
+			return false
+		}
+	}
+	return true
+}
+
+// isHTMLOnlyLine reports whether line begins with '<' and ends with '>'.
+// Callers pass an already trimmed line.
+func isHTMLOnlyLine(line string) bool {
+	if line == "" {
+		return false
+	}
+	return line[0] == '<' && line[len(line)-1] == '>'
+}
+
+// isMarkdownImageLine reports whether line starts with a Markdown image
+// marker ("![") and contains the "](" that joins alt text and target.
+func isMarkdownImageLine(line string) bool {
+	if !strings.HasPrefix(line, "![") {
+		return false
+	}
+	return strings.Contains(line, "](")
+}
+
+// isTOCParagraph reports whether paragraph is a table of contents: a first
+// line reading "toc:" or "table of contents:" (case-insensitive) followed only
+// by blank lines and ordered list entries.
+func isTOCParagraph(paragraph string) bool {
+	lines := strings.Split(strings.TrimSpace(paragraph), "\n")
+
+	header := strings.TrimSpace(lines[0])
+	isTOCHeader := strings.EqualFold(header, "toc:") || strings.EqualFold(header, "table of contents:")
+	if !isTOCHeader {
+		return false
+	}
+
+	for idx := 1; idx < len(lines); idx++ {
+		entry := strings.TrimSpace(lines[idx])
+		if entry != "" && !isOrderedListLine(entry) {
+			return false
+		}
+	}
+
+	return true
+}
+
+// isListOnlyParagraph reports whether every non-blank line of paragraph is a
+// bullet or ordered list item. A blank paragraph is not a list.
+func isListOnlyParagraph(paragraph string) bool {
+	body := strings.TrimSpace(paragraph)
+	if body == "" {
+		return false
+	}
+	for _, line := range strings.Split(body, "\n") {
+		if item := strings.TrimSpace(line); item != "" && !isListLine(item) {
+			return false
+		}
+	}
+	return true
+}
+
+// isListLine reports whether line is a "* " or "- " bullet item or an ordered
+// list item.
+func isListLine(line string) bool {
+	switch {
+	case strings.HasPrefix(line, "* "), strings.HasPrefix(line, "- "):
+		return true
+	default:
+		return isOrderedListLine(line)
+	}
+}
+
+// isOrderedListLine reports whether line starts like an ordered list item:
+// one or more ASCII digits, then '.' or ')', then a space.
+func isOrderedListLine(line string) bool {
+	digits := 0
+	for digits < len(line) && '0' <= line[digits] && line[digits] <= '9' {
+		digits++
+	}
+
+	// Need at least one digit plus room for the delimiter and the space.
+	if digits == 0 || digits+1 >= len(line) {
+		return false
+	}
+
+	delim := line[digits]
+	return (delim == '.' || delim == ')') && line[digits+1] == ' '
+}
+
+// isLabelOnlyParagraph reports whether paragraph is a single line that only
+// labels what follows rather than describing the project:
+//
+//   - a line ending in ':' with at most five words ("Quick start:"), or
+//   - a line of at most four words written entirely in capitals ("USAGE"), or
+//   - a line of at most four words made of ASCII digits/punctuation only
+//     ("---", "1.0").
+//
+// Text in scripts without letter case (CJK, Arabic, ...) is unchanged by
+// strings.ToUpper as well; it is treated as prose, not as a label, so that
+// one-line summaries in those languages are kept.
+func isLabelOnlyParagraph(paragraph string) bool {
+	line := strings.TrimSpace(paragraph)
+	if line == "" || strings.Contains(line, "\n") {
+		return false
+	}
+
+	words := len(strings.Fields(line))
+	if strings.HasSuffix(line, ":") && words <= 5 {
+		return true
+	}
+	if words > 4 || line != strings.ToUpper(line) {
+		return false
+	}
+
+	// No lowercase letters from here on. If lowering changes the line it
+	// contains real uppercase letters, i.e. it is an all-caps label.
+	if line != strings.ToLower(line) {
+		return true
+	}
+
+	// No cased letters at all: plain ASCII is decoration, anything beyond
+	// ASCII is assumed to be text in a caseless script (heuristic).
+	return strings.IndexFunc(line, func(r rune) bool { return r >= 0x80 }) < 0
+}
+
+// isBadgeParagraph reports whether paragraph is a single line holding at least
+// two Markdown link/image markers, counting each "](" and each "![". A lone
+// image already scores two.
+func isBadgeParagraph(paragraph string) bool {
+	line := strings.TrimSpace(paragraph)
+	if line == "" || strings.Contains(line, "\n") {
+		return false
+	}
+
+	links := strings.Count(line, "](")
+	images := strings.Count(line, "![")
+	return links+images > 1
+}
+
+// normalizeManpageParagraph handles a man page style paragraph whose first
+// line is exactly "NAME". It returns the remaining non-blank lines, trimmed
+// and joined by single spaces, and true. For any other paragraph, or when
+// nothing follows the "NAME" line, it returns "" and false.
+func normalizeManpageParagraph(paragraph string) (string, bool) {
+	lines := strings.Split(strings.TrimSpace(paragraph), "\n")
+	if len(lines) < 2 || strings.TrimSpace(lines[0]) != "NAME" {
+		return "", false
+	}
+
+	var joined strings.Builder
+	for _, line := range lines[1:] {
+		text := strings.TrimSpace(line)
+		if text == "" {
+			continue
+		}
+		if joined.Len() > 0 {
+			joined.WriteByte(' ')
+		}
+		joined.WriteString(text)
+	}
+	if joined.Len() == 0 {
+		return "", false
+	}
+
+	return joined.String(), true
+}
+
+// trimMarkdownHeading parses line as an ATX heading ("# Title" up to
+// "###### Title"). It returns the heading text and true when line starts with
+// one to six '#' characters that are followed by a space, a tab or the end of
+// the line; otherwise it returns "" and false. The text may be empty ("#").
+//
+// An optional closing sequence ("## Title ##") is removed, but only when it is
+// separated from the title by whitespace or makes up the whole heading, so
+// titles that merely end in '#' ("# C#") are kept intact.
+func trimMarkdownHeading(line string) (string, bool) {
+	level := 0
+	for level < len(line) && line[level] == '#' {
+		level++
+	}
+	if level == 0 || level > 6 {
+		return "", false
+	}
+	if level < len(line) && line[level] != ' ' && line[level] != '\t' {
+		return "", false
+	}
+
+	heading := strings.TrimSpace(line[level:])
+	if body := strings.TrimRight(heading, "#"); body != heading {
+		if body == "" || strings.HasSuffix(body, " ") || strings.HasSuffix(body, "\t") {
+			heading = strings.TrimSpace(body)
+		}
+	}
+
+	return heading, true
+}
+
+// isSetextUnderline reports whether line is at least three characters long
+// and consists solely of '=' or solely of '-'.
+func isSetextUnderline(line string) bool {
+	n := len(line)
+	if n < 3 {
+		return false
+	}
+	return strings.Count(line, "=") == n || strings.Count(line, "-") == n
+}
+
// getRepositories returns a list of repository directories in the work directory
func (g *Generator) getRepositories() ([]string, error) {
entries, err := os.ReadDir(g.workDir)
@@ -397,6 +720,11 @@ func (g *Generator) generateProjectSummary(repoName string, forceRegenerate bool
summary = fallbackSummary(repoName, readmeContent, readmeFound)
}
}
+ summary = extractUsefulSummary(summary, 2)
+ if summary == "" {
+ summary = fallbackSummary(repoName, readmeContent, readmeFound)
+ }
+ summary = sanitizeSummaryForGemtext(summary)
// Build URLs
codebergURL, githubURL := g.buildProjectLinks(repoName)
@@ -608,7 +936,7 @@ func (g *Generator) formatGemtext(summaries []ProjectSummary) string {
}
// Handle images and paragraphs
- paragraphs := strings.Split(summary.Summary, "\n\n")
+ paragraphs := splitSummaryParagraphs(sanitizeSummaryForGemtext(summary.Summary))
// If we have images, distribute them nicely
if len(summary.Images) > 0 {
diff --git a/internal/showcase/showcase_test.go b/internal/showcase/showcase_test.go
index fb20c96..125de63 100644
--- a/internal/showcase/showcase_test.go
+++ b/internal/showcase/showcase_test.go
@@ -120,11 +120,33 @@ func TestFormatGemtext_IncludesRankHistoryInHeader(t *testing.T) {
},
})
- if !strings.Contains(content, "### 1. alpha [#1(now) ↑#2(1w) →#2(2w) ·n/a(3w) ↓#4(4w)]") {
+ if !strings.Contains(content, "### 1. alpha [#1(now) ↑#2(1w) →#2(2w) ↓#4(4w)]") {
t.Fatalf("rank history was not rendered in header: %s", content)
}
}
+// TestFormatGemtext_SanitizesMarkdownHeadingsInSummary checks that formatGemtext
+// renders ATX and setext headings found in a summary as plain paragraphs.
+func TestFormatGemtext_SanitizesMarkdownHeadingsInSummary(t *testing.T) {
+	t.Parallel()
+
+	projects := []ProjectSummary{{
+		Name:    "alpha",
+		Summary: "# Alpha Project\n\nconf\n====\n\nParagraph body",
+	}}
+	gen := &Generator{config: &config.Config{}}
+	rendered := gen.formatGemtext(projects)
+
+	if strings.Contains(rendered, "\n# Alpha Project\n") {
+		t.Fatalf("markdown heading leaked into gemtext summary: %s", rendered)
+	}
+	if strings.Contains(rendered, "\n====\n") {
+		t.Fatalf("setext underline leaked into gemtext summary: %s", rendered)
+	}
+	if !strings.Contains(rendered, "\nAlpha Project\n\nconf\n\nParagraph body\n\n") {
+		t.Fatalf("sanitized summary not rendered as expected: %s", rendered)
+	}
+}
+
func TestFindReadmeContent_UsesRepoPathWithoutChangingCWD(t *testing.T) {
t.Parallel()
@@ -160,3 +182,50 @@ func TestFallbackSummary_UsesFirstReadmeParagraph(t *testing.T) {
t.Fatalf("expected first paragraph summary, got %q", summary)
}
}
+
+// TestFallbackSummary_SkipsHeadingOnlyParagraphs checks that fallbackSummary
+// passes over a leading heading and image and returns the first prose paragraph.
+func TestFallbackSummary_SkipsHeadingOnlyParagraphs(t *testing.T) {
+	t.Parallel()
+
+	const want = "actual summary paragraph"
+	content := []byte("# repo title\n\n<img src=\"shot.png\" />\n\nactual summary paragraph")
+
+	if got := fallbackSummary("repo", content, true); got != want {
+		t.Fatalf("expected summary paragraph after heading and image, got %q", got)
+	}
+}
+
+// TestExtractUsefulSummary_SkipsNonProseParagraphs checks that HTML blocks,
+// bullet lists and a TOC are skipped before the first two prose paragraphs.
+func TestExtractUsefulSummary_SkipsNonProseParagraphs(t *testing.T) {
+	t.Parallel()
+
+	const (
+		readme = "<p align=\"center\">\n<img src=\"shot.png\" />\n</p>\n\n* first bullet\n* second bullet\n\nTOC:\n01. Intro\n02. Usage\n\nActual summary paragraph.\n\nSecond useful paragraph."
+		want   = "Actual summary paragraph.\n\nSecond useful paragraph."
+	)
+
+	if got := extractUsefulSummary(readme, 2); got != want {
+		t.Fatalf("extractUsefulSummary() = %q, want %q", got, want)
+	}
+}
+
+// TestExtractUsefulSummary_NormalizesManpageNameSection checks that a man page
+// style NAME section is reduced to its one-line description.
+func TestExtractUsefulSummary_NormalizesManpageNameSection(t *testing.T) {
+	t.Parallel()
+
+	const (
+		manpage = "NAME\n    cpuinfo - A small and humble tool to print out CPU data"
+		want    = "cpuinfo - A small and humble tool to print out CPU data"
+	)
+
+	if got := extractUsefulSummary(manpage, 1); got != want {
+		t.Fatalf("extractUsefulSummary() = %q, want %q", got, want)
+	}
+}
+
+// TestExtractUsefulSummary_SkipsFencedCodeBlocks checks that a leading fenced
+// code block is not used as the summary.
+func TestExtractUsefulSummary_SkipsFencedCodeBlocks(t *testing.T) {
+	t.Parallel()
+
+	const (
+		readme = "```sh\nsudo dnf install wireguard-tools\nbundler install\n```\n\nActual summary paragraph."
+		want   = "Actual summary paragraph."
+	)
+
+	if got := extractUsefulSummary(readme, 1); got != want {
+		t.Fatalf("extractUsefulSummary() = %q, want %q", got, want)
+	}
+}
diff --git a/internal/version/version.go b/internal/version/version.go
index 04e6752..8a7c099 100644
--- a/internal/version/version.go
+++ b/internal/version/version.go
@@ -7,7 +7,7 @@ import (
var (
// Version is the current version of gitsyncer
- Version = "0.15.0"
+ Version = "0.15.1"
// GitCommit is the git commit hash at build time
GitCommit = "unknown"