diff options
| author | Paul Buetow <paul@buetow.org> | 2026-03-11 19:40:27 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-03-11 19:40:27 +0200 |
| commit | 414d18f158df079d6526d88ae7c689c8212d4d65 (patch) | |
| tree | 6be824c73cfb89dc98f3272edcb32550b7642c07 | |
| parent | cbc041d05a744d902f71d2fc126b7292620e46b2 (diff) | |
fix(showcase): sanitize malformed showcase summaries (v0.15.1)
| -rw-r--r-- | internal/showcase/rank_history.go | 8 | ||||
| -rw-r--r-- | internal/showcase/rank_history_test.go | 4 | ||||
| -rw-r--r-- | internal/showcase/showcase.go | 342 | ||||
| -rw-r--r-- | internal/showcase/showcase_test.go | 71 | ||||
| -rw-r--r-- | internal/version/version.go | 2 |
5 files changed, 414 insertions, 13 deletions
diff --git a/internal/showcase/rank_history.go b/internal/showcase/rank_history.go index 92c0c33..434332f 100644 --- a/internal/showcase/rank_history.go +++ b/internal/showcase/rank_history.go @@ -178,11 +178,11 @@ func formatRankHistoryForHeader(history []RepoRankHistory) string { tokens := make([]string, 0, len(history)) for i, point := range history { - spot := fmt.Sprintf("#%d", point.Spot) if point.Spot <= 0 { - spot = "n/a" + continue } + spot := fmt.Sprintf("#%d", point.Spot) if i == 0 { tokens = append(tokens, fmt.Sprintf("%s(%s)", spot, point.Anchor)) continue @@ -190,6 +190,10 @@ func formatRankHistoryForHeader(history []RepoRankHistory) string { tokens = append(tokens, fmt.Sprintf("%s%s(%s)", point.Arrow, spot, point.Anchor)) } + if len(tokens) == 0 { + return "" + } + return " [" + strings.Join(tokens, " ") + "]" } diff --git a/internal/showcase/rank_history_test.go b/internal/showcase/rank_history_test.go index 108ed68..f14d23a 100644 --- a/internal/showcase/rank_history_test.go +++ b/internal/showcase/rank_history_test.go @@ -147,7 +147,7 @@ func TestFormatRankHistoryForHeader(t *testing.T) { if !strings.Contains(header, "↓#2(1w)") { t.Fatalf("header missing down movement: %s", header) } - if !strings.Contains(header, "·n/a(3w)") { - t.Fatalf("header missing n/a placeholder: %s", header) + if strings.Contains(header, "n/a") { + t.Fatalf("header should omit missing history points: %s", header) } } diff --git a/internal/showcase/showcase.go b/internal/showcase/showcase.go index dfc3a0d..d8b68ae 100644 --- a/internal/showcase/showcase.go +++ b/internal/showcase/showcase.go @@ -295,18 +295,341 @@ func runSummaryTool(selectedTool, prompt, repoPath, readmeFile string, readmeCon func fallbackSummary(repoName string, readmeContent []byte, readmeFound bool) string { if readmeFound { - parts := strings.Split(strings.TrimSpace(string(readmeContent)), "\n\n") - if len(parts) > 0 { - summary := strings.TrimSpace(parts[0]) - if summary != "" { - return summary - } 
+ if summary := extractUsefulSummary(string(readmeContent), 1); summary != "" { + return summary } } return fmt.Sprintf("%s: source code repository.", repoName) } +func extractUsefulSummary(text string, maxParagraphs int) string { + if maxParagraphs <= 0 { + maxParagraphs = 1 + } + + parts := splitSummaryParagraphs(text) + useful := make([]string, 0, maxParagraphs) + + for _, part := range parts { + part = normalizeSummaryParagraph(part) + if part == "" { + continue + } + + useful = append(useful, part) + if len(useful) >= maxParagraphs { + break + } + } + + return strings.Join(useful, "\n\n") +} + +func normalizeSummaryParagraph(paragraph string) string { + rawParagraph := strings.TrimSpace(paragraph) + switch { + case isHeadingOnlyParagraph(rawParagraph): + return "" + case isImageOnlyParagraph(rawParagraph): + return "" + case isHTMLOnlyParagraph(rawParagraph): + return "" + case isTOCParagraph(rawParagraph): + return "" + case isListOnlyParagraph(rawParagraph): + return "" + case isBadgeParagraph(rawParagraph): + return "" + } + + paragraph = sanitizeSummaryForGemtext(paragraph) + if paragraph == "" { + return "" + } + + if normalized, ok := normalizeManpageParagraph(paragraph); ok { + paragraph = normalized + } + + if isLabelOnlyParagraph(paragraph) { + return "" + } + + return paragraph +} + +func splitSummaryParagraphs(text string) []string { + text = strings.ReplaceAll(text, "\r\n", "\n") + text = strings.TrimSpace(text) + if text == "" { + return nil + } + + rawParts := strings.Split(text, "\n\n") + parts := make([]string, 0, len(rawParts)) + for _, part := range rawParts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + parts = append(parts, part) + } + + return parts +} + +func sanitizeSummaryForGemtext(summary string) string { + summary = strings.ReplaceAll(summary, "\r\n", "\n") + summary = strings.TrimSpace(summary) + if summary == "" { + return "" + } + + lines := strings.Split(summary, "\n") + cleaned := make([]string, 0, 
len(lines)) + inCodeFence := false + + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if trimmed == "" { + if inCodeFence { + continue + } + cleaned = append(cleaned, "") + continue + } + + if strings.HasPrefix(trimmed, "```") { + inCodeFence = !inCodeFence + continue + } + if inCodeFence { + continue + } + + if isHTMLOnlyLine(trimmed) || isMarkdownImageLine(trimmed) { + continue + } + + if isSetextUnderline(trimmed) && len(cleaned) > 0 && strings.TrimSpace(cleaned[len(cleaned)-1]) != "" { + continue + } + + if heading, ok := trimMarkdownHeading(trimmed); ok { + if heading != "" { + cleaned = append(cleaned, heading) + } + continue + } + + cleaned = append(cleaned, strings.TrimRight(line, " \t")) + } + + return strings.TrimSpace(strings.Join(cleaned, "\n")) +} + +func isHeadingOnlyParagraph(paragraph string) bool { + lines := strings.Split(strings.TrimSpace(strings.ReplaceAll(paragraph, "\r\n", "\n")), "\n") + if len(lines) == 1 { + _, ok := trimMarkdownHeading(strings.TrimSpace(lines[0])) + return ok + } + if len(lines) == 2 { + return strings.TrimSpace(lines[0]) != "" && isSetextUnderline(strings.TrimSpace(lines[1])) + } + return false +} + +func isImageOnlyParagraph(paragraph string) bool { + trimmed := strings.TrimSpace(paragraph) + if trimmed == "" || strings.Contains(trimmed, "\n") { + return false + } + return strings.HasPrefix(trimmed, "<img") || strings.HasPrefix(trimmed, " +} + +func isTOCParagraph(paragraph string) bool { + lines := strings.Split(strings.TrimSpace(paragraph), "\n") + if len(lines) == 0 { + return false + } + + first := strings.TrimSpace(lines[0]) + if !strings.EqualFold(first, "toc:") && !strings.EqualFold(first, "table of contents:") { + return false + } + + for _, line := range lines[1:] { + trimmed := strings.TrimSpace(line) + if trimmed == "" { + continue + } + if !isOrderedListLine(trimmed) { + return false + } + } + + return true +} + +func isListOnlyParagraph(paragraph string) bool { + lines := 
strings.Split(strings.TrimSpace(paragraph), "\n") + if len(lines) == 0 { + return false + } + + seen := false + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if trimmed == "" { + continue + } + if !isListLine(trimmed) { + return false + } + seen = true + } + + return seen +} + +func isListLine(line string) bool { + return strings.HasPrefix(line, "* ") || strings.HasPrefix(line, "- ") || isOrderedListLine(line) +} + +func isOrderedListLine(line string) bool { + if line == "" || line[0] < '0' || line[0] > '9' { + return false + } + + i := 0 + for i < len(line) && line[i] >= '0' && line[i] <= '9' { + i++ + } + if i == 0 || i >= len(line) { + return false + } + if (line[i] != '.' && line[i] != ')') || i+1 >= len(line) || line[i+1] != ' ' { + return false + } + + return true +} + +func isLabelOnlyParagraph(paragraph string) bool { + lines := strings.Split(strings.TrimSpace(paragraph), "\n") + if len(lines) != 1 { + return false + } + + line := strings.TrimSpace(lines[0]) + if line == "" { + return false + } + if strings.HasSuffix(line, ":") && len(strings.Fields(line)) <= 5 { + return true + } + + return line == strings.ToUpper(line) && len(strings.Fields(line)) <= 4 +} + +func isBadgeParagraph(paragraph string) bool { + lines := strings.Split(strings.TrimSpace(paragraph), "\n") + if len(lines) != 1 { + return false + } + + line := strings.TrimSpace(lines[0]) + if line == "" { + return false + } + + markerCount := strings.Count(line, "](") + strings.Count(line, "![") + return markerCount >= 2 +} + +func normalizeManpageParagraph(paragraph string) (string, bool) { + lines := strings.Split(strings.TrimSpace(paragraph), "\n") + if len(lines) < 2 { + return "", false + } + if strings.TrimSpace(lines[0]) != "NAME" { + return "", false + } + + body := make([]string, 0, len(lines)-1) + for _, line := range lines[1:] { + trimmed := strings.TrimSpace(line) + if trimmed == "" { + continue + } + body = append(body, trimmed) + } + if len(body) == 0 { + return 
"", false + } + + return strings.Join(body, " "), true +} + +func trimMarkdownHeading(line string) (string, bool) { + if line == "" || !strings.HasPrefix(line, "#") { + return "", false + } + + level := 0 + for level < len(line) && line[level] == '#' { + level++ + } + if level == 0 || level > 6 { + return "", false + } + if level < len(line) && line[level] != ' ' && line[level] != '\t' { + return "", false + } + + heading := strings.TrimSpace(line[level:]) + heading = strings.TrimSpace(strings.TrimRight(heading, "#")) + return heading, true +} + +func isSetextUnderline(line string) bool { + if len(line) < 3 { + return false + } + return strings.Trim(line, "=") == "" || strings.Trim(line, "-") == "" +} + // getRepositories returns a list of repository directories in the work directory func (g *Generator) getRepositories() ([]string, error) { entries, err := os.ReadDir(g.workDir) @@ -397,6 +720,11 @@ func (g *Generator) generateProjectSummary(repoName string, forceRegenerate bool summary = fallbackSummary(repoName, readmeContent, readmeFound) } } + summary = extractUsefulSummary(summary, 2) + if summary == "" { + summary = fallbackSummary(repoName, readmeContent, readmeFound) + } + summary = sanitizeSummaryForGemtext(summary) // Build URLs codebergURL, githubURL := g.buildProjectLinks(repoName) @@ -608,7 +936,7 @@ func (g *Generator) formatGemtext(summaries []ProjectSummary) string { } // Handle images and paragraphs - paragraphs := strings.Split(summary.Summary, "\n\n") + paragraphs := splitSummaryParagraphs(sanitizeSummaryForGemtext(summary.Summary)) // If we have images, distribute them nicely if len(summary.Images) > 0 { diff --git a/internal/showcase/showcase_test.go b/internal/showcase/showcase_test.go index fb20c96..125de63 100644 --- a/internal/showcase/showcase_test.go +++ b/internal/showcase/showcase_test.go @@ -120,11 +120,33 @@ func TestFormatGemtext_IncludesRankHistoryInHeader(t *testing.T) { }, }) - if !strings.Contains(content, "### 1. 
alpha [#1(now) ↑#2(1w) →#2(2w) ·n/a(3w) ↓#4(4w)]") { + if !strings.Contains(content, "### 1. alpha [#1(now) ↑#2(1w) →#2(2w) ↓#4(4w)]") { t.Fatalf("rank history was not rendered in header: %s", content) } } +func TestFormatGemtext_SanitizesMarkdownHeadingsInSummary(t *testing.T) { + t.Parallel() + + g := &Generator{config: &config.Config{}} + content := g.formatGemtext([]ProjectSummary{ + { + Name: "alpha", + Summary: "# Alpha Project\n\nconf\n====\n\nParagraph body", + }, + }) + + if strings.Contains(content, "\n# Alpha Project\n") { + t.Fatalf("markdown heading leaked into gemtext summary: %s", content) + } + if strings.Contains(content, "\n====\n") { + t.Fatalf("setext underline leaked into gemtext summary: %s", content) + } + if !strings.Contains(content, "\nAlpha Project\n\nconf\n\nParagraph body\n\n") { + t.Fatalf("sanitized summary not rendered as expected: %s", content) + } +} + func TestFindReadmeContent_UsesRepoPathWithoutChangingCWD(t *testing.T) { t.Parallel() @@ -160,3 +182,50 @@ func TestFallbackSummary_UsesFirstReadmeParagraph(t *testing.T) { t.Fatalf("expected first paragraph summary, got %q", summary) } } + +func TestFallbackSummary_SkipsHeadingOnlyParagraphs(t *testing.T) { + t.Parallel() + + readme := []byte("# repo title\n\n<img src=\"shot.png\" />\n\nactual summary paragraph") + summary := fallbackSummary("repo", readme, true) + + if summary != "actual summary paragraph" { + t.Fatalf("expected summary paragraph after heading and image, got %q", summary) + } +} + +func TestExtractUsefulSummary_SkipsNonProseParagraphs(t *testing.T) { + t.Parallel() + + input := "<p align=\"center\">\n<img src=\"shot.png\" />\n</p>\n\n* first bullet\n* second bullet\n\nTOC:\n01. Intro\n02. Usage\n\nActual summary paragraph.\n\nSecond useful paragraph." + got := extractUsefulSummary(input, 2) + want := "Actual summary paragraph.\n\nSecond useful paragraph." 
+ + if got != want { + t.Fatalf("extractUsefulSummary() = %q, want %q", got, want) + } +} + +func TestExtractUsefulSummary_NormalizesManpageNameSection(t *testing.T) { + t.Parallel() + + input := "NAME\n cpuinfo - A small and humble tool to print out CPU data" + got := extractUsefulSummary(input, 1) + want := "cpuinfo - A small and humble tool to print out CPU data" + + if got != want { + t.Fatalf("extractUsefulSummary() = %q, want %q", got, want) + } +} + +func TestExtractUsefulSummary_SkipsFencedCodeBlocks(t *testing.T) { + t.Parallel() + + input := "```sh\nsudo dnf install wireguard-tools\nbundler install\n```\n\nActual summary paragraph." + got := extractUsefulSummary(input, 1) + want := "Actual summary paragraph." + + if got != want { + t.Fatalf("extractUsefulSummary() = %q, want %q", got, want) + } +} diff --git a/internal/version/version.go b/internal/version/version.go index 04e6752..8a7c099 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -7,7 +7,7 @@ import ( var ( // Version is the current version of gitsyncer - Version = "0.15.0" + Version = "0.15.1" // GitCommit is the git commit hash at build time GitCommit = "unknown" |
