From c7d3775bb999bda0d134de900b28f2ebeb4cec67 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sun, 5 Apr 2026 00:05:10 +0100 Subject: [PATCH] Plain text: Created a new HTML to plain text converter To centralise logic to be more consistent, and to have smarter logic which avoids just following newline format from input, preventing smushing HTML elements (like list elements) next to each other --- app/Activity/Models/Comment.php | 7 +++ .../Messages/CommentCreationNotification.php | 2 +- .../Messages/CommentMentionNotification.php | 2 +- app/Entities/Repos/BaseRepo.php | 4 +- app/Entities/Tools/PageContent.php | 5 +- app/Util/HtmlToPlainText.php | 47 ++++++++++++++ tests/Util/HtmlToPlainTextTest.php | 63 +++++++++++++++++++ 7 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 app/Util/HtmlToPlainText.php create mode 100644 tests/Util/HtmlToPlainTextTest.php diff --git a/app/Activity/Models/Comment.php b/app/Activity/Models/Comment.php index ab7d91772..3faa76657 100644 --- a/app/Activity/Models/Comment.php +++ b/app/Activity/Models/Comment.php @@ -9,6 +9,7 @@ use BookStack\Users\Models\HasCreatorAndUpdater; use BookStack\Users\Models\OwnableInterface; use BookStack\Util\HtmlContentFilter; use BookStack\Util\HtmlContentFilterConfig; +use BookStack\Util\HtmlToPlainText; use Illuminate\Database\Eloquent\Builder; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Relations\BelongsTo; @@ -87,6 +88,12 @@ class Comment extends Model implements Loggable, OwnableInterface return $filter->filterString($this->html ?? ''); } + public function getPlainText(): string + { + $converter = new HtmlToPlainText(); + return $converter->convert($this->html ?? 
''); + } + public function jointPermissions(): HasMany { return $this->hasMany(JointPermission::class, 'entity_id', 'commentable_id') diff --git a/app/Activity/Notifications/Messages/CommentCreationNotification.php b/app/Activity/Notifications/Messages/CommentCreationNotification.php index 30d0ffa2b..d739f4aab 100644 --- a/app/Activity/Notifications/Messages/CommentCreationNotification.php +++ b/app/Activity/Notifications/Messages/CommentCreationNotification.php @@ -24,7 +24,7 @@ class CommentCreationNotification extends BaseActivityNotification $locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page), $locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable), $locale->trans('notifications.detail_commenter') => $this->user->name, - $locale->trans('notifications.detail_comment') => strip_tags($comment->html), + $locale->trans('notifications.detail_comment') => $comment->getPlainText(), ]); return $this->newMailMessage($locale) diff --git a/app/Activity/Notifications/Messages/CommentMentionNotification.php b/app/Activity/Notifications/Messages/CommentMentionNotification.php index de9e71963..4c8ee5bab 100644 --- a/app/Activity/Notifications/Messages/CommentMentionNotification.php +++ b/app/Activity/Notifications/Messages/CommentMentionNotification.php @@ -24,7 +24,7 @@ class CommentMentionNotification extends BaseActivityNotification $locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page), $locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable), $locale->trans('notifications.detail_commenter') => $this->user->name, - $locale->trans('notifications.detail_comment') => strip_tags($comment->html), + $locale->trans('notifications.detail_comment') => $comment->getPlainText(), ]); return $this->newMailMessage($locale) diff --git a/app/Entities/Repos/BaseRepo.php b/app/Entities/Repos/BaseRepo.php index 717e9c9f8..44baeaccf 100644 --- 
a/app/Entities/Repos/BaseRepo.php +++ b/app/Entities/Repos/BaseRepo.php @@ -16,6 +16,7 @@ use BookStack\References\ReferenceUpdater; use BookStack\Sorting\BookSorter; use BookStack\Uploads\ImageRepo; use BookStack\Util\HtmlDescriptionFilter; +use BookStack\Util\HtmlToPlainText; use Illuminate\Http\UploadedFile; class BaseRepo @@ -151,9 +152,10 @@ class BaseRepo } if (isset($input['description_html'])) { + $plainTextConverter = new HtmlToPlainText(); $entity->descriptionInfo()->set( HtmlDescriptionFilter::filterFromString($input['description_html']), - html_entity_decode(strip_tags($input['description_html'])) + $plainTextConverter->convert($input['description_html']), ); } else if (isset($input['description'])) { $entity->descriptionInfo()->set('', $input['description']); diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 8d89a86cf..b86fbbe8b 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -16,6 +16,7 @@ use BookStack\Users\Models\User; use BookStack\Util\HtmlContentFilter; use BookStack\Util\HtmlContentFilterConfig; use BookStack\Util\HtmlDocument; +use BookStack\Util\HtmlToPlainText; use BookStack\Util\WebSafeMimeSniffer; use Closure; use DOMElement; @@ -303,8 +304,8 @@ class PageContent public function toPlainText(): string { $html = $this->render(true); - - return html_entity_decode(strip_tags($html)); + $converter = new HtmlToPlainText(); + return $converter->convert($html); } /** diff --git a/app/Util/HtmlToPlainText.php b/app/Util/HtmlToPlainText.php new file mode 100644 index 000000000..79da9e3d8 --- /dev/null +++ b/app/Util/HtmlToPlainText.php @@ -0,0 +1,47 @@ +nodeToText($doc->getBody()); + + // Remove repeated newlines + $text = preg_replace('/\n+/', "\n", $text); + // Remove leading/trailing whitespace + $text = trim($text); + + return $text; + } + + protected function nodeToText(\DOMNode $node): string + { + if ($node->nodeType === XML_TEXT_NODE) { + return 
$node->textContent; + } + + $text = ''; + if (!in_array($node->nodeName, $this->inlineTags)) { + $text .= "\n"; + } + + foreach ($node->childNodes as $childNode) { + $text .= $this->nodeToText($childNode); + } + + return $text; + } +} diff --git a/tests/Util/HtmlToPlainTextTest.php b/tests/Util/HtmlToPlainTextTest.php new file mode 100644 index 000000000..e522e4863 --- /dev/null +++ b/tests/Util/HtmlToPlainTextTest.php @@ -0,0 +1,63 @@ +This is a test

+ +

A Header

+

more <©> text with bold

+HTML; + $expected = << text with bold +TEXT; + + $this->runTest($html, $expected); + } + + public function test_adjacent_list_items_are_separated_by_newline() + { + $html = <<
  • Item A
  • Item B
  • +HTML; + $expected = <<runTest($html, $expected); + } + + public function test_inline_formats_dont_cause_newlines() + { + $html = <<Hello

    +HTML; + $expected = <<runTest($html, $expected); + } + + protected function runTest(string $html, string $expected): void + { + $converter = new HtmlToPlainText(); + $result = $converter->convert(trim($html)); + $this->assertEquals(trim($expected), $result); + } +}