diff --git a/app/Activity/Models/Comment.php b/app/Activity/Models/Comment.php index ab7d91772..3faa76657 100644 --- a/app/Activity/Models/Comment.php +++ b/app/Activity/Models/Comment.php @@ -9,6 +9,7 @@ use BookStack\Users\Models\HasCreatorAndUpdater; use BookStack\Users\Models\OwnableInterface; use BookStack\Util\HtmlContentFilter; use BookStack\Util\HtmlContentFilterConfig; +use BookStack\Util\HtmlToPlainText; use Illuminate\Database\Eloquent\Builder; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Relations\BelongsTo; @@ -87,6 +88,12 @@ class Comment extends Model implements Loggable, OwnableInterface return $filter->filterString($this->html ?? ''); } + public function getPlainText(): string + { + $converter = new HtmlToPlainText(); + return $converter->convert($this->html ?? ''); + } + public function jointPermissions(): HasMany { return $this->hasMany(JointPermission::class, 'entity_id', 'commentable_id') diff --git a/app/Activity/Notifications/Messages/CommentCreationNotification.php b/app/Activity/Notifications/Messages/CommentCreationNotification.php index 30d0ffa2b..d739f4aab 100644 --- a/app/Activity/Notifications/Messages/CommentCreationNotification.php +++ b/app/Activity/Notifications/Messages/CommentCreationNotification.php @@ -24,7 +24,7 @@ class CommentCreationNotification extends BaseActivityNotification $locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page), $locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable), $locale->trans('notifications.detail_commenter') => $this->user->name, - $locale->trans('notifications.detail_comment') => strip_tags($comment->html), + $locale->trans('notifications.detail_comment') => $comment->getPlainText(), ]); return $this->newMailMessage($locale) diff --git a/app/Activity/Notifications/Messages/CommentMentionNotification.php b/app/Activity/Notifications/Messages/CommentMentionNotification.php index de9e71963..4c8ee5bab 100644 --- a/app/Activity/Notifications/Messages/CommentMentionNotification.php +++ b/app/Activity/Notifications/Messages/CommentMentionNotification.php @@ -24,7 +24,7 @@ class CommentMentionNotification extends BaseActivityNotification $locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page), $locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable), $locale->trans('notifications.detail_commenter') => $this->user->name, - $locale->trans('notifications.detail_comment') => strip_tags($comment->html), + $locale->trans('notifications.detail_comment') => $comment->getPlainText(), ]); return $this->newMailMessage($locale) diff --git a/app/Entities/Repos/BaseRepo.php b/app/Entities/Repos/BaseRepo.php index 717e9c9f8..44baeaccf 100644 --- a/app/Entities/Repos/BaseRepo.php +++ b/app/Entities/Repos/BaseRepo.php @@ -16,6 +16,7 @@ use BookStack\References\ReferenceUpdater; use BookStack\Sorting\BookSorter; use BookStack\Uploads\ImageRepo; use BookStack\Util\HtmlDescriptionFilter; +use BookStack\Util\HtmlToPlainText; use Illuminate\Http\UploadedFile; class BaseRepo @@ -151,9 +152,10 @@ class BaseRepo } if (isset($input['description_html'])) { + $plainTextConverter = new HtmlToPlainText(); $entity->descriptionInfo()->set( HtmlDescriptionFilter::filterFromString($input['description_html']), - html_entity_decode(strip_tags($input['description_html'])) + $plainTextConverter->convert($input['description_html']), ); } else if (isset($input['description'])) { $entity->descriptionInfo()->set('', $input['description']); diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 8d89a86cf..b86fbbe8b 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -16,6 +16,7 @@ use BookStack\Users\Models\User; use BookStack\Util\HtmlContentFilter; use BookStack\Util\HtmlContentFilterConfig; use BookStack\Util\HtmlDocument; +use BookStack\Util\HtmlToPlainText; use BookStack\Util\WebSafeMimeSniffer; use Closure; use DOMElement; @@ -303,8 +304,8 @@ class PageContent public function toPlainText(): string { $html = $this->render(true); - - return html_entity_decode(strip_tags($html)); + $converter = new HtmlToPlainText(); + return $converter->convert($html); } /** diff --git a/app/Util/HtmlToPlainText.php b/app/Util/HtmlToPlainText.php new file mode 100644 index 000000000..79da9e3d8 --- /dev/null +++ b/app/Util/HtmlToPlainText.php @@ -0,0 +1,47 @@ +nodeToText($doc->getBody()); + + // Remove repeated newlines + $text = preg_replace('/\n+/', "\n", $text); + // Remove leading/trailing whitespace + $text = trim($text); + + return $text; + } + + protected function nodeToText(\DOMNode $node): string + { + if ($node->nodeType === XML_TEXT_NODE) { + return $node->textContent; + } + + $text = ''; + if (!in_array($node->nodeName, $this->inlineTags)) { + $text .= "\n"; + } + + foreach ($node->childNodes as $childNode) { + $text .= $this->nodeToText($childNode); + } + + return $text; + } +} diff --git a/tests/Util/HtmlToPlainTextTest.php b/tests/Util/HtmlToPlainTextTest.php new file mode 100644 index 000000000..e522e4863 --- /dev/null +++ b/tests/Util/HtmlToPlainTextTest.php @@ -0,0 +1,63 @@ +This is a test

+ +

A Header

+

more <©> text with bold

+HTML; + $expected = << text with bold +TEXT; + + $this->runTest($html, $expected); + } + + public function test_adjacent_list_items_are_separated_by_newline() + { + $html = <<
  • Item A
  • Item B
  • +HTML; + $expected = <<runTest($html, $expected); + } + + public function test_inline_formats_dont_cause_newlines() + { + $html = <<Hello

    +HTML; + $expected = <<runTest($html, $expected); + } + + protected function runTest(string $html, string $expected): void + { + $converter = new HtmlToPlainText(); + $result = $converter->convert(trim($html)); + $this->assertEquals(trim($expected), $result); + } +}