mirror of
https://github.com/BookStackApp/BookStack.git
synced 2026-05-04 18:08:46 +03:00
Plain text: Created a new HTML to plain text converter
To centralise the logic for more consistent results, and to use smarter logic which avoids just following the newline format of the input, preventing HTML elements (like list items) from being smushed next to each other.
This commit is contained in:
@@ -9,6 +9,7 @@ use BookStack\Users\Models\HasCreatorAndUpdater;
|
||||
use BookStack\Users\Models\OwnableInterface;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use BookStack\Util\HtmlToPlainText;
|
||||
use Illuminate\Database\Eloquent\Builder;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
@@ -87,6 +88,12 @@ class Comment extends Model implements Loggable, OwnableInterface
|
||||
return $filter->filterString($this->html ?? '');
|
||||
}
|
||||
|
||||
/**
 * Get the HTML content of this comment converted to plain text.
 * A null html value is treated as an empty string.
 */
public function getPlainText(): string
{
    $commentHtml = $this->html ?? '';

    return (new HtmlToPlainText())->convert($commentHtml);
}
|
||||
|
||||
public function jointPermissions(): HasMany
|
||||
{
|
||||
return $this->hasMany(JointPermission::class, 'entity_id', 'commentable_id')
|
||||
|
||||
@@ -24,7 +24,7 @@ class CommentCreationNotification extends BaseActivityNotification
|
||||
$locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page),
|
||||
$locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable),
|
||||
$locale->trans('notifications.detail_commenter') => $this->user->name,
|
||||
$locale->trans('notifications.detail_comment') => strip_tags($comment->html),
|
||||
$locale->trans('notifications.detail_comment') => $comment->getPlainText(),
|
||||
]);
|
||||
|
||||
return $this->newMailMessage($locale)
|
||||
|
||||
@@ -24,7 +24,7 @@ class CommentMentionNotification extends BaseActivityNotification
|
||||
$locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page),
|
||||
$locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable),
|
||||
$locale->trans('notifications.detail_commenter') => $this->user->name,
|
||||
$locale->trans('notifications.detail_comment') => strip_tags($comment->html),
|
||||
$locale->trans('notifications.detail_comment') => $comment->getPlainText(),
|
||||
]);
|
||||
|
||||
return $this->newMailMessage($locale)
|
||||
|
||||
@@ -16,6 +16,7 @@ use BookStack\References\ReferenceUpdater;
|
||||
use BookStack\Sorting\BookSorter;
|
||||
use BookStack\Uploads\ImageRepo;
|
||||
use BookStack\Util\HtmlDescriptionFilter;
|
||||
use BookStack\Util\HtmlToPlainText;
|
||||
use Illuminate\Http\UploadedFile;
|
||||
|
||||
class BaseRepo
|
||||
@@ -151,9 +152,10 @@ class BaseRepo
|
||||
}
|
||||
|
||||
if (isset($input['description_html'])) {
|
||||
$plainTextConverter = new HtmlToPlainText();
|
||||
$entity->descriptionInfo()->set(
|
||||
HtmlDescriptionFilter::filterFromString($input['description_html']),
|
||||
html_entity_decode(strip_tags($input['description_html']))
|
||||
$plainTextConverter->convert($input['description_html']),
|
||||
);
|
||||
} else if (isset($input['description'])) {
|
||||
$entity->descriptionInfo()->set('', $input['description']);
|
||||
|
||||
@@ -16,6 +16,7 @@ use BookStack\Users\Models\User;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use BookStack\Util\HtmlDocument;
|
||||
use BookStack\Util\HtmlToPlainText;
|
||||
use BookStack\Util\WebSafeMimeSniffer;
|
||||
use Closure;
|
||||
use DOMElement;
|
||||
@@ -303,8 +304,8 @@ class PageContent
|
||||
/**
 * Convert the rendered page content to plain text.
 * The HTML produced by render(true) is passed through the shared
 * HtmlToPlainText converter rather than a bare strip_tags() call,
 * so the conversion logic lives in a single place.
 */
public function toPlainText(): string
{
    $html = $this->render(true);
    $converter = new HtmlToPlainText();

    return $converter->convert($html);
}
|
||||
|
||||
/**
|
||||
|
||||
47
app/Util/HtmlToPlainText.php
Normal file
47
app/Util/HtmlToPlainText.php
Normal file
@@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Util;
|
||||
|
||||
class HtmlToPlainText
{
    /**
     * Inline tag types where the content should not be put on a new line.
     * NOTE(review): 'div' is listed here, so div boundaries alone never
     * produce a line break - confirm this is intended.
     */
    protected array $inlineTags = [
        'a', 'b', 'i', 'u', 'strong', 'em', 'small', 'sup', 'sub', 'span', 'div',
    ];

    /**
     * Convert the provided HTML to relatively clean plain text.
     * Runs of newlines are collapsed to a single newline and
     * leading/trailing whitespace is trimmed from the result.
     */
    public function convert(string $html): string
    {
        $doc = new HtmlDocument($html);
        $text = $this->nodeToText($doc->getBody());

        // Remove repeated newlines.
        // preg_replace() returns null on a PCRE failure, so fall back to
        // the uncollapsed text instead of passing null on to trim().
        $text = preg_replace('/\n+/', "\n", $text) ?? $text;

        // Remove leading/trailing whitespace
        return trim($text);
    }

    /**
     * Recursively build the text for the given node.
     * Text nodes provide their content as-is. Any node whose name is not
     * in the inline tag list is wrapped in newlines, so that its content
     * is separated from both the preceding and the following content.
     */
    protected function nodeToText(\DOMNode $node): string
    {
        if ($node->nodeType === XML_TEXT_NODE) {
            return $node->textContent;
        }

        $isBlock = !in_array($node->nodeName, $this->inlineTags, true);
        $text = $isBlock ? "\n" : '';

        foreach ($node->childNodes as $childNode) {
            $text .= $this->nodeToText($childNode);
        }

        // Close the block with a newline too, otherwise text directly after
        // the element (e.g. "<p>a</p>b") would be joined onto its content.
        if ($isBlock) {
            $text .= "\n";
        }

        return $text;
    }
}
|
||||
63
tests/Util/HtmlToPlainTextTest.php
Normal file
63
tests/Util/HtmlToPlainTextTest.php
Normal file
@@ -0,0 +1,63 @@
|
||||
<?php
|
||||
|
||||
namespace Tests\Util;
|
||||
|
||||
use BookStack\Util\HtmlToPlainText;
|
||||
use Tests\TestCase;
|
||||
|
||||
/**
 * Tests for the HtmlToPlainText converter, covering block separation,
 * entity decoding and inline element handling.
 */
class HtmlToPlainTextTest extends TestCase
{
    public function test_it_converts_html_to_plain_text(): void
    {
        // The entities in the last paragraph are deliberately encoded, so that
        // the expected text below confirms they are decoded by the conversion.
        $html = <<<HTML
        <p>This is a test</p>
        <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        </ul>
        <h2>A Header</h2>
        <p>more &lt;&copy;&gt; text <strong>with bold</strong></p>
        HTML;
        $expected = <<<TEXT
        This is a test
        Item 1
        Item 2
        A Header
        more <©> text with bold
        TEXT;

        $this->runTest($html, $expected);
    }

    public function test_adjacent_list_items_are_separated_by_newline(): void
    {
        $html = <<<HTML
        <ul><li>Item A</li><li>Item B</li></ul>
        HTML;
        $expected = <<<TEXT
        Item A
        Item B
        TEXT;

        $this->runTest($html, $expected);
    }

    public function test_inline_formats_dont_cause_newlines(): void
    {
        $html = <<<HTML
        <p><strong>H</strong><a>e</a><sup>l</sup><span>l</span><em>o</em></p>
        HTML;
        $expected = <<<TEXT
        Hello
        TEXT;

        $this->runTest($html, $expected);
    }

    /**
     * Convert the given HTML and assert that the result matches the expected text.
     * Both the input and the expected value are trimmed before use.
     */
    protected function runTest(string $html, string $expected): void
    {
        $converter = new HtmlToPlainText();
        $result = $converter->convert(trim($html));
        $this->assertSame(trim($expected), $result);
    }
}
|
||||
Reference in New Issue
Block a user