mirror of
https://github.com/BookStackApp/BookStack.git
synced 2026-05-04 18:08:46 +03:00
Centralises the logic to be more consistent, and makes it smarter: instead of just following the newline format of the input, it prevents HTML elements (like list elements) from being smushed next to each other.
48 lines
1.1 KiB
PHP
48 lines
1.1 KiB
PHP
<?php
|
|
|
|
namespace BookStack\Util;
|
|
|
|
/**
 * Converts an HTML string into plain text, starting a new line for every
 * element that is not listed as an inline tag.
 */
class HtmlToPlainText
{
    /**
     * Inline tags types where the content should not be put on a new line.
     * Any element whose node name is not in this list gets a newline emitted
     * before its content.
     *
     * @var list<string>
     */
    protected array $inlineTags = [
        'a', 'b', 'i', 'u', 'strong', 'em', 'small', 'sup', 'sub', 'span', 'div',
    ];

    /**
     * Convert the provided HTML to relatively clean plain text.
     *
     * The HTML is wrapped in an HtmlDocument, the text of its body node is
     * gathered via nodeToText(), runs of consecutive newlines are collapsed
     * into a single newline and leading/trailing whitespace is trimmed.
     *
     * @param string $html The HTML markup to convert.
     *
     * @return string The plain text content.
     */
    public function convert(string $html): string
    {
        $doc = new HtmlDocument($html);
        $text = $this->nodeToText($doc->getBody());

        // Remove repeated newlines. preg_replace() returns null on a PCRE
        // error; keep the uncollapsed text in that case rather than handing
        // null to trim() and silently returning an empty string.
        $text = preg_replace('/\n+/', "\n", $text) ?? $text;

        // Remove leading/trailing whitespace
        return trim($text);
    }

    /**
     * Recursively gather the text held within the given node.
     *
     * Text nodes return their content unchanged. Every other node contributes
     * a leading newline unless its node name is one of the inline tags,
     * followed by the text of each of its children in document order.
     *
     * @param \DOMNode $node The node to read text from.
     *
     * @return string The raw text, before newline collapsing or trimming.
     */
    protected function nodeToText(\DOMNode $node): string
    {
        if ($node->nodeType === XML_TEXT_NODE) {
            return $node->textContent;
        }

        $text = '';

        // Strict comparison: node names and the tag list are both strings,
        // so there is no reason to allow loose type juggling here.
        if (!in_array($node->nodeName, $this->inlineTags, true)) {
            $text .= "\n";
        }

        foreach ($node->childNodes as $childNode) {
            $text .= $this->nodeToText($childNode);
        }

        return $text;
    }
}
|