diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php
new file mode 100644
index 000000000..5aab25b47
--- /dev/null
+++ b/app/Util/ConfiguredHtmlPurifier.php
@@ -0,0 +1,126 @@
+<?php
+
+namespace BookStack\Util;
+
+use HTMLPurifier;
+use HTMLPurifier_Config;
+use HTMLPurifier_HTML5Config;
+use HTMLPurifier_HTMLDefinition;
+
+/**
+ * Wraps HTMLPurifier with this application's purifier settings and
+ * additional element definitions, so callers only need to call purify().
+ */
+class ConfiguredHtmlPurifier
+{
+    protected HTMLPurifier $purifier;
+
+    /**
+     * Create the purifier from the default HTML5 config, with the settings
+     * from setConfig() and the elements from configureDefinition() applied.
+     */
+    public function __construct()
+    {
+        $config = HTMLPurifier_HTML5Config::createDefault();
+        $this->setConfig($config);
+
+        // Raw definition access may not yield an editable definition (HTMLPurifier
+        // documents a null return when a cached definition is in use), so only
+        // register the custom elements when one is actually provided.
+        $htmlDef = $config->getDefinition('HTML', true, true);
+        if ($htmlDef instanceof HTMLPurifier_HTMLDefinition) {
+            $this->configureDefinition($htmlDef);
+        }
+
+        $this->purifier = new HTMLPurifier($config);
+    }
+
+    /**
+     * Apply the purifier settings used for content filtering to the given config.
+     */
+    protected function setConfig(HTMLPurifier_Config $config): void
+    {
+        $config->set('Cache.SerializerPath', storage_path('purifier'));
+        $config->set('CSS.AllowTricky', true);
+        $config->set('HTML.SafeIframe', true);
+        $config->set('Attr.EnableID', true);
+        $config->set('Attr.ID.HTML5', true);
+        $config->set('Output.FixInnerHTML', false);
+        // Iframes are permitted for any http(s) source.
+        $config->set('URI.SafeIframeRegexp', '%^(http://|https://)%');
+        $config->set('URI.AllowedSchemes', [
+            'http' => true,
+            'https' => true,
+            'mailto' => true,
+            'ftp' => true,
+            'nntp' => true,
+            'news' => true,
+            'tel' => true,
+            'file' => true,
+        ]);
+
+        // NOTE(review): a null implementation turns off definition caching. The
+        // original change marked this as a testing aid; confirm whether it should
+        // remain enabled outside of testing before merging.
+        $config->set('Cache.DefinitionImpl', null);
+    }
+
+    /**
+     * Register the extra elements (object, embed and checkbox inputs) on the
+     * given HTML definition so that they are not stripped when purifying.
+     */
+    public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): void
+    {
+        // Allow the object element
+        $definition->addElement(
+            'object',
+            'Inline',
+            'Flow',
+            'Common',
+            [
+                'data' => 'URI',
+                'type' => 'Text',
+                'width' => 'Length',
+                'height' => 'Length',
+            ]
+        );
+
+        // Allow the embed element
+        $definition->addElement(
+            'embed',
+            'Inline',
+            'Empty',
+            'Common',
+            [
+                'src' => 'URI',
+                'type' => 'Text',
+                'width' => 'Length',
+                'height' => 'Length',
+            ]
+        );
+
+        // Allow checkbox inputs
+        $definition->addElement(
+            'input',
+            'Formctrl',
+            'Empty',
+            'Common',
+            [
+                'checked' => 'Bool#checked',
+                'disabled' => 'Bool#disabled',
+                'name' => 'Text',
+                'readonly' => 'Bool#readonly',
+                'type' => 'Enum#checkbox',
+                'value' => 'Text',
+            ]
+        );
+    }
+
+    /**
+     * Purify the given HTML string using the configured purifier.
+     */
+    public function purify(string $html): string
+    {
+        return $this->purifier->purify($html);
+    }
+}
diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php
index 842e42467..79b1cdc93 100644
--- a/app/Util/HtmlContentFilter.php
+++ b/app/Util/HtmlContentFilter.php
@@ -5,8 +5,6 @@ namespace BookStack\Util;
 use DOMAttr;
 use DOMElement;
 use DOMNodeList;
-use HTMLPurifier;
-use HTMLPurifier_HTML5Config;
 
 class HtmlContentFilter
 {
@@ -45,8 +43,6 @@ class HtmlContentFilter
 
     protected function applyAllowListFiltering(string $html): string
     {
-        $config = HTMLPurifier_HTML5Config::createDefault();
-        $config->set('Cache.SerializerPath', storage_path('purifier'));
-        $purifier = new HTMLPurifier($config);
+        $purifier = new ConfiguredHtmlPurifier();
         return $purifier->purify($html);
     }