mirror of
https://github.com/BookStackApp/BookStack.git
synced 2026-02-24 19:07:20 +03:00
Page Content: Added more complex & configurable content filtering
- Added new option to control parts of the filter. - Added whitelist filtering pass via HTMLPurifier.
This commit is contained in:
@@ -8,6 +8,7 @@ use BookStack\Permissions\PermissionApplicator;
|
||||
use BookStack\Users\Models\HasCreatorAndUpdater;
|
||||
use BookStack\Users\Models\OwnableInterface;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use Illuminate\Database\Eloquent\Builder;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
@@ -82,7 +83,8 @@ class Comment extends Model implements Loggable, OwnableInterface
|
||||
|
||||
public function safeHtml(): string
|
||||
{
|
||||
return HtmlContentFilter::removeActiveContentFromHtmlString($this->html ?? '');
|
||||
$filter = new HtmlContentFilter(new HtmlContentFilterConfig());
|
||||
return $filter->filterString($this->html ?? '');
|
||||
}
|
||||
|
||||
public function jointPermissions(): HasMany
|
||||
|
||||
@@ -42,6 +42,18 @@ return [
|
||||
// Even when overridden the WYSIWYG editor may still escape script content.
|
||||
'allow_content_scripts' => env('ALLOW_CONTENT_SCRIPTS', false),
|
||||
|
||||
// Control the behaviour of page content filtering.
|
||||
// This setting is a collection of characters which represent different available filters:
|
||||
// - j - Filter out JavaScript based content
|
||||
// - h - Filter out unexpected, potentially dangerous, HTML elements
|
||||
// - f - Filter out unexpected form elements
|
||||
// - a - Run content through a more complex allow-list filter
|
||||
// This defaults to using all filters, unless ALLOW_CONTENT_SCRIPTS is set to true in which case no filters are used.
|
||||
// Note: These filters are a best attempt, and may not be 100% effective. They are typically a layer used in addition to other security measures.
|
||||
// TODO - Add to example env
|
||||
// TODO - Remove allow_content_scripts option above
|
||||
'content_filtering' => env('CONTENT_FILTERING', env('ALLOW_CONTENT_SCRIPTS', false) === true ? '' : 'jfha'),
|
||||
|
||||
// Allow server-side fetches to be performed to potentially unknown
|
||||
// and user-provided locations. Primarily used in exports when loading
|
||||
// in externally referenced assets.
|
||||
|
||||
@@ -6,6 +6,7 @@ use BookStack\Entities\Models\Book;
|
||||
use BookStack\Entities\Models\Bookshelf;
|
||||
use BookStack\Entities\Models\Chapter;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
|
||||
class EntityHtmlDescription
|
||||
{
|
||||
@@ -50,7 +51,8 @@ class EntityHtmlDescription
|
||||
return $html;
|
||||
}
|
||||
|
||||
return HtmlContentFilter::removeActiveContentFromHtmlString($html);
|
||||
$filter = new HtmlContentFilter(new HtmlContentFilterConfig());
|
||||
return $filter->filterString($html);
|
||||
}
|
||||
|
||||
public function getPlain(): string
|
||||
|
||||
@@ -13,6 +13,7 @@ use BookStack\Uploads\ImageRepo;
|
||||
use BookStack\Uploads\ImageService;
|
||||
use BookStack\Users\Models\User;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use BookStack\Util\HtmlDocument;
|
||||
use BookStack\Util\WebSafeMimeSniffer;
|
||||
use Closure;
|
||||
@@ -317,11 +318,28 @@ class PageContent
|
||||
$this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
|
||||
}
|
||||
|
||||
if (!config('app.allow_content_scripts')) {
|
||||
HtmlContentFilter::removeActiveContentFromDocument($doc);
|
||||
$cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml());
|
||||
$cached = cache()->get($cacheKey, null);
|
||||
if ($cached !== null) {
|
||||
return $cached;
|
||||
}
|
||||
|
||||
return $doc->getBodyInnerHtml();
|
||||
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
|
||||
$filter = new HtmlContentFilter($filterConfig);
|
||||
$filtered = $filter->filterDocument($doc);
|
||||
|
||||
$cacheTime = 86400 * 7; // 1 week
|
||||
cache()->put($cacheKey, $filtered, $cacheTime);
|
||||
|
||||
return $filtered;
|
||||
}
|
||||
|
||||
protected function getContentCacheKey(string $html): string
|
||||
{
|
||||
$contentHash = md5($html);
|
||||
$contentId = $this->page->id;
|
||||
$contentTime = $this->page->updated_at->timestamp;
|
||||
return "page-content-cache::{$contentId}::{$contentTime}::{$contentHash}";
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -4,25 +4,16 @@ namespace BookStack\Theming;
|
||||
|
||||
use BookStack\Util\CspService;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use BookStack\Util\HtmlNonceApplicator;
|
||||
use Illuminate\Contracts\Cache\Repository as Cache;
|
||||
|
||||
class CustomHtmlHeadContentProvider
|
||||
{
|
||||
/**
|
||||
* @var CspService
|
||||
*/
|
||||
protected $cspService;
|
||||
|
||||
/**
|
||||
* @var Cache
|
||||
*/
|
||||
protected $cache;
|
||||
|
||||
public function __construct(CspService $cspService, Cache $cache)
|
||||
{
|
||||
$this->cspService = $cspService;
|
||||
$this->cache = $cache;
|
||||
public function __construct(
|
||||
protected CspService $cspService,
|
||||
protected Cache $cache
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -50,7 +41,8 @@ class CustomHtmlHeadContentProvider
|
||||
$hash = md5($content);
|
||||
|
||||
return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
|
||||
return HtmlContentFilter::removeActiveContentFromHtmlString($content);
|
||||
$config = new HtmlContentFilterConfig(filterOutNonContentElements: false);
|
||||
return (new HtmlContentFilter($config))->filterString($content);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -5,15 +5,53 @@ namespace BookStack\Util;
|
||||
use DOMAttr;
|
||||
use DOMElement;
|
||||
use DOMNodeList;
|
||||
use HTMLPurifier;
|
||||
use HTMLPurifier_HTML5Config;
|
||||
|
||||
class HtmlContentFilter
|
||||
{
|
||||
/**
|
||||
* Remove all active content from the given HTML document.
|
||||
* This aims to cover anything which can dynamically deal with, or send, data
|
||||
* like any JavaScript actions or form content.
|
||||
*/
|
||||
public static function removeActiveContentFromDocument(HtmlDocument $doc): void
|
||||
public function __construct(
|
||||
protected HtmlContentFilterConfig $config
|
||||
) {
|
||||
}
|
||||
|
||||
public function filterDocument(HtmlDocument $doc): string
|
||||
{
|
||||
if ($this->config->filterOutJavaScript) {
|
||||
$this->filterOutScriptsFromDocument($doc);
|
||||
}
|
||||
if ($this->config->filterOutFormElements) {
|
||||
$this->filterOutFormElementsFromDocument($doc);
|
||||
}
|
||||
if ($this->config->filterOutBadHtmlElements) {
|
||||
$this->filterOutBadHtmlElementsFromDocument($doc);
|
||||
}
|
||||
if ($this->config->filterOutNonContentElements) {
|
||||
$this->filterOutNonContentElementsFromDocument($doc);
|
||||
}
|
||||
|
||||
$filtered = $doc->getBodyInnerHtml();
|
||||
if ($this->config->useAllowListFilter) {
|
||||
$filtered = $this->applyAllowListFiltering($filtered);
|
||||
}
|
||||
|
||||
return $filtered;
|
||||
}
|
||||
|
||||
public function filterString(string $html): string
|
||||
{
|
||||
return $this->filterDocument(new HtmlDocument($html));
|
||||
}
|
||||
|
||||
protected function applyAllowListFiltering(string $html): string
|
||||
{
|
||||
$config = HTMLPurifier_HTML5Config::createDefault();
|
||||
$config->set('Cache.SerializerPath', storage_path('purifier'));
|
||||
$purifier = new HTMLPurifier($config);
|
||||
return $purifier->purify($html);
|
||||
}
|
||||
|
||||
protected function filterOutScriptsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
// Remove standard script tags
|
||||
$scriptElems = $doc->queryXPath('//script');
|
||||
@@ -27,10 +65,6 @@ class HtmlContentFilter
|
||||
$badForms = $doc->queryXPath('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']');
|
||||
static::removeNodes($badForms);
|
||||
|
||||
// Remove meta tag to prevent external redirects
|
||||
$metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
|
||||
static::removeNodes($metaTags);
|
||||
|
||||
// Remove data or JavaScript iFrames
|
||||
$badIframes = $doc->queryXPath('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]');
|
||||
static::removeNodes($badIframes);
|
||||
@@ -49,7 +83,10 @@ class HtmlContentFilter
|
||||
// Remove 'on*' attributes
|
||||
$onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
|
||||
static::removeAttributes($onAttributes);
|
||||
}
|
||||
|
||||
protected function filterOutFormElementsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
// Remove form elements
|
||||
$formElements = ['form', 'fieldset', 'button', 'textarea', 'select'];
|
||||
foreach ($formElements as $formElement) {
|
||||
@@ -75,41 +112,21 @@ class HtmlContentFilter
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove active content from the given HTML string.
|
||||
* This aims to cover anything which can dynamically deal with, or send, data
|
||||
* like any JavaScript actions or form content.
|
||||
*/
|
||||
public static function removeActiveContentFromHtmlString(string $html): string
|
||||
protected function filterOutBadHtmlElementsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
if (empty($html)) {
|
||||
return $html;
|
||||
// Remove meta tag to prevent external redirects
|
||||
$metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
|
||||
static::removeNodes($metaTags);
|
||||
}
|
||||
|
||||
protected function filterOutNonContentElementsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
// Remove non-content elements
|
||||
$formElements = ['link', 'style', 'meta', 'title', 'template'];
|
||||
foreach ($formElements as $formElement) {
|
||||
$matchingFormElements = $doc->queryXPath('//' . $formElement);
|
||||
static::removeNodes($matchingFormElements);
|
||||
}
|
||||
|
||||
$doc = new HtmlDocument($html);
|
||||
static::removeActiveContentFromDocument($doc);
|
||||
|
||||
return $doc->getBodyInnerHtml();
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use removeActiveContentFromDocument instead.
|
||||
*/
|
||||
public static function removeScriptsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
static::removeActiveContentFromDocument($doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use removeActiveContentFromHtmlString instead.
|
||||
*/
|
||||
public static function removeScriptsFromHtmlString(string $html): string
|
||||
{
|
||||
return static::removeActiveContentFromHtmlString($html);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -147,4 +164,34 @@ class HtmlContentFilter
|
||||
$parentNode->removeAttribute($attrName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use filterDocument instead.
|
||||
*/
|
||||
public static function removeScriptsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
$config = new HtmlContentFilterConfig(
|
||||
filterOutNonContentElements: false,
|
||||
useAllowListFilter: false,
|
||||
);
|
||||
$filter = new static($config);
|
||||
$filter->filterDocument($doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use filterString instead.
|
||||
*/
|
||||
public static function removeScriptsFromHtmlString(string $html): string
|
||||
{
|
||||
$config = new HtmlContentFilterConfig(
|
||||
filterOutNonContentElements: false,
|
||||
useAllowListFilter: false,
|
||||
);
|
||||
$filter = new static($config);
|
||||
return $filter->filterString($html);
|
||||
}
|
||||
}
|
||||
|
||||
31
app/Util/HtmlContentFilterConfig.php
Normal file
31
app/Util/HtmlContentFilterConfig.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Util;
|
||||
|
||||
readonly class HtmlContentFilterConfig
|
||||
{
|
||||
public function __construct(
|
||||
public bool $filterOutJavaScript = true,
|
||||
public bool $filterOutBadHtmlElements = true,
|
||||
public bool $filterOutFormElements = true,
|
||||
public bool $filterOutNonContentElements = true,
|
||||
public bool $useAllowListFilter = true,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance from a config string, where the string
|
||||
* is a combination of characters to enable filters.
|
||||
*/
|
||||
public static function fromConfigString(string $config): self
|
||||
{
|
||||
$config = strtolower($config);
|
||||
return new self(
|
||||
filterOutJavaScript: str_contains($config, 'j'),
|
||||
filterOutBadHtmlElements: str_contains($config, 'h'),
|
||||
filterOutFormElements: str_contains($config, 'f'),
|
||||
filterOutNonContentElements: str_contains($config, 'h'),
|
||||
useAllowListFilter: str_contains($config, 'a'),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@
|
||||
"ext-zip": "*",
|
||||
"bacon/bacon-qr-code": "^3.0",
|
||||
"dompdf/dompdf": "^3.1",
|
||||
"ezyang/htmlpurifier": "^4.19",
|
||||
"guzzlehttp/guzzle": "^7.4",
|
||||
"intervention/image": "^3.5",
|
||||
"knplabs/knp-snappy": "^1.5",
|
||||
@@ -38,7 +39,8 @@
|
||||
"socialiteproviders/microsoft-azure": "^5.1",
|
||||
"socialiteproviders/okta": "^4.2",
|
||||
"socialiteproviders/twitch": "^5.3",
|
||||
"ssddanbrown/htmldiff": "^2.0.0"
|
||||
"ssddanbrown/htmldiff": "^2.0.0",
|
||||
"xemlock/htmlpurifier-html5": "^0.1.12"
|
||||
},
|
||||
"require-dev": {
|
||||
"fakerphp/faker": "^1.21",
|
||||
|
||||
123
composer.lock
generated
123
composer.lock
generated
@@ -4,7 +4,7 @@
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "556613432c8fb7d8f96bcf637c8c07a9",
|
||||
"content-hash": "8dc695e5ecb6cea01e282394da136713",
|
||||
"packages": [
|
||||
{
|
||||
"name": "aws/aws-crt-php",
|
||||
@@ -919,6 +919,67 @@
|
||||
],
|
||||
"time": "2025-03-06T22:45:56+00:00"
|
||||
},
|
||||
{
|
||||
"name": "ezyang/htmlpurifier",
|
||||
"version": "v4.19.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ezyang/htmlpurifier.git",
|
||||
"reference": "b287d2a16aceffbf6e0295559b39662612b77fcf"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/b287d2a16aceffbf6e0295559b39662612b77fcf",
|
||||
"reference": "b287d2a16aceffbf6e0295559b39662612b77fcf",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": "~5.6.0 || ~7.0.0 || ~7.1.0 || ~7.2.0 || ~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0 || ~8.2.0 || ~8.3.0 || ~8.4.0 || ~8.5.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"cerdic/css-tidy": "^1.7 || ^2.0",
|
||||
"simpletest/simpletest": "dev-master"
|
||||
},
|
||||
"suggest": {
|
||||
"cerdic/css-tidy": "If you want to use the filter 'Filter.ExtractStyleBlocks'.",
|
||||
"ext-bcmath": "Used for unit conversion and imagecrash protection",
|
||||
"ext-iconv": "Converts text to and from non-UTF-8 encodings",
|
||||
"ext-tidy": "Used for pretty-printing HTML"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"files": [
|
||||
"library/HTMLPurifier.composer.php"
|
||||
],
|
||||
"psr-0": {
|
||||
"HTMLPurifier": "library/"
|
||||
},
|
||||
"exclude-from-classmap": [
|
||||
"/library/HTMLPurifier/Language/"
|
||||
]
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"LGPL-2.1-or-later"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Edward Z. Yang",
|
||||
"email": "admin@htmlpurifier.org",
|
||||
"homepage": "http://ezyang.com"
|
||||
}
|
||||
],
|
||||
"description": "Standards compliant HTML filter written in PHP",
|
||||
"homepage": "http://htmlpurifier.org/",
|
||||
"keywords": [
|
||||
"html"
|
||||
],
|
||||
"support": {
|
||||
"issues": "https://github.com/ezyang/htmlpurifier/issues",
|
||||
"source": "https://github.com/ezyang/htmlpurifier/tree/v4.19.0"
|
||||
},
|
||||
"time": "2025-10-17T16:34:55+00:00"
|
||||
},
|
||||
{
|
||||
"name": "firebase/php-jwt",
|
||||
"version": "v7.0.2",
|
||||
@@ -8279,6 +8340,66 @@
|
||||
}
|
||||
],
|
||||
"time": "2024-11-21T01:49:47+00:00"
|
||||
},
|
||||
{
|
||||
"name": "xemlock/htmlpurifier-html5",
|
||||
"version": "v0.1.12",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/xemlock/htmlpurifier-html5.git",
|
||||
"reference": "535349cb160bf79752920e1e83c4a94c3e7d2b21"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/xemlock/htmlpurifier-html5/zipball/535349cb160bf79752920e1e83c4a94c3e7d2b21",
|
||||
"reference": "535349cb160bf79752920e1e83c4a94c3e7d2b21",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"ezyang/htmlpurifier": "^4.8",
|
||||
"php": ">=5.3"
|
||||
},
|
||||
"require-dev": {
|
||||
"masterminds/html5": "^2.7",
|
||||
"php-coveralls/php-coveralls": "^1.1|^2.1",
|
||||
"phpunit/phpunit": ">=4.7 <10.0"
|
||||
},
|
||||
"suggest": {
|
||||
"masterminds/html5": "Required to use HTMLPurifier_Lexer_HTML5"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"classmap": [
|
||||
"library/HTMLPurifier/"
|
||||
]
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "xemlock",
|
||||
"email": "xemlock@gmail.com"
|
||||
}
|
||||
],
|
||||
"description": "HTML5 support for HTML Purifier",
|
||||
"homepage": "https://github.com/xemlock/htmlpurifier-html5",
|
||||
"keywords": [
|
||||
"HTML5",
|
||||
"Purifier",
|
||||
"html",
|
||||
"htmlpurifier",
|
||||
"security",
|
||||
"tidy",
|
||||
"validator",
|
||||
"xss"
|
||||
],
|
||||
"support": {
|
||||
"issues": "https://github.com/xemlock/htmlpurifier-html5/issues",
|
||||
"source": "https://github.com/xemlock/htmlpurifier-html5/tree/v0.1.12"
|
||||
},
|
||||
"time": "2026-02-09T21:03:14+00:00"
|
||||
}
|
||||
],
|
||||
"packages-dev": [
|
||||
|
||||
2
storage/purifier/.gitignore
vendored
Normal file
2
storage/purifier/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
*
|
||||
!.gitignore
|
||||
Reference in New Issue
Block a user