mirror of
https://github.com/BookStackApp/BookStack.git
synced 2026-05-04 18:08:46 +03:00
Page Content: Added more complex & configurable content filtering
- Added new option to control parts of the filter. - Added whitelist filtering pass via HTMLPurifier.
This commit is contained in:
@@ -8,6 +8,7 @@ use BookStack\Permissions\PermissionApplicator;
|
||||
use BookStack\Users\Models\HasCreatorAndUpdater;
|
||||
use BookStack\Users\Models\OwnableInterface;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use Illuminate\Database\Eloquent\Builder;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
@@ -82,7 +83,8 @@ class Comment extends Model implements Loggable, OwnableInterface
|
||||
|
||||
public function safeHtml(): string
|
||||
{
|
||||
return HtmlContentFilter::removeActiveContentFromHtmlString($this->html ?? '');
|
||||
$filter = new HtmlContentFilter(new HtmlContentFilterConfig());
|
||||
return $filter->filterString($this->html ?? '');
|
||||
}
|
||||
|
||||
public function jointPermissions(): HasMany
|
||||
|
||||
@@ -42,6 +42,18 @@ return [
|
||||
// Even when overridden the WYSIWYG editor may still escape script content.
|
||||
'allow_content_scripts' => env('ALLOW_CONTENT_SCRIPTS', false),
|
||||
|
||||
// Control the behaviour of page content filtering.
|
||||
// This setting is a collection of characters which represent different available filters:
|
||||
// - j - Filter out JavaScript based content
|
||||
// - h - Filter out unexpected, potentially dangerous, HTML elements
|
||||
// - f - Filter out unexpected form elements
|
||||
// - a - Run content through a more complex allow-list filter
|
||||
// This defaults to using all filters, unless ALLOW_CONTENT_SCRIPTS is set to true in which case no filters are used.
|
||||
// Note: These filters are a best attempt, and may not be 100% effective. They are typically a layer used in addition to other security measures.
|
||||
// TODO - Add to example env
|
||||
// TODO - Remove allow_content_scripts option above
|
||||
'content_filtering' => env('CONTENT_FILTERING', env('ALLOW_CONTENT_SCRIPTS', false) === true ? '' : 'jfha'),
|
||||
|
||||
// Allow server-side fetches to be performed to potentially unknown
|
||||
// and user-provided locations. Primarily used in exports when loading
|
||||
// in externally referenced assets.
|
||||
|
||||
@@ -6,6 +6,7 @@ use BookStack\Entities\Models\Book;
|
||||
use BookStack\Entities\Models\Bookshelf;
|
||||
use BookStack\Entities\Models\Chapter;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
|
||||
class EntityHtmlDescription
|
||||
{
|
||||
@@ -50,7 +51,8 @@ class EntityHtmlDescription
|
||||
return $html;
|
||||
}
|
||||
|
||||
return HtmlContentFilter::removeActiveContentFromHtmlString($html);
|
||||
$filter = new HtmlContentFilter(new HtmlContentFilterConfig());
|
||||
return $filter->filterString($html);
|
||||
}
|
||||
|
||||
public function getPlain(): string
|
||||
|
||||
@@ -13,6 +13,7 @@ use BookStack\Uploads\ImageRepo;
|
||||
use BookStack\Uploads\ImageService;
|
||||
use BookStack\Users\Models\User;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use BookStack\Util\HtmlDocument;
|
||||
use BookStack\Util\WebSafeMimeSniffer;
|
||||
use Closure;
|
||||
@@ -317,11 +318,28 @@ class PageContent
|
||||
$this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
|
||||
}
|
||||
|
||||
if (!config('app.allow_content_scripts')) {
|
||||
HtmlContentFilter::removeActiveContentFromDocument($doc);
|
||||
$cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml());
|
||||
$cached = cache()->get($cacheKey, null);
|
||||
if ($cached !== null) {
|
||||
return $cached;
|
||||
}
|
||||
|
||||
return $doc->getBodyInnerHtml();
|
||||
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
|
||||
$filter = new HtmlContentFilter($filterConfig);
|
||||
$filtered = $filter->filterDocument($doc);
|
||||
|
||||
$cacheTime = 86400 * 7; // 1 week
|
||||
cache()->put($cacheKey, $filtered, $cacheTime);
|
||||
|
||||
return $filtered;
|
||||
}
|
||||
|
||||
protected function getContentCacheKey(string $html): string
|
||||
{
|
||||
$contentHash = md5($html);
|
||||
$contentId = $this->page->id;
|
||||
$contentTime = $this->page->updated_at->timestamp;
|
||||
return "page-content-cache::{$contentId}::{$contentTime}::{$contentHash}";
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -4,25 +4,16 @@ namespace BookStack\Theming;
|
||||
|
||||
use BookStack\Util\CspService;
|
||||
use BookStack\Util\HtmlContentFilter;
|
||||
use BookStack\Util\HtmlContentFilterConfig;
|
||||
use BookStack\Util\HtmlNonceApplicator;
|
||||
use Illuminate\Contracts\Cache\Repository as Cache;
|
||||
|
||||
class CustomHtmlHeadContentProvider
|
||||
{
|
||||
/**
|
||||
* @var CspService
|
||||
*/
|
||||
protected $cspService;
|
||||
|
||||
/**
|
||||
* @var Cache
|
||||
*/
|
||||
protected $cache;
|
||||
|
||||
public function __construct(CspService $cspService, Cache $cache)
|
||||
{
|
||||
$this->cspService = $cspService;
|
||||
$this->cache = $cache;
|
||||
public function __construct(
|
||||
protected CspService $cspService,
|
||||
protected Cache $cache
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -50,7 +41,8 @@ class CustomHtmlHeadContentProvider
|
||||
$hash = md5($content);
|
||||
|
||||
return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
|
||||
return HtmlContentFilter::removeActiveContentFromHtmlString($content);
|
||||
$config = new HtmlContentFilterConfig(filterOutNonContentElements: false);
|
||||
return (new HtmlContentFilter($config))->filterString($content);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -5,15 +5,53 @@ namespace BookStack\Util;
|
||||
use DOMAttr;
|
||||
use DOMElement;
|
||||
use DOMNodeList;
|
||||
use HTMLPurifier;
|
||||
use HTMLPurifier_HTML5Config;
|
||||
|
||||
class HtmlContentFilter
|
||||
{
|
||||
/**
|
||||
* Remove all active content from the given HTML document.
|
||||
* This aims to cover anything which can dynamically deal with, or send, data
|
||||
* like any JavaScript actions or form content.
|
||||
*/
|
||||
public static function removeActiveContentFromDocument(HtmlDocument $doc): void
|
||||
public function __construct(
|
||||
protected HtmlContentFilterConfig $config
|
||||
) {
|
||||
}
|
||||
|
||||
public function filterDocument(HtmlDocument $doc): string
|
||||
{
|
||||
if ($this->config->filterOutJavaScript) {
|
||||
$this->filterOutScriptsFromDocument($doc);
|
||||
}
|
||||
if ($this->config->filterOutFormElements) {
|
||||
$this->filterOutFormElementsFromDocument($doc);
|
||||
}
|
||||
if ($this->config->filterOutBadHtmlElements) {
|
||||
$this->filterOutBadHtmlElementsFromDocument($doc);
|
||||
}
|
||||
if ($this->config->filterOutNonContentElements) {
|
||||
$this->filterOutNonContentElementsFromDocument($doc);
|
||||
}
|
||||
|
||||
$filtered = $doc->getBodyInnerHtml();
|
||||
if ($this->config->useAllowListFilter) {
|
||||
$filtered = $this->applyAllowListFiltering($filtered);
|
||||
}
|
||||
|
||||
return $filtered;
|
||||
}
|
||||
|
||||
public function filterString(string $html): string
|
||||
{
|
||||
return $this->filterDocument(new HtmlDocument($html));
|
||||
}
|
||||
|
||||
protected function applyAllowListFiltering(string $html): string
|
||||
{
|
||||
$config = HTMLPurifier_HTML5Config::createDefault();
|
||||
$config->set('Cache.SerializerPath', storage_path('purifier'));
|
||||
$purifier = new HTMLPurifier($config);
|
||||
return $purifier->purify($html);
|
||||
}
|
||||
|
||||
protected function filterOutScriptsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
// Remove standard script tags
|
||||
$scriptElems = $doc->queryXPath('//script');
|
||||
@@ -27,10 +65,6 @@ class HtmlContentFilter
|
||||
$badForms = $doc->queryXPath('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']');
|
||||
static::removeNodes($badForms);
|
||||
|
||||
// Remove meta tag to prevent external redirects
|
||||
$metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
|
||||
static::removeNodes($metaTags);
|
||||
|
||||
// Remove data or JavaScript iFrames
|
||||
$badIframes = $doc->queryXPath('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]');
|
||||
static::removeNodes($badIframes);
|
||||
@@ -49,7 +83,10 @@ class HtmlContentFilter
|
||||
// Remove 'on*' attributes
|
||||
$onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
|
||||
static::removeAttributes($onAttributes);
|
||||
}
|
||||
|
||||
protected function filterOutFormElementsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
// Remove form elements
|
||||
$formElements = ['form', 'fieldset', 'button', 'textarea', 'select'];
|
||||
foreach ($formElements as $formElement) {
|
||||
@@ -75,41 +112,21 @@ class HtmlContentFilter
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove active content from the given HTML string.
|
||||
* This aims to cover anything which can dynamically deal with, or send, data
|
||||
* like any JavaScript actions or form content.
|
||||
*/
|
||||
public static function removeActiveContentFromHtmlString(string $html): string
|
||||
protected function filterOutBadHtmlElementsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
if (empty($html)) {
|
||||
return $html;
|
||||
// Remove meta tag to prevent external redirects
|
||||
$metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
|
||||
static::removeNodes($metaTags);
|
||||
}
|
||||
|
||||
protected function filterOutNonContentElementsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
// Remove non-content elements
|
||||
$formElements = ['link', 'style', 'meta', 'title', 'template'];
|
||||
foreach ($formElements as $formElement) {
|
||||
$matchingFormElements = $doc->queryXPath('//' . $formElement);
|
||||
static::removeNodes($matchingFormElements);
|
||||
}
|
||||
|
||||
$doc = new HtmlDocument($html);
|
||||
static::removeActiveContentFromDocument($doc);
|
||||
|
||||
return $doc->getBodyInnerHtml();
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use removeActiveContentFromDocument instead.
|
||||
*/
|
||||
public static function removeScriptsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
static::removeActiveContentFromDocument($doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use removeActiveContentFromHtmlString instead.
|
||||
*/
|
||||
public static function removeScriptsFromHtmlString(string $html): string
|
||||
{
|
||||
return static::removeActiveContentFromHtmlString($html);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -147,4 +164,34 @@ class HtmlContentFilter
|
||||
$parentNode->removeAttribute($attrName);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use filterDocument instead.
|
||||
*/
|
||||
public static function removeScriptsFromDocument(HtmlDocument $doc): void
|
||||
{
|
||||
$config = new HtmlContentFilterConfig(
|
||||
filterOutNonContentElements: false,
|
||||
useAllowListFilter: false,
|
||||
);
|
||||
$filter = new static($config);
|
||||
$filter->filterDocument($doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias using the old method name to avoid potential compatibility breaks during patch release.
|
||||
* To remove in future feature release.
|
||||
* @deprecated Use filterString instead.
|
||||
*/
|
||||
public static function removeScriptsFromHtmlString(string $html): string
|
||||
{
|
||||
$config = new HtmlContentFilterConfig(
|
||||
filterOutNonContentElements: false,
|
||||
useAllowListFilter: false,
|
||||
);
|
||||
$filter = new static($config);
|
||||
return $filter->filterString($html);
|
||||
}
|
||||
}
|
||||
|
||||
31
app/Util/HtmlContentFilterConfig.php
Normal file
31
app/Util/HtmlContentFilterConfig.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Util;
|
||||
|
||||
readonly class HtmlContentFilterConfig
|
||||
{
|
||||
public function __construct(
|
||||
public bool $filterOutJavaScript = true,
|
||||
public bool $filterOutBadHtmlElements = true,
|
||||
public bool $filterOutFormElements = true,
|
||||
public bool $filterOutNonContentElements = true,
|
||||
public bool $useAllowListFilter = true,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance from a config string, where the string
|
||||
* is a combination of characters to enable filters.
|
||||
*/
|
||||
public static function fromConfigString(string $config): self
|
||||
{
|
||||
$config = strtolower($config);
|
||||
return new self(
|
||||
filterOutJavaScript: str_contains($config, 'j'),
|
||||
filterOutBadHtmlElements: str_contains($config, 'h'),
|
||||
filterOutFormElements: str_contains($config, 'f'),
|
||||
filterOutNonContentElements: str_contains($config, 'h'),
|
||||
useAllowListFilter: str_contains($config, 'a'),
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user