Merge branch 'v25-12' into development

This commit is contained in:
Dan Brown
2026-02-21 13:59:29 +00:00
25 changed files with 1220 additions and 574 deletions

View File

@@ -8,6 +8,7 @@ use BookStack\Permissions\PermissionApplicator;
use BookStack\Users\Models\HasCreatorAndUpdater;
use BookStack\Users\Models\OwnableInterface;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
use Illuminate\Database\Eloquent\Builder;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
@@ -82,7 +83,8 @@ class Comment extends Model implements Loggable, OwnableInterface
public function safeHtml(): string
{
return HtmlContentFilter::removeActiveContentFromHtmlString($this->html ?? '');
$filter = new HtmlContentFilter(new HtmlContentFilterConfig());
return $filter->filterString($this->html ?? '');
}
public function jointPermissions(): HasMany

View File

@@ -37,10 +37,15 @@ return [
// The limit for all uploaded files, including images and attachments in MB.
'upload_limit' => env('FILE_UPLOAD_SIZE_LIMIT', 50),
// Allow <script> tags to entered within page content.
// <script> tags are escaped by default.
// Even when overridden the WYSIWYG editor may still escape script content.
'allow_content_scripts' => env('ALLOW_CONTENT_SCRIPTS', false),
// Control the behaviour of content filtering, primarily used for page content.
// This setting is a string of characters which represent different available filters:
// - j - Filter out JavaScript and unknown binary data based content
// - h - Filter out unexpected, and potentially dangerous, HTML elements
// - f - Filter out unexpected form elements
// - a - Run content through a more complex allowlist filter
// This defaults to using all filters, unless ALLOW_CONTENT_SCRIPTS is set to true in which case no filters are used.
// Note: These filters are a best-attempt and may not be 100% effective. They are typically a layer used in addition to other security measures.
'content_filtering' => env('APP_CONTENT_FILTERING', env('ALLOW_CONTENT_SCRIPTS', false) === true ? '' : 'jhfa'),
// Allow server-side fetches to be performed to potentially unknown
// and user-provided locations. Primarily used in exports when loading
@@ -48,8 +53,8 @@ return [
'allow_untrusted_server_fetching' => env('ALLOW_UNTRUSTED_SERVER_FETCHING', false),
// Override the default behaviour for allowing crawlers to crawl the instance.
// May be ignored if view has be overridden or modified.
// Defaults to null since, if not set, 'app-public' status used instead.
// May be ignored if the underlying view has been overridden or modified.
// Defaults to null in which case the 'app-public' status is used instead.
'allow_robots' => env('ALLOW_ROBOTS', null),
// Application Base URL, Used by laravel in development commands

View File

@@ -21,6 +21,8 @@ use BookStack\Exceptions\PermissionsException;
use BookStack\Http\Controller;
use BookStack\Permissions\Permission;
use BookStack\References\ReferenceFetcher;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
use Exception;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
use Illuminate\Http\Request;
@@ -173,7 +175,7 @@ class PageController extends Controller
}
/**
* Get page from an ajax request.
* Get a page from an ajax request.
*
* @throws NotFoundException
*/
@@ -183,6 +185,10 @@ class PageController extends Controller
$page->setHidden(array_diff($page->getHidden(), ['html', 'markdown']));
$page->makeHidden(['book']);
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
$filter = new HtmlContentFilter($filterConfig);
$page->html = $filter->filterString($page->html);
return response()->json($page);
}

View File

@@ -6,6 +6,7 @@ use BookStack\Entities\Models\Book;
use BookStack\Entities\Models\Bookshelf;
use BookStack\Entities\Models\Chapter;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
class EntityHtmlDescription
{
@@ -55,7 +56,8 @@ class EntityHtmlDescription
return '<p></p>';
}
return HtmlContentFilter::removeActiveContentFromHtmlString($html);
$filter = new HtmlContentFilter(new HtmlContentFilterConfig());
return $filter->filterString($html);
}
public function getPlain(): string

View File

@@ -2,6 +2,7 @@
namespace BookStack\Entities\Tools;
use BookStack\App\AppVersion;
use BookStack\Entities\Models\Page;
use BookStack\Entities\Queries\PageQueries;
use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
@@ -13,6 +14,7 @@ use BookStack\Uploads\ImageRepo;
use BookStack\Uploads\ImageService;
use BookStack\Users\Models\User;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
use BookStack\Util\HtmlDocument;
use BookStack\Util\WebSafeMimeSniffer;
use Closure;
@@ -317,11 +319,30 @@ class PageContent
$this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
}
if (!config('app.allow_content_scripts')) {
HtmlContentFilter::removeActiveContentFromDocument($doc);
$cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml());
$cached = cache()->get($cacheKey, null);
if ($cached !== null) {
return $cached;
}
return $doc->getBodyInnerHtml();
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
$filter = new HtmlContentFilter($filterConfig);
$filtered = $filter->filterDocument($doc);
$cacheTime = 86400 * 7; // 1 week
cache()->put($cacheKey, $filtered, $cacheTime);
return $filtered;
}
protected function getContentCacheKey(string $html): string
{
$contentHash = md5($html);
$contentId = $this->page->id;
$contentTime = $this->page->updated_at?->timestamp ?? time();
$appVersion = AppVersion::get();
$filterConfig = config('app.content_filtering') ?? '';
return "page-content-cache::{$filterConfig}::{$appVersion}::{$contentId}::{$contentTime}::{$contentHash}";
}
/**

View File

@@ -8,6 +8,8 @@ use BookStack\Entities\Queries\EntityQueries;
use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
use BookStack\Permissions\Permission;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
class PageEditorData
{
@@ -47,6 +49,7 @@ class PageEditorData
$isDraftRevision = false;
$this->warnings = [];
$editActivity = new PageEditActivity($page);
$lastEditorId = $page->updated_by ?? user()->id;
if ($editActivity->hasActiveEditing()) {
$this->warnings[] = $editActivity->activeEditingMessage();
@@ -58,11 +61,20 @@ class PageEditorData
$page->forceFill($userDraft->only(['name', 'html', 'markdown']));
$isDraftRevision = true;
$this->warnings[] = $editActivity->getEditingActiveDraftMessage($userDraft);
$lastEditorId = $userDraft->created_by;
}
// Get editor type and handle changes
$editorType = $this->getEditorType($page);
$this->updateContentForEditor($page, $editorType);
// Filter HTML content if required
if ($editorType->isHtmlBased() && !old('html') && $lastEditorId !== user()->id) {
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
$filter = new HtmlContentFilter($filterConfig);
$page->html = $filter->filterString($page->html);
}
return [
'page' => $page,
'book' => $page->book,

View File

@@ -4,25 +4,16 @@ namespace BookStack\Theming;
use BookStack\Util\CspService;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
use BookStack\Util\HtmlNonceApplicator;
use Illuminate\Contracts\Cache\Repository as Cache;
class CustomHtmlHeadContentProvider
{
/**
* @var CspService
*/
protected $cspService;
/**
* @var Cache
*/
protected $cache;
public function __construct(CspService $cspService, Cache $cache)
{
$this->cspService = $cspService;
$this->cache = $cache;
public function __construct(
protected CspService $cspService,
protected Cache $cache
) {
}
/**
@@ -50,7 +41,8 @@ class CustomHtmlHeadContentProvider
$hash = md5($content);
return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
return HtmlContentFilter::removeActiveContentFromHtmlString($content);
$config = new HtmlContentFilterConfig(filterOutNonContentElements: false, useAllowListFilter: false);
return (new HtmlContentFilter($config))->filterString($content);
});
}

View File

@@ -0,0 +1,150 @@
<?php
namespace BookStack\Util;
use BookStack\App\AppVersion;
use HTMLPurifier;
use HTMLPurifier_Config;
use HTMLPurifier_DefinitionCache_Serializer;
use HTMLPurifier_HTML5Config;
use HTMLPurifier_HTMLDefinition;
/**
* Provides a configured HTML Purifier instance.
* https://github.com/ezyang/htmlpurifier
* Also uses this to extend support to HTML5 elements:
* https://github.com/xemlock/htmlpurifier-html5
*/
class ConfiguredHtmlPurifier
{
protected HTMLPurifier $purifier;
protected static bool $cachedChecked = false;
public function __construct()
{
// This is done by the web-server at run-time, with the existing
// storage/framework/cache folder to ensure we're using a server-writable folder.
$cachePath = storage_path('framework/cache/purifier');
$this->createCacheFolderIfNeeded($cachePath);
$config = HTMLPurifier_HTML5Config::createDefault();
$this->setConfig($config, $cachePath);
$this->resetCacheIfNeeded($config);
$htmlDef = $config->getDefinition('HTML', true, true);
if ($htmlDef instanceof HTMLPurifier_HTMLDefinition) {
$this->configureDefinition($htmlDef);
}
$this->purifier = new HTMLPurifier($config);
}
protected function createCacheFolderIfNeeded(string $cachePath): void
{
if (!file_exists($cachePath)) {
mkdir($cachePath, 0777, true);
}
}
protected function resetCacheIfNeeded(HTMLPurifier_Config $config): void
{
if (self::$cachedChecked) {
return;
}
$cachedForVersion = cache('htmlpurifier::cache-version');
$appVersion = AppVersion::get();
if ($cachedForVersion !== $appVersion) {
foreach (['HTML', 'CSS', 'URI'] as $name) {
$cache = new HTMLPurifier_DefinitionCache_Serializer($name);
$cache->flush($config);
}
cache()->set('htmlpurifier::cache-version', $appVersion);
}
self::$cachedChecked = true;
}
protected function setConfig(HTMLPurifier_Config $config, string $cachePath): void
{
$config->set('Cache.SerializerPath', $cachePath);
$config->set('Core.AllowHostnameUnderscore', true);
$config->set('CSS.AllowTricky', true);
$config->set('HTML.SafeIframe', true);
$config->set('Attr.EnableID', true);
$config->set('Attr.ID.HTML5', true);
$config->set('Output.FixInnerHTML', false);
$config->set('URI.SafeIframeRegexp', '%^(http://|https://|//)%');
$config->set('URI.AllowedSchemes', [
'http' => true,
'https' => true,
'mailto' => true,
'ftp' => true,
'nntp' => true,
'news' => true,
'tel' => true,
'file' => true,
]);
// $config->set('Cache.DefinitionImpl', null); // Disable cache during testing
}
public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): void
{
// Allow the object element
$definition->addElement(
'object',
'Inline',
'Flow',
'Common',
[
'data' => 'URI',
'type' => 'Text',
'width' => 'Length',
'height' => 'Length',
]
);
// Allow the embed element
$definition->addElement(
'embed',
'Inline',
'Empty',
'Common',
[
'src' => 'URI',
'type' => 'Text',
'width' => 'Length',
'height' => 'Length',
]
);
// Allow checkbox inputs
$definition->addElement(
'input',
'Formctrl',
'Empty',
'Common',
[
'checked' => 'Bool#checked',
'disabled' => 'Bool#disabled',
'name' => 'Text',
'readonly' => 'Bool#readonly',
'type' => 'Enum#checkbox',
'value' => 'Text',
]
);
// Allow the drawio-diagram attribute on div elements
$definition->addAttribute(
'div',
'drawio-diagram',
'Number',
);
}
public function purify(string $html): string
{
return $this->purifier->purify($html);
}
}

View File

@@ -65,7 +65,7 @@ class CspService
*/
protected function getScriptSrc(): string
{
if (config('app.allow_content_scripts')) {
if ($this->scriptFilteringDisabled()) {
return '';
}
@@ -108,7 +108,7 @@ class CspService
*/
protected function getObjectSrc(): string
{
if (config('app.allow_content_scripts')) {
if ($this->scriptFilteringDisabled()) {
return '';
}
@@ -124,6 +124,11 @@ class CspService
return "base-uri 'self'";
}
protected function scriptFilteringDisabled(): bool
{
return !HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'))->filterOutJavaScript;
}
protected function getAllowedIframeHosts(): array
{
$hosts = config('app.iframe_hosts') ?? '';

View File

@@ -8,12 +8,46 @@ use DOMNodeList;
class HtmlContentFilter
{
/**
* Remove all active content from the given HTML document.
* This aims to cover anything which can dynamically deal with, or send, data
* like any JavaScript actions or form content.
*/
public static function removeActiveContentFromDocument(HtmlDocument $doc): void
public function __construct(
protected HtmlContentFilterConfig $config
) {
}
public function filterDocument(HtmlDocument $doc): string
{
if ($this->config->filterOutJavaScript) {
$this->filterOutScriptsFromDocument($doc);
}
if ($this->config->filterOutFormElements) {
$this->filterOutFormElementsFromDocument($doc);
}
if ($this->config->filterOutBadHtmlElements) {
$this->filterOutBadHtmlElementsFromDocument($doc);
}
if ($this->config->filterOutNonContentElements) {
$this->filterOutNonContentElementsFromDocument($doc);
}
$filtered = $doc->getBodyInnerHtml();
if ($this->config->useAllowListFilter) {
$filtered = $this->applyAllowListFiltering($filtered);
}
return $filtered;
}
public function filterString(string $html): string
{
return $this->filterDocument(new HtmlDocument($html));
}
protected function applyAllowListFiltering(string $html): string
{
$purifier = new ConfiguredHtmlPurifier();
return $purifier->purify($html);
}
protected function filterOutScriptsFromDocument(HtmlDocument $doc): void
{
// Remove standard script tags
$scriptElems = $doc->queryXPath('//script');
@@ -27,17 +61,17 @@ class HtmlContentFilter
$badForms = $doc->queryXPath('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']');
static::removeNodes($badForms);
// Remove meta tag to prevent external redirects
$metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
static::removeNodes($metaTags);
// Remove data or JavaScript iFrames
// Remove data or JavaScript iFrames & embeds
$badIframes = $doc->queryXPath('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]');
static::removeNodes($badIframes);
// Remove data or JavaScript objects
$badObjects = $doc->queryXPath('//*[' . static::xpathContains('@data', 'data:') . '] | //*[' . static::xpathContains('@data', 'javascript:') . ']');
static::removeNodes($badObjects);
// Remove attributes, within svg children, hiding JavaScript or data uris.
// A bunch of svg element and attribute combinations expose xss possibilities.
// For example, SVG animate tag can exploit javascript in values.
// For example, SVG animate tag can exploit JavaScript in values.
$badValuesAttrs = $doc->queryXPath('//svg//@*[' . static::xpathContains('.', 'data:') . '] | //svg//@*[' . static::xpathContains('.', 'javascript:') . ']');
static::removeAttributes($badValuesAttrs);
@@ -49,7 +83,10 @@ class HtmlContentFilter
// Remove 'on*' attributes
$onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
static::removeAttributes($onAttributes);
}
protected function filterOutFormElementsFromDocument(HtmlDocument $doc): void
{
// Remove form elements
$formElements = ['form', 'fieldset', 'button', 'textarea', 'select'];
foreach ($formElements as $formElement) {
@@ -75,41 +112,21 @@ class HtmlContentFilter
}
}
/**
* Remove active content from the given HTML string.
* This aims to cover anything which can dynamically deal with, or send, data
* like any JavaScript actions or form content.
*/
public static function removeActiveContentFromHtmlString(string $html): string
protected function filterOutBadHtmlElementsFromDocument(HtmlDocument $doc): void
{
if (empty($html)) {
return $html;
// Remove meta tag to prevent external redirects
$metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
static::removeNodes($metaTags);
}
protected function filterOutNonContentElementsFromDocument(HtmlDocument $doc): void
{
// Remove non-content elements
$formElements = ['link', 'style', 'meta', 'title', 'template'];
foreach ($formElements as $formElement) {
$matchingFormElements = $doc->queryXPath('//' . $formElement);
static::removeNodes($matchingFormElements);
}
$doc = new HtmlDocument($html);
static::removeActiveContentFromDocument($doc);
return $doc->getBodyInnerHtml();
}
/**
* Alias using the old method name to avoid potential compatibility breaks during patch release.
* To remove in future feature release.
* @deprecated Use removeActiveContentFromDocument instead.
*/
public static function removeScriptsFromDocument(HtmlDocument $doc): void
{
static::removeActiveContentFromDocument($doc);
}
/**
* Alias using the old method name to avoid potential compatibility breaks during patch release.
* To remove in future feature release.
* @deprecated Use removeActiveContentFromHtmlString instead.
*/
public static function removeScriptsFromHtmlString(string $html): string
{
return static::removeActiveContentFromHtmlString($html);
}
/**
@@ -147,4 +164,34 @@ class HtmlContentFilter
$parentNode->removeAttribute($attrName);
}
}
/**
* Alias using the old method name to avoid potential compatibility breaks during patch release.
* To remove in future feature release.
* @deprecated Use filterDocument instead.
*/
public static function removeScriptsFromDocument(HtmlDocument $doc): void
{
$config = new HtmlContentFilterConfig(
filterOutNonContentElements: false,
useAllowListFilter: false,
);
$filter = new self($config);
$filter->filterDocument($doc);
}
/**
* Alias using the old method name to avoid potential compatibility breaks during patch release.
* To remove in future feature release.
* @deprecated Use filterString instead.
*/
public static function removeScriptsFromHtmlString(string $html): string
{
$config = new HtmlContentFilterConfig(
filterOutNonContentElements: false,
useAllowListFilter: false,
);
$filter = new self($config);
return $filter->filterString($html);
}
}

View File

@@ -0,0 +1,31 @@
<?php
namespace BookStack\Util;
readonly class HtmlContentFilterConfig
{
public function __construct(
public bool $filterOutJavaScript = true,
public bool $filterOutBadHtmlElements = true,
public bool $filterOutFormElements = true,
public bool $filterOutNonContentElements = true,
public bool $useAllowListFilter = true,
) {
}
/**
* Create an instance from a config string, where the string
* is a combination of characters to enable filters.
*/
public static function fromConfigString(string $config): self
{
$config = strtolower($config);
return new self(
filterOutJavaScript: str_contains($config, 'j'),
filterOutBadHtmlElements: str_contains($config, 'h'),
filterOutFormElements: str_contains($config, 'f'),
filterOutNonContentElements: str_contains($config, 'h'),
useAllowListFilter: str_contains($config, 'a'),
);
}
}

View File

@@ -103,7 +103,13 @@ class HtmlDocument
*/
public function getBody(): DOMNode
{
return $this->document->getElementsByTagName('body')[0];
$bodies = $this->document->getElementsByTagName('body');
if ($bodies->length === 0) {
return new DOMElement('body', '');
}
return $bodies[0];
}
/**