mirror of
https://github.com/BookStackApp/BookStack.git
synced 2026-02-05 08:39:55 +03:00
Compare commits
9 Commits
l10n_devel
...
vectors
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8e0edb63c7 | ||
|
|
bb08f62327 | ||
|
|
8eef5a1ee7 | ||
|
|
88ccd9e5b9 | ||
|
|
2c3100e401 | ||
|
|
54f883e815 | ||
|
|
e611b3239e | ||
|
|
b9ecf55e1f | ||
|
|
2d5548240a |
@@ -22,6 +22,18 @@ return [
|
||||
// Callback URL for social authentication methods
|
||||
'callback_url' => env('APP_URL', false),
|
||||
|
||||
// LLM Service
|
||||
// Options: openai
|
||||
'llm' => env('LLM_SERVICE', ''),
|
||||
|
||||
// OpenAI API-compatible service details
|
||||
'openai' => [
|
||||
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
|
||||
'key' => env('OPENAI_KEY', ''),
|
||||
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
|
||||
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
|
||||
],
|
||||
|
||||
'github' => [
|
||||
'client_id' => env('GITHUB_APP_ID', false),
|
||||
'client_secret' => env('GITHUB_APP_SECRET', false),
|
||||
|
||||
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
46
app/Console/Commands/RegenerateVectorsCommand.php
Normal file
@@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Console\Commands;
|
||||
|
||||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Queries\SearchVector;
|
||||
use BookStack\Search\Queries\StoreEntityVectorsJob;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class RegenerateVectorsCommand extends Command
{
    /**
     * Console signature used to invoke this command.
     *
     * @var string
     */
    protected $signature = 'bookstack:regenerate-vectors';

    /**
     * Description of the command.
     *
     * @var string
     */
    protected $description = 'Re-index vectors for all content in the system';

    /**
     * Delete every stored search vector, then dispatch a StoreEntityVectorsJob
     * for each entity of each type returned by the entity provider.
     * Entities are read in chunks of 100 per type.
     */
    public function handle(EntityProvider $entityProvider)
    {
        // TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
        SearchVector::query()->delete();

        // One job per entity; the same callback is reused for every entity type.
        $queueJobs = static function ($entityBatch) {
            foreach ($entityBatch as $entity) {
                dispatch(new StoreEntityVectorsJob($entity));
            }
        };

        foreach ($entityProvider->all() as $typeName => $model) {
            $this->info("Creating jobs to store vectors for {$typeName} data...");
            $model->newQuery()->chunkById(100, $queueJobs);
        }
    }
}
|
||||
89
app/Search/Queries/EntityVectorGenerator.php
Normal file
89
app/Search/Queries/EntityVectorGenerator.php
Normal file
@@ -0,0 +1,89 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Activity\Models\Tag;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Queries\Services\VectorQueryService;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
/**
 * Generates vector embeddings for the text content of an entity and stores
 * them as SearchVector rows, replacing any rows already stored for that entity.
 */
class EntityVectorGenerator
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Build and store embeddings for the given entity.
     * Embeddings are generated before the existing rows are deleted, so a failure
     * while generating leaves the previously stored rows in place.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        $this->deleteExistingEmbeddingsForEntity($entity);
        $this->storeEmbeddings($embeddings, $chunks, $entity);
    }

    /**
     * Remove all stored vector rows which belong to the given entity.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Insert one row per embedding, paired with the text chunk at the same index.
     * Rows are inserted in batches of 500.
     *
     * @param array<int, array<int, float|int|string>> $embeddings
     * @param string[] $textChunks
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            $text = $textChunks[$index];
            $toInsert[] = [
                'entity_id' => $entity->id,
                'entity_type' => $entity->getMorphClass(),
                'embedding' => DB::raw('VEC_FROMTEXT("[' . $this->embeddingToSqlList($embedding) . ']")'),
                'text' => $text,
            ];
        }

        $chunks = array_chunk($toInsert, 500);
        foreach ($chunks as $chunk) {
            SearchVector::query()->insert($chunk);
        }
    }

    /**
     * Format an embedding as a comma-separated list of numbers for use in raw SQL.
     * Each value is cast to float since the values originate from an external
     * service response and are placed into the statement without bindings.
     */
    protected function embeddingToSqlList(array $embedding): string
    {
        return implode(',', array_map(static fn ($value): float => (float) $value, $embedding));
    }

    /**
     * Generate an embedding for each text chunk, keeping the chunk indexes.
     *
     * @param string[] $chunks
     * @return array<int, float[]>
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];
        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }
        return $embeddings;
    }

    /**
     * Split text into chunks using a chunk size of 500, breaking on newlines,
     * then full stops, then spaces, then at any position.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        return (new TextChunker(500, ["\n", '.', ' ', '']))->chunk($text);
    }

    /**
     * Build the text to embed for an entity: its name, then one "name: value"
     * line per tag, then the content of the entity's text field.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $tags = $entity->tags()->get();
        // Double quotes are required here so that tags are joined by a real newline.
        $tagText = $tags->map(function (Tag $tag) {
            return $tag->name . ': ' . $tag->value;
        })->join("\n");

        return $entity->name . "\n{$tagText}\n" . $entity->{$entity->textField};
    }
}
|
||||
26
app/Search/Queries/LlmQueryRunner.php
Normal file
26
app/Search/Queries/LlmQueryRunner.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use Exception;
|
||||
|
||||
class LlmQueryRunner
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
    ) {
    }

    /**
     * Ask the configured LLM service to answer the given query, passing the
     * matched text of each vector search result as context.
     *
     * @param VectorSearchResult[] $vectorResults
     * @throws Exception
     */
    public function run(string $query, array $vectorResults): string
    {
        $service = $this->vectorQueryServiceProvider->get();

        // Only the matched text of each result is sent, as a plain list.
        $toMatchText = static fn (VectorSearchResult $match) => $match->matchText;
        $context = array_values(array_map($toMatchText, $vectorResults));

        return $service->query($query, $context);
    }
}
|
||||
61
app/Search/Queries/QueryController.php
Normal file
61
app/Search/Queries/QueryController.php
Normal file
@@ -0,0 +1,61 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Http\Controller;
|
||||
use BookStack\Search\SearchRunner;
|
||||
use Illuminate\Http\Request;
|
||||
|
||||
class QueryController extends Controller
{
    public function __construct(
        protected SearchRunner $searchRunner,
    ) {
        // TODO - Check via testing
        // Block access to all actions when no LLM service is configured.
        // NOTE(review): showPermissionError() is assumed to halt the request - confirm.
        $this->middleware(function ($request, $next) {
            if (!VectorQueryServiceProvider::isEnabled()) {
                $this->showPermissionError('/');
            }

            return $next($request);
        });
    }

    /**
     * Show the view to start a vector/LLM-based query search.
     * The "ask" request value pre-fills the query input.
     */
    public function show(Request $request)
    {
        $query = $this->readQueryInput($request, 'ask');

        // TODO - Set page title

        return view('search.query', [
            'query' => $query,
        ]);
    }

    /**
     * Perform a vector/LLM-based query search.
     * Streams two events: the rendered list of matched entities,
     * then the text response from the LLM.
     */
    public function run(Request $request, VectorSearchRunner $searchRunner, LlmQueryRunner $llmRunner)
    {
        // TODO - Rate limiting
        $query = $this->readQueryInput($request, 'query');

        return response()->eventStream(function () use ($query, $searchRunner, $llmRunner) {
            $hasQuery = $query !== '';
            $results = $hasQuery ? $searchRunner->run($query) : [];

            // De-duplicate entities, since many matched chunks can belong to one entity.
            $entities = [];
            foreach ($results as $result) {
                $entityKey = $result->entity->getMorphClass() . ':' . $result->entity->id;
                if (!isset($entities[$entityKey])) {
                    $entities[$entityKey] = $result->entity;
                }
            }

            yield ['view' => view('entities.list', ['entities' => $entities])->render()];

            // An empty query still yields a result event so that the event order stays
            // the same, but no request is made to the LLM service for it.
            yield ['result' => $hasQuery ? $llmRunner->run($query, $results) : ''];
        });
    }

    /**
     * Read a query value from the request as a trimmed string.
     * Non-string input (such as array-style parameters) is treated as empty.
     */
    protected function readQueryInput(Request $request, string $key): string
    {
        $value = $request->get($key, '');

        return is_string($value) ? trim($value) : '';
    }
}
|
||||
26
app/Search/Queries/SearchVector.php
Normal file
26
app/Search/Queries/SearchVector.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Permissions\Models\JointPermission;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\HasMany;
|
||||
|
||||
/**
 * A stored embedding for one chunk of text belonging to an entity.
 *
 * @property string $entity_type
 * @property int    $entity_id
 * @property string $text
 * @property string $embedding
 */
class SearchVector extends Model
{
    // This model does not use created/updated timestamp columns.
    public $timestamps = false;

    /**
     * Joint permission rows which share both the entity id and the entity type of this vector.
     */
    public function jointPermissions(): HasMany
    {
        $byEntityId = $this->hasMany(JointPermission::class, 'entity_id', 'entity_id');

        // Matching on id alone is not enough, so the types are compared as well.
        return $byEntityId->whereColumn('search_vectors.entity_type', '=', 'joint_permissions.entity_type');
    }
}
|
||||
66
app/Search/Queries/Services/OpenAiVectorQueryService.php
Normal file
66
app/Search/Queries/Services/OpenAiVectorQueryService.php
Normal file
@@ -0,0 +1,66 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Queries\Services;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
|
||||
/**
 * Vector query service which talks to an OpenAI API-compatible HTTP endpoint.
 */
class OpenAiVectorQueryService implements VectorQueryService
{
    protected string $key;
    protected string $endpoint;
    protected string $embeddingModel;
    protected string $queryModel;

    /**
     * @param array $options Reads the 'key', 'endpoint', 'embedding_model' and 'query_model'
     *                       entries, each falling back to an empty string when not set.
     */
    public function __construct(
        protected array $options,
        protected HttpRequestService $http,
    ) {
        // TODO - Some kind of validation of options
        $this->key = $this->options['key'] ?? '';
        $this->endpoint = $this->options['endpoint'] ?? '';
        $this->embeddingModel = $this->options['embedding_model'] ?? '';
        $this->queryModel = $this->options['query_model'] ?? '';
    }

    /**
     * Send a JSON request to the given URI of the endpoint, using the configured
     * key as a bearer token, and return the decoded JSON response body.
     *
     * @throws \RuntimeException If the response body does not decode to an array.
     */
    protected function jsonRequest(string $method, string $uri, array $data): array
    {
        // Normalise slashes so the endpoint and URI always join with a single slash.
        $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
        $client = $this->http->buildClient(30);
        $request = $this->http->jsonRequest($method, $fullUrl, $data)
            ->withHeader('Authorization', 'Bearer ' . $this->key);

        $response = $client->sendRequest($request);
        $decoded = json_decode($response->getBody()->getContents(), true);

        // json_decode gives null for invalid JSON, and may give a scalar for valid
        // JSON; neither can be returned as an array so fail with a clear message.
        if (!is_array($decoded)) {
            $status = $response->getStatusCode();
            throw new \RuntimeException("Unexpected response from LLM service at {$uri} (HTTP status {$status})");
        }

        return $decoded;
    }

    /**
     * Request an embedding for the given text using the configured embedding model.
     *
     * @return float[]
     * @throws \RuntimeException If the response does not contain an embedding.
     */
    public function generateEmbeddings(string $text): array
    {
        $response = $this->jsonRequest('POST', 'v1/embeddings', [
            'input' => $text,
            'model' => $this->embeddingModel,
        ]);

        $embedding = $response['data'][0]['embedding'] ?? null;
        if (!is_array($embedding)) {
            // NOTE(review): assumes service errors are reported under error.message - confirm.
            $detail = $response['error']['message'] ?? null;
            $detail = is_string($detail) ? $detail : 'no embedding data in response';
            throw new \RuntimeException("Failed to generate embeddings: {$detail}");
        }

        return $embedding;
    }

    /**
     * Ask the configured query model to respond to the input, providing the
     * context entries as newline-separated lines within the prompt.
     * Returns an empty string when the response holds no message content.
     */
    public function query(string $input, array $context): string
    {
        $formattedContext = implode("\n", $context);

        $response = $this->jsonRequest('POST', 'v1/chat/completions', [
            'model' => $this->queryModel,
            'messages' => [
                [
                    'role' => 'developer',
                    'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
                ],
                [
                    'role' => 'user',
                    'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
                ]
            ],
        ]);

        return $response['choices'][0]['message']['content'] ?? '';
    }
}
|
||||
21
app/Search/Queries/Services/VectorQueryService.php
Normal file
21
app/Search/Queries/Services/VectorQueryService.php
Normal file
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Queries\Services;
|
||||
|
||||
interface VectorQueryService
{
    /**
     * Produce the embedding for a single chunk of text.
     *
     * @return float[] The values which make up the embedding.
     */
    public function generateEmbeddings(string $text): array;

    /**
     * Send the user's input to the LLM service, together with context text
     * that was found locally through a vector search, and return the text
     * of the LLM's response.
     *
     * @param string[] $context
     */
    public function query(string $input, array $context): string;
}
|
||||
30
app/Search/Queries/StoreEntityVectorsJob.php
Normal file
30
app/Search/Queries/StoreEntityVectorsJob.php
Normal file
@@ -0,0 +1,30 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Queue\Queueable;
|
||||
|
||||
class StoreEntityVectorsJob implements ShouldQueue
{
    use Queueable;

    /**
     * @param Entity $entity The entity to generate and store vectors for.
     */
    public function __construct(
        protected Entity $entity,
    ) {
    }

    /**
     * Run the job by passing the entity to the vector generator.
     */
    public function handle(EntityVectorGenerator $generator): void
    {
        $generator->generateAndStore($this->entity);
    }
}
|
||||
79
app/Search/Queries/TextChunker.php
Normal file
79
app/Search/Queries/TextChunker.php
Normal file
@@ -0,0 +1,79 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use InvalidArgumentException;
|
||||
|
||||
/**
 * Splits a given string into smaller chunks based on specified delimiters
 * and a predefined maximum chunk size. This will work through the given delimiters
 * to break down text further and further to fit into the chunk size.
 *
 * The last delimiter is always an empty string to ensure text can always be broken down.
 * Sizes are measured in characters rather than bytes, so multibyte characters
 * are never split apart. Empty chunks are never returned.
 */
class TextChunker
{
    /**
     * @param int      $chunkSize      Target maximum number of characters per chunk.
     * @param string[] $delimiterOrder Delimiters to split on, in order of preference.
     *                                 An empty string is appended when not already last.
     *
     * @throws InvalidArgumentException If the chunk size is less than one.
     */
    public function __construct(
        protected int $chunkSize,
        protected array $delimiterOrder,
    ) {
        if (count($this->delimiterOrder) === 0 || $this->delimiterOrder[count($this->delimiterOrder) - 1] !== '') {
            $this->delimiterOrder[] = '';
        }

        if ($this->chunkSize < 1) {
            throw new InvalidArgumentException('Chunk size must be greater than 0');
        }
    }

    /**
     * Split the given text into chunks.
     * Text is split on the first delimiter, with the resulting parts packed together
     * while they fit within the chunk size. A part which is too large on its own is
     * chunked using the remaining delimiters, and those sub-chunks are then packed.
     *
     * @return string[]
     */
    public function chunk(string $text): array
    {
        $delimiter = $this->delimiterOrder[0];
        $delimiterLength = mb_strlen($delimiter);
        // The empty delimiter means a hard split into pieces of the chunk size.
        $lines = ($delimiter === '') ? mb_str_split($text, $this->chunkSize) : explode($delimiter, $text);

        $cChunk = ''; // Current chunk
        $cLength = 0; // Current chunk length
        $chunks = []; // Chunks to return
        $lDelim = ''; // Last delimiter

        foreach ($lines as $line) {
            $lineLength = mb_strlen($line);
            if ($cLength + $lineLength + $delimiterLength <= $this->chunkSize) {
                // Line fits into the current chunk
                $cChunk .= $line . $delimiter;
                $cLength += $lineLength + $delimiterLength;
                $lDelim = $delimiter;
            } elseif ($lineLength <= $this->chunkSize) {
                // Line fits in a chunk of its own, so close off the current chunk
                $chunks[] = trim($cChunk, $delimiter);
                $cChunk = $line . $delimiter;
                $cLength = $lineLength + $delimiterLength;
                $lDelim = $delimiter;
            } else {
                // Line is too large for any chunk, so break it down via the remaining delimiters
                $subChunks = new static($this->chunkSize, array_slice($this->delimiterOrder, 1));
                $subDelimiter = $this->delimiterOrder[1] ?? '';
                $subDelimiterLength = mb_strlen($subDelimiter);
                foreach ($subChunks->chunk($line) as $subChunk) {
                    $chunkLength = mb_strlen($subChunk);
                    if ($cLength + $chunkLength + $subDelimiterLength <= $this->chunkSize) {
                        $cChunk .= $subChunk . $subDelimiter;
                        $cLength += $chunkLength + $subDelimiterLength;
                        $lDelim = $subDelimiter;
                    } else {
                        $chunks[] = trim($cChunk, $lDelim);
                        $cChunk = $subChunk . $subDelimiter;
                        $cLength = $chunkLength + $subDelimiterLength;
                        $lDelim = $subDelimiter;
                    }
                }
            }
        }

        if ($cChunk !== '') {
            $chunks[] = trim($cChunk, $lDelim);
        }

        // The current chunk can be closed off while still empty (for example when the
        // first part is exactly the chunk size), and trimming can leave nothing behind,
        // so empty chunks are dropped here rather than being returned.
        return array_values(array_filter($chunks, static fn (string $chunk): bool => $chunk !== ''));
    }
}
|
||||
38
app/Search/Queries/VectorQueryServiceProvider.php
Normal file
38
app/Search/Queries/VectorQueryServiceProvider.php
Normal file
@@ -0,0 +1,38 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
use BookStack\Search\Queries\Services\OpenAiVectorQueryService;
|
||||
use BookStack\Search\Queries\Services\VectorQueryService;
|
||||
|
||||
/**
 * Provides the vector/LLM query service selected via the 'services.llm' config value.
 */
class VectorQueryServiceProvider
{
    public function __construct(
        protected HttpRequestService $http,
    ) {
    }

    /**
     * Get an instance of the configured query service.
     *
     * @throws \Exception If the configured service name is not known.
     */
    public function get(): VectorQueryService
    {
        $service = static::getServiceName();

        if ($service === 'openai') {
            return new OpenAiVectorQueryService(config('services.openai'), $this->http);
        }

        throw new \Exception("No '{$service}' LLM service found");
    }

    /**
     * Get the configured service name in lower case.
     * Any non-string config value (such as null or false) gives an empty name,
     * instead of causing a type error when passed to strtolower().
     */
    protected static function getServiceName(): string
    {
        $name = config('services.llm');

        return is_string($name) ? strtolower(trim($name)) : '';
    }

    /**
     * Check if a service name has been configured.
     * This does not check that the name refers to a known service.
     */
    public static function isEnabled(): bool
    {
        return !empty(static::getServiceName());
    }
}
|
||||
17
app/Search/Queries/VectorSearchResult.php
Normal file
17
app/Search/Queries/VectorSearchResult.php
Normal file
@@ -0,0 +1,17 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
|
||||
/**
 * Immutable value holder for a single match from a vector search.
 */
readonly class VectorSearchResult
{
    /**
     * @param Entity $entity    The entity which the match belongs to.
     * @param float  $distance  The distance value of the match.
     * @param string $matchText The stored text which was matched.
     */
    public function __construct(
        public Entity $entity,
        public float $distance,
        public string $matchText,
    ) {
    }
}
|
||||
54
app/Search/Queries/VectorSearchRunner.php
Normal file
54
app/Search/Queries/VectorSearchRunner.php
Normal file
@@ -0,0 +1,54 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Entities\Tools\MixedEntityListLoader;
|
||||
use BookStack\Permissions\PermissionApplicator;
|
||||
use Exception;
|
||||
|
||||
class VectorSearchRunner
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
        protected PermissionApplicator $permissions,
        protected MixedEntityListLoader $entityLoader,
    ) {
    }

    /**
     * Run a vector search query to find results across entities.
     * An embedding is generated for the query text, then compared against stored
     * embeddings by cosine distance. At most 10 matches with a distance below 0.6
     * are fetched, closest first.
     *
     * @return VectorSearchResult[]
     * @throws Exception
     */
    public function run(string $query): array
    {
        $queryService = $this->vectorQueryServiceProvider->get();
        $queryVector = $queryService->generateEmbeddings($query);

        // The vector is placed directly into the SQL statement, so every value is
        // forced to a float first since it originates from an external service.
        $vectorSql = implode(',', array_map(static fn ($value): float => (float) $value, $queryVector));

        // TODO - Test permissions applied
        $topMatchesQuery = SearchVector::query()->select('text', 'entity_type', 'entity_id')
            ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . $vectorSql . ']"), embedding) as distance')
            ->orderBy('distance', 'asc')
            ->having('distance', '<', 0.6)
            ->limit(10);

        // NOTE(review): presumably limits matches to entities visible to the current user - confirm.
        $restrictedQuery = $this->permissions->restrictEntityRelationQuery($topMatchesQuery, 'search_vectors', 'entity_id', 'entity_type');
        $topMatches = $restrictedQuery->get();

        $this->entityLoader->loadIntoRelations($topMatches->all(), 'entity', true);

        $results = [];

        foreach ($topMatches as $match) {
            // Matches without a loaded entity are left out of the results.
            if ($match->relationLoaded('entity')) {
                $results[] = new VectorSearchResult(
                    $match->getRelation('entity'),
                    (float) $match->getAttribute('distance'),
                    $match->getAttribute('text'),
                );
            }
        }

        return $results;
    }
}
|
||||
@@ -6,6 +6,7 @@ use BookStack\Entities\Queries\PageQueries;
|
||||
use BookStack\Entities\Queries\QueryPopular;
|
||||
use BookStack\Entities\Tools\SiblingFetcher;
|
||||
use BookStack\Http\Controller;
|
||||
use BookStack\Search\Queries\VectorSearchRunner;
|
||||
use Illuminate\Http\Request;
|
||||
use Illuminate\Pagination\LengthAwarePaginator;
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@ use BookStack\Activity\Models\Tag;
|
||||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Entities\Models\Page;
|
||||
use BookStack\Search\Queries\StoreEntityVectorsJob;
|
||||
use BookStack\Search\Queries\VectorQueryServiceProvider;
|
||||
use BookStack\Util\HtmlDocument;
|
||||
use DOMNode;
|
||||
use Illuminate\Database\Eloquent\Builder;
|
||||
@@ -25,7 +27,7 @@ class SearchIndex
|
||||
public static string $softDelimiters = ".-";
|
||||
|
||||
public function __construct(
|
||||
protected EntityProvider $entityProvider
|
||||
protected EntityProvider $entityProvider,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -37,6 +39,10 @@ class SearchIndex
|
||||
$this->deleteEntityTerms($entity);
|
||||
$terms = $this->entityToTermDataArray($entity);
|
||||
$this->insertTerms($terms);
|
||||
|
||||
if (VectorQueryServiceProvider::isEnabled()) {
|
||||
dispatch(new StoreEntityVectorsJob($entity));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -47,9 +53,15 @@ class SearchIndex
|
||||
public function indexEntities(array $entities): void
|
||||
{
|
||||
$terms = [];
|
||||
$vectorQueryEnabled = VectorQueryServiceProvider::isEnabled();
|
||||
|
||||
foreach ($entities as $entity) {
|
||||
$entityTerms = $this->entityToTermDataArray($entity);
|
||||
array_push($terms, ...$entityTerms);
|
||||
|
||||
if ($vectorQueryEnabled) {
|
||||
dispatch(new StoreEntityVectorsJob($entity));
|
||||
}
|
||||
}
|
||||
|
||||
$this->insertTerms($terms);
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the search_vectors table, then add its vector column
     * and vector index using raw statements.
     */
    public function up(): void
    {
        // TODO - Handle compatibility with older databases that don't support vectors
        Schema::create('search_vectors', function (Blueprint $table) {
            $table->string('entity_type', 100);
            $table->integer('entity_id');
            $table->text('text');

            $table->index(['entity_type', 'entity_id']);
        });

        // Raw statements need the table prefix to be applied manually.
        $prefixedTable = DB::getTablePrefix() . 'search_vectors';

        // TODO - Vector size might need to be dynamic
        $statements = [
            "ALTER TABLE {$prefixedTable} ADD COLUMN (embedding VECTOR(1536) NOT NULL)",
            "ALTER TABLE {$prefixedTable} ADD VECTOR INDEX (embedding) DISTANCE=cosine",
        ];

        foreach ($statements as $statement) {
            DB::statement($statement);
        }
    }

    /**
     * Drop the search_vectors table.
     */
    public function down(): void
    {
        Schema::dropIfExists('search_vectors');
    }
};
|
||||
22
package-lock.json
generated
22
package-lock.json
generated
@@ -23,6 +23,7 @@
|
||||
"@ssddanbrown/codemirror-lang-twig": "^1.0.0",
|
||||
"@types/jest": "^30.0.0",
|
||||
"codemirror": "^6.0.2",
|
||||
"eventsource-client": "^1.1.4",
|
||||
"idb-keyval": "^6.2.2",
|
||||
"markdown-it": "^14.1.0",
|
||||
"markdown-it-task-lists": "^2.1.1",
|
||||
@@ -4797,6 +4798,27 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eventsource-client": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/eventsource-client/-/eventsource-client-1.2.0.tgz",
|
||||
"integrity": "sha512-kDI75RSzO3TwyG/K9w1ap8XwqSPcwi6jaMkNulfVeZmSeUM49U8kUzk1s+vKNt0tGrXgK47i+620Yasn1ccFiw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"eventsource-parser": "^3.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/eventsource-parser": {
|
||||
"version": "3.0.6",
|
||||
"resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz",
|
||||
"integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/execa": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz",
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
"@ssddanbrown/codemirror-lang-twig": "^1.0.0",
|
||||
"@types/jest": "^30.0.0",
|
||||
"codemirror": "^6.0.2",
|
||||
"eventsource-client": "^1.1.4",
|
||||
"idb-keyval": "^6.2.2",
|
||||
"markdown-it": "^14.1.0",
|
||||
"markdown-it-task-lists": "^2.1.1",
|
||||
|
||||
@@ -45,6 +45,7 @@ export {PagePicker} from './page-picker';
|
||||
export {PermissionsTable} from './permissions-table';
|
||||
export {Pointer} from './pointer';
|
||||
export {Popup} from './popup';
|
||||
export {QueryManager} from './query-manager';
|
||||
export {SettingAppColorScheme} from './setting-app-color-scheme';
|
||||
export {SettingColorPicker} from './setting-color-picker';
|
||||
export {SettingHomepageControl} from './setting-homepage-control';
|
||||
|
||||
77
resources/js/components/query-manager.ts
Normal file
77
resources/js/components/query-manager.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import {Component} from "./component";
|
||||
|
||||
/**
 * Runs queries from the query form and displays the streamed results:
 * first the list of matched content, then the generated response.
 */
export class QueryManager extends Component {
    protected input!: HTMLTextAreaElement;
    protected generatedLoading!: HTMLElement;
    protected generatedDisplay!: HTMLElement;
    protected contentLoading!: HTMLElement;
    protected contentDisplay!: HTMLElement;
    protected form!: HTMLFormElement;
    protected fieldset!: HTMLFieldSetElement;

    setup() {
        this.input = this.$refs.input as HTMLTextAreaElement;
        this.form = this.$refs.form as HTMLFormElement;
        this.fieldset = this.$refs.fieldset as HTMLFieldSetElement;
        this.generatedLoading = this.$refs.generatedLoading;
        this.generatedDisplay = this.$refs.generatedDisplay;
        this.contentLoading = this.$refs.contentLoading;
        this.contentDisplay = this.$refs.contentDisplay;

        this.setupListeners();

        // Start lookup if a query is set
        if (this.input.value.trim() !== '') {
            this.runQuery();
        }
    }

    protected setupListeners(): void {
        // Handle form submission
        this.form.addEventListener('submit', event => {
            event.preventDefault();
            this.runQuery();
        });

        // Allow Ctrl+Enter to run a query
        this.input.addEventListener('keydown', event => {
            if (event.key === 'Enter' && event.ctrlKey && this.input.value.trim() !== '') {
                this.runQuery();
            }
        });
    }

    /**
     * Run the current input as a query and show the results as they arrive.
     * Does nothing for an empty query, matching the checks made before the
     * other calls to this method.
     */
    protected async runQuery(): Promise<void> {
        const query = this.input.value.trim();
        if (query === '') {
            return;
        }

        this.contentLoading.hidden = false;
        this.generatedLoading.hidden = false;
        this.contentDisplay.innerHTML = '';
        this.generatedDisplay.innerHTML = '';
        this.fieldset.disabled = true;

        // Keep the query in the URL so the page can be reloaded or shared
        const url = new URL(window.location.href);
        url.searchParams.set('ask', query);
        window.history.pushState({}, '', url.toString());

        const es = window.$http.eventSource('/query', 'POST', {query});

        try {
            let messageCount = 0;
            for await (const {data} of es) {
                messageCount++;
                if (messageCount === 1) {
                    // Entity results
                    this.contentDisplay.innerHTML = JSON.parse(data).view;
                    this.contentLoading.hidden = true;
                } else if (messageCount === 2) {
                    // LLM Output
                    this.generatedDisplay.innerText = JSON.parse(data).result;
                    this.generatedLoading.hidden = true;
                } else {
                    // Any further message marks the end of the expected output
                    break;
                }
            }
        } finally {
            // Always close the connection and restore the form, so a failure while
            // reading or parsing messages does not leave the form disabled.
            es.close();
            this.contentLoading.hidden = true;
            this.generatedLoading.hidden = true;
            this.fieldset.disabled = false;
        }
    }
}
|
||||
@@ -1,3 +1,5 @@
|
||||
import {createEventSource, EventSourceClient} from "eventsource-client";
|
||||
|
||||
type ResponseData = Record<any, any>|string;
|
||||
|
||||
type RequestOptions = {
|
||||
@@ -59,7 +61,6 @@ export class HttpManager {
|
||||
}
|
||||
|
||||
createXMLHttpRequest(method: string, url: string, events: Record<string, (e: Event) => void> = {}): XMLHttpRequest {
|
||||
const csrfToken = document.querySelector('meta[name=token]')?.getAttribute('content');
|
||||
const req = new XMLHttpRequest();
|
||||
|
||||
for (const [eventName, callback] of Object.entries(events)) {
|
||||
@@ -68,7 +69,7 @@ export class HttpManager {
|
||||
|
||||
req.open(method, url);
|
||||
req.withCredentials = true;
|
||||
req.setRequestHeader('X-CSRF-TOKEN', csrfToken || '');
|
||||
req.setRequestHeader('X-CSRF-TOKEN', this.getCSRFToken());
|
||||
|
||||
return req;
|
||||
}
|
||||
@@ -95,12 +96,11 @@ export class HttpManager {
|
||||
requestUrl = urlObj.toString();
|
||||
}
|
||||
|
||||
const csrfToken = document.querySelector('meta[name=token]')?.getAttribute('content') || '';
|
||||
const requestOptions: RequestInit = {...options, credentials: 'same-origin'};
|
||||
requestOptions.headers = {
|
||||
...requestOptions.headers || {},
|
||||
baseURL: window.baseUrl(''),
|
||||
'X-CSRF-TOKEN': csrfToken,
|
||||
'X-CSRF-TOKEN': this.getCSRFToken(),
|
||||
};
|
||||
|
||||
const response = await fetch(requestUrl, requestOptions);
|
||||
@@ -191,6 +191,27 @@ export class HttpManager {
|
||||
return this.dataRequest('DELETE', url, data);
|
||||
}
|
||||
|
||||
/**
 * Open an event source connection to the given URL.
 * Non-absolute URLs are resolved against the application base URL.
 * The body is sent as JSON, except for GET and HEAD requests where no
 * body is sent since such requests cannot carry one.
 */
eventSource(url: string, method: string = 'GET', body: object = {}): EventSourceClient {
    if (!url.startsWith('http')) {
        url = window.baseUrl(url);
    }

    const headers: Record<string, string> = {
        'X-CSRF-TOKEN': this.getCSRFToken(),
    };

    const sendsBody = !['GET', 'HEAD'].includes(method.toUpperCase());
    if (sendsBody) {
        headers['Content-Type'] = 'application/json';
    }

    return createEventSource({
        url,
        method,
        ...(sendsBody ? {body: JSON.stringify(body)} : {}),
        credentials: 'same-origin',
        headers,
    });
}
|
||||
|
||||
/**
 * Read the CSRF token from the "token" meta tag of the page.
 * Gives an empty string when the tag or its content is missing.
 */
protected getCSRFToken(): string {
    const tokenMeta = document.querySelector('meta[name=token]');
    const token = tokenMeta?.getAttribute('content');
    return token || '';
}
|
||||
|
||||
/**
|
||||
* Parse the response text for an error response to a user
|
||||
* presentable string. Handles a range of errors responses including
|
||||
|
||||
@@ -601,3 +601,29 @@ input.shortcut-input {
|
||||
max-width: 120px;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
// Layout for the query form: input and submit button side by side.
.query-form {
  display: flex;
  flex-direction: row;
  gap: vars.$m;

  textarea {
    font-size: 1.4rem;
    height: 100px;
    box-shadow: vars.$bs-card;
    border-radius: 8px;
    color: #444;
  }

  button {
    align-self: start;
    margin: 0;
    font-size: 1.6rem;
  }

  // Dimmed, non-interactive look shared by both disabled controls.
  button:disabled,
  textarea:disabled {
    opacity: 0.5;
    cursor: not-allowed;
  }
}
|
||||
52
resources/views/search/query.blade.php
Normal file
52
resources/views/search/query.blade.php
Normal file
@@ -0,0 +1,52 @@
|
||||
@extends('layouts.simple')
|
||||
|
||||
@section('body')
|
||||
<div component="query-manager" class="container small pt-xxl">
|
||||
|
||||
<div class="card content-wrap auto-height">
|
||||
<h1 class="list-heading">Start a Query</h1>
|
||||
<form action="{{ url('/query') }}"
|
||||
refs="query-manager@form"
|
||||
title="Run Query"
|
||||
method="post">
|
||||
<fieldset class="query-form" refs="query-manager@fieldset">
|
||||
<textarea name="query"
|
||||
refs="query-manager@input"
|
||||
class="input-fill-width"
|
||||
rows="5"
|
||||
placeholder="Enter a query"
|
||||
autocomplete="off">{{ $query }}</textarea>
|
||||
<button class="button icon">@icon('search')</button>
|
||||
</fieldset>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="card content-wrap auto-height pb-xl">
|
||||
<h2 class="list-heading">Generated Response</h2>
|
||||
<div refs="query-manager@generated-loading" hidden>
|
||||
@include('common.loading-icon')
|
||||
</div>
|
||||
<p refs="query-manager@generated-display">
|
||||
<span class="text-muted italic">
|
||||
When you run a query, the relevant content found and shown below will be used to help produce a machine-generated response.
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="card content-wrap auto-height pb-xl">
|
||||
<h2 class="list-heading">Relevant Content</h2>
|
||||
<div refs="query-manager@content-loading" hidden>
|
||||
@include('common.loading-icon')
|
||||
</div>
|
||||
<div class="book-contents">
|
||||
<div refs="query-manager@content-display" class="entity-list">
|
||||
<p class="text-muted italic mx-m">
|
||||
Start a query to find relevant matching content.
|
||||
The items shown here reflect those used to help provide the above response.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@stop
|
||||
@@ -11,6 +11,7 @@ use BookStack\Exports\Controllers as ExportControllers;
|
||||
use BookStack\Http\Middleware\VerifyCsrfToken;
|
||||
use BookStack\Permissions\PermissionsController;
|
||||
use BookStack\References\ReferenceController;
|
||||
use BookStack\Search\Queries\QueryController;
|
||||
use BookStack\Search\SearchController;
|
||||
use BookStack\Settings as SettingControllers;
|
||||
use BookStack\Sorting as SortingControllers;
|
||||
@@ -196,6 +197,11 @@ Route::middleware('auth')->group(function () {
|
||||
Route::get('/search/entity-selector-templates', [SearchController::class, 'templatesForSelector']);
|
||||
Route::get('/search/suggest', [SearchController::class, 'searchSuggestions']);
|
||||
|
||||
// Queries
|
||||
Route::get('/query', [QueryController::class, 'show']);
|
||||
Route::get('/query/run', [QueryController::class, 'run']); // TODO - Development only, remove
|
||||
Route::post('/query', [QueryController::class, 'run']);
|
||||
|
||||
// User Search
|
||||
Route::get('/search/users/select', [UserControllers\UserSearchController::class, 'forSelect']);
|
||||
Route::get('/search/users/mention', [UserControllers\UserSearchController::class, 'forMentions']);
|
||||
|
||||
47
tests/Search/TextChunkerTest.php
Normal file
47
tests/Search/TextChunkerTest.php
Normal file
@@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
namespace Search;
|
||||
|
||||
use BookStack\Search\Queries\TextChunker;
|
||||
use Tests\TestCase;
|
||||
|
||||
/**
 * Tests for splitting and packing of text by the TextChunker.
 */
class TextChunkerTest extends TestCase
{
    public function test_it_chunks_text(): void
    {
        // No delimiters given, so text is split purely by chunk size
        $result = (new TextChunker(3, []))->chunk('123456789');

        $this->assertEquals(['123', '456', '789'], $result);
    }

    public function test_chunk_size_must_be_greater_than_zero(): void
    {
        $this->expectException(\InvalidArgumentException::class);

        new TextChunker(-5, []);
    }

    public function test_it_works_through_given_delimiters(): void
    {
        $result = (new TextChunker(5, ['-', '.', '']))->chunk('12-3456.789abcdefg');

        $this->assertEquals(['12', '3456', '789ab', 'cdefg'], $result);
    }

    public function test_it_attempts_to_pack_chunks(): void
    {
        $result = (new TextChunker(8, [' ', '']))->chunk('123 456 789 abc def');

        $this->assertEquals(['123 456', '789 abc', 'def'], $result);
    }

    public function test_it_attempts_to_pack_using_subchunks(): void
    {
        $result = (new TextChunker(8, [' ', '-', '']))->chunk('123 456-789abc');

        $this->assertEquals(['123 456', '789abc'], $result);
    }
}
|
||||
Reference in New Issue
Block a user