mirror of
https://github.com/BookStackApp/BookStack.git
synced 2026-02-25 03:10:24 +03:00
Added a formal object type to carry across vector search results. Added permission application and entity combining with vector search results. Also updated namespace from vectors to queries.
90 lines
2.6 KiB
PHP
90 lines
2.6 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace BookStack\Search\Queries;
|
|
|
|
use BookStack\Activity\Models\Tag;
|
|
use BookStack\Entities\Models\Entity;
|
|
use BookStack\Search\Queries\Services\VectorQueryService;
|
|
use Illuminate\Support\Facades\DB;
|
|
|
|
/**
 * Builds vector embeddings from an entity's text content and persists them
 * as SearchVector rows, replacing whatever was previously stored for that entity.
 */
class EntityVectorGenerator
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Regenerate and store the embeddings for the given entity.
     *
     * All embeddings are generated before any stored data is touched, and the
     * delete + insert pair runs inside a database transaction so that a failed
     * insert does not leave the entity without any stored vectors.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        // NOTE(review): assumes SearchVector uses the default database
        // connection, which is the one DB::transaction() operates on - confirm.
        DB::transaction(function () use ($entity, $embeddings, $chunks): void {
            $this->deleteExistingEmbeddingsForEntity($entity);
            $this->storeEmbeddings($embeddings, $chunks, $entity);
        });
    }

    /**
     * Remove every stored vector row that belongs to the given entity,
     * matched on its morph class and id.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Insert one vector row per embedding, pairing each embedding with the
     * text chunk found at the same array key.
     *
     * @param array[]  $embeddings One list of vector components per chunk.
     * @param string[] $textChunks The chunks the embeddings were generated from.
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            // The vector literal is concatenated into a raw SQL expression, so
            // every component is forced to a float to keep any non-numeric
            // content out of the statement.
            $components = array_map(static fn ($value): float => (float) $value, $embedding);

            $toInsert[] = [
                'entity_id'   => $entity->id,
                'entity_type' => $entity->getMorphClass(),
                // Single quotes delimit the SQL string literal; double quotes
                // would be read as an identifier under the ANSI_QUOTES sql_mode.
                'embedding'   => DB::raw("VEC_FROMTEXT('[" . implode(',', $components) . "]')"),
                'text'        => $textChunks[$index],
            ];
        }

        // Write in batches of 500 rows rather than as one single statement.
        foreach (array_chunk($toInsert, 500) as $batch) {
            SearchVector::query()->insert($batch);
        }
    }

    /**
     * Generate an embedding for each chunk, preserving the chunk array keys
     * so that results can be matched back up to their source text.
     *
     * @param string[] $chunks
     *
     * @return array[] One embedding per chunk, keyed like $chunks. Each embedding
     *                 is whatever generateEmbeddings() returns (handled as a list
     *                 of numeric components when stored).
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];

        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }

        return $embeddings;
    }

    /**
     * Split the given text into chunks via TextChunker.
     *
     * NOTE(review): 500 is presumably the maximum chunk size and the array the
     * ordered list of delimiters to split upon - confirm against TextChunker.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        return (new TextChunker(500, ["\n", '.', ' ', '']))->chunk($text);
    }

    /**
     * Build the plain text used for embedding generation: the entity name,
     * then one "name: value" line per tag, then the content of the entity's
     * configured text field.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $tags = $entity->tags()->get();

        // Double quotes are required here so that tags are separated by a real
        // newline character instead of a literal backslash followed by "n".
        $tagText = $tags
            ->map(static fn (Tag $tag): string => $tag->name . ': ' . $tag->value)
            ->join("\n");

        return $entity->name . "\n{$tagText}\n" . $entity->{$entity->textField};
    }
}
|