2025-03-24 16:28:14 +00:00
|
|
|
<?php
|
|
|
|
|
|
2025-08-21 12:14:52 +01:00
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
|
|
namespace BookStack\Search\Queries;
|
2025-03-24 16:28:14 +00:00
|
|
|
|
2025-08-19 11:04:14 +01:00
|
|
|
use BookStack\Activity\Models\Tag;
|
2025-03-24 16:28:14 +00:00
|
|
|
use BookStack\Entities\Models\Entity;
|
2025-08-21 12:14:52 +01:00
|
|
|
use BookStack\Search\Queries\Services\VectorQueryService;
|
2025-03-24 16:28:14 +00:00
|
|
|
use Illuminate\Support\Facades\DB;
|
|
|
|
|
|
|
|
|
|
/**
 * Generates vector embeddings for the text content of an entity and stores
 * them as SearchVector rows, replacing any rows already stored for that entity.
 */
class EntityVectorGenerator
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Build embeddings for the given entity and persist them, replacing
     * any embeddings previously stored against the same entity.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);

        // Embeddings are generated before the existing rows are removed, so a failure
        // while generating them leaves the previously stored vectors untouched.
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        $this->deleteExistingEmbeddingsForEntity($entity);
        $this->storeEmbeddings($embeddings, $chunks, $entity);
    }

    /**
     * Delete every stored search vector row matching the type and id of the given entity.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Insert one search vector row per embedding, pairing each embedding with
     * the text chunk found at the same index.
     *
     * @param array<int, float[]> $embeddings Embeddings keyed by chunk index.
     * @param string[]            $textChunks Text chunks the embeddings were generated from.
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            // The vector is concatenated into a raw SQL expression, so every component
            // is forced to a float first to ensure only numeric text can reach the query.
            $vectorText = implode(',', array_map(
                static fn ($value): float => (float) $value,
                $embedding
            ));

            $toInsert[] = [
                'entity_id'   => $entity->id,
                'entity_type' => $entity->getMorphClass(),
                'embedding'   => DB::raw('VEC_FROMTEXT("[' . $vectorText . ']")'),
                'text'        => $textChunks[$index],
            ];
        }

        // Insert in batches of 500 rows rather than building a single large insert statement.
        foreach (array_chunk($toInsert, 500) as $batch) {
            SearchVector::query()->insert($batch);
        }
    }

    /**
     * Generate an embedding for each of the given text chunks.
     * The keys of the returned array match the keys of the provided chunks.
     *
     * @param string[] $chunks
     *
     * @return array<int, float[]>
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];
        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }

        return $embeddings;
    }

    /**
     * Split the given text into chunks using TextChunker.
     * NOTE(review): 500 is presumably the maximum chunk size, with the list being
     * the separators to split upon in order of preference - confirm against TextChunker.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        return (new TextChunker(500, ["\n", '.', ' ', '']))->chunk($text);
    }

    /**
     * Build the plain text used for embedding an entity: its name, then one
     * "name: value" line per tag, then the content of the entity's text field.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $tags = $entity->tags()->get();

        // Joined using a real newline (double-quoted string); a single-quoted '\n'
        // would insert a literal backslash followed by "n" between the tags.
        $tagText = $tags
            ->map(static fn (Tag $tag): string => $tag->name . ': ' . $tag->value)
            ->join("\n");

        return $entity->name . "\n{$tagText}\n" . $entity->{$entity->textField};
    }
}
|