feat: use pgvecto.rs (#3605)

This commit is contained in:
Jason Rasmussen
2023-12-08 11:15:46 -05:00
committed by GitHub
parent 429ad28810
commit 1e99ba8167
99 changed files with 1935 additions and 2583 deletions

View File

@@ -0,0 +1,51 @@
import { MigrationInterface, QueryRunner } from 'typeorm';
export class UsePgVectors1700713871511 implements MigrationInterface {
name = 'UsePgVectors1700713871511';
public async up(queryRunner: QueryRunner): Promise<void> {
const faceDimQuery = await queryRunner.query(`
SELECT CARDINALITY(embedding::real[]) as dimsize
FROM asset_faces
LIMIT 1`);
const clipDimQuery = await queryRunner.query(`
SELECT CARDINALITY("clipEmbedding"::real[]) as dimsize
FROM smart_info
LIMIT 1`);
const faceDimSize = faceDimQuery?.[0]?.['dimsize'] ?? 512;
const clipDimSize = clipDimQuery?.[0]?.['dimsize'] ?? 512;
await queryRunner.query('CREATE EXTENSION IF NOT EXISTS vectors');
await queryRunner.query(`
ALTER TABLE asset_faces
ALTER COLUMN embedding SET NOT NULL,
ALTER COLUMN embedding TYPE vector(${faceDimSize})`);
await queryRunner.query(`
CREATE TABLE smart_search (
"assetId" uuid PRIMARY KEY NOT NULL REFERENCES assets(id) ON DELETE CASCADE,
embedding vector(${clipDimSize}) NOT NULL )`);
await queryRunner.query(`
INSERT INTO smart_search("assetId", embedding)
SELECT si."assetId", si."clipEmbedding"
FROM smart_info si
WHERE "clipEmbedding" IS NOT NULL`);
await queryRunner.query(`ALTER TABLE smart_info DROP COLUMN IF EXISTS "clipEmbedding"`);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`ALTER TABLE asset_faces ALTER COLUMN embedding TYPE real array`);
await queryRunner.query(`ALTER TABLE smart_info ADD COLUMN IF NOT EXISTS "clipEmbedding" TYPE real array`);
await queryRunner.query(`
INSERT INTO smart_info
("assetId", "clipEmbedding")
SELECT s."assetId", s.embedding
FROM smart_search s
ON CONFLICT (s."assetId") DO UPDATE SET "clipEmbedding" = s.embedding`);
await queryRunner.query(`DROP TABLE IF EXISTS smart_search`);
}
}

View File

@@ -0,0 +1,19 @@
import { MigrationInterface, QueryRunner } from 'typeorm';
export class AddCLIPEmbeddingIndex1700713994428 implements MigrationInterface {
name = 'AddCLIPEmbeddingIndex1700713994428';
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`
CREATE INDEX IF NOT EXISTS clip_index ON smart_search
USING vectors (embedding cosine_ops) WITH (options = $$
[indexing.hnsw]
m = 16
ef_construction = 300
$$);`);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`DROP INDEX IF EXISTS clip_index`);
}
}

View File

@@ -0,0 +1,19 @@
import { MigrationInterface, QueryRunner } from 'typeorm';
export class AddFaceEmbeddingIndex1700714033632 implements MigrationInterface {
name = 'AddFaceEmbeddingIndex1700714033632';
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`
CREATE INDEX IF NOT EXISTS face_index ON asset_faces
USING vectors (embedding cosine_ops) WITH (options = $$
[indexing.hnsw]
m = 16
ef_construction = 300
$$);`);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`DROP INDEX IF EXISTS face_index`);
}
}

View File

@@ -0,0 +1,13 @@
import { MigrationInterface, QueryRunner } from 'typeorm';
export class AddSmartInfoTagsIndex1700714072055 implements MigrationInterface {
name = 'AddSmartInfoTagsIndex1700714072055';
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`CREATE INDEX IF NOT EXISTS si_tags ON smart_info USING GIN (tags);`);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`DROP INDEX IF EXISTS si_tags;`);
}
}

View File

@@ -0,0 +1,37 @@
import { MigrationInterface, QueryRunner } from 'typeorm';
export class CreateSmartInfoTextSearchIndex1700714140297 implements MigrationInterface {
name = 'CreateSmartInfoTextSearchIndex1700714140297';
public async up(queryRunner: QueryRunner): Promise<void> {
// https://dba.stackexchange.com/a/164081
await queryRunner.query(`
CREATE OR REPLACE FUNCTION f_concat_ws(text, text[])
RETURNS text
LANGUAGE sql IMMUTABLE PARALLEL SAFE AS
'SELECT array_to_string($2, $1)'`);
await queryRunner.query(`
ALTER TABLE smart_info ADD "smartInfoTextSearchableColumn" tsvector
GENERATED ALWAYS AS (
TO_TSVECTOR(
'english',
f_concat_ws(
' '::text,
COALESCE(tags, array[]::text[]) || COALESCE(objects, array[]::text[])
)
)
)
STORED NOT NULL`);
await queryRunner.query(`
CREATE INDEX smart_info_text_searchable_idx
ON smart_info
USING GIN ("smartInfoTextSearchableColumn")`);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`DROP FUNCTION IF EXISTS immutable_concat_ws`);
await queryRunner.query(`ALTER TABLE smart_info DROP IF EXISTS "smartInfoTextSearchableColumn"`);
}
}

View File

@@ -0,0 +1,14 @@
import { MigrationInterface, QueryRunner } from "typeorm";
export class AddExifCityIndex1701665867595 implements MigrationInterface {
name = 'AddExifCityIndex1701665867595'
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`CREATE INDEX "exif_city" ON "exif" ("city") `);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`DROP INDEX "public"."exif_city"`);
}
}