mirror of
https://github.com/immich-app/immich.git
synced 2025-12-18 17:23:16 +03:00
fix(server): use bigrams for cjk (#24285)
* use bigrams for cjk
* update sql
* linting
* actually migrate ocr
* fix backwards test
* use array
* tweaks
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
import { Kysely, sql } from 'kysely';
|
||||
import { tokenizeForSearch } from 'src/utils/database';
|
||||
|
||||
/**
 * Rebuilds the `ocr_search` table from the rows in `asset_ocr`.
 *
 * The table is truncated first. Then one row per asset is streamed from
 * `asset_ocr`, with that asset's `text` values joined by a single space via
 * `string_agg`. The joined text is passed through `tokenizeForSearch` and the
 * results are written back to `ocr_search` in batches.
 *
 * NOTE(review): `tokenizeForSearch` is defined in `src/utils/database`; its
 * exact output shape is not visible here — confirm it matches the
 * `ocr_search.text` column.
 *
 * @param db Kysely handle the migration runs against.
 */
export async function up(db: Kysely<any>): Promise<void> {
  // Upper bound on the number of rows sent in a single insert statement.
  const flushThreshold = 5000;
  const pendingRows = [];

  await sql`truncate ${sql.table('ocr_search')}`.execute(db);

  // One aggregated row per asset, consumed as a stream rather than loaded
  // into memory all at once.
  const ocrTextByAsset = db
    .selectFrom('asset_ocr')
    .select(['assetId', sql<string>`string_agg(text, ' ')`.as('text')])
    .groupBy('assetId')
    .stream();

  for await (const row of ocrTextByAsset) {
    pendingRows.push({ assetId: row.assetId, text: tokenizeForSearch(row.text) });

    if (pendingRows.length < flushThreshold) {
      continue;
    }

    await db.insertInto('ocr_search').values(pendingRows).execute();
    // Reuse the same array for the next batch.
    pendingRows.length = 0;
  }

  // Write whatever is left over after the stream is exhausted.
  if (pendingRows.length > 0) {
    await db.insertInto('ocr_search').values(pendingRows).execute();
  }
}
|
||||
|
||||
/**
 * Rollback hook for this migration. It performs no work and resolves
 * immediately, so reverting leaves `ocr_search` as `up` populated it.
 */
export async function down(): Promise<void> {
  // No-op: there are no rollback steps for this migration.
}
|
||||
Reference in New Issue
Block a user