fix(server): add filename search (#6394)

Fixes https://github.com/immich-app/immich/issues/5982.

There are basically three options:

1. Search `originalFileName` by dropping a file extension from the query
(if present). Lower fidelity but very easy - just a standard index &
equality.
2. Search `originalPath` by adding an index on `reverse(originalPath)`
and using `starts_with(reverse(originalPath), reverse(query) + "/")`. A
weird index & query but high fidelity.
3. Add a new generated column called `originalFileNameWithExtension` or
something. More storage, kinda jank.

TBH, I think (1) is good enough and easy to make better in the future.
For example, if I search "DSC_4242.jpg", I don't really think it matters
if "DSC_4242.mov" also shows up.

edit: There's a fourth approach that we discussed a bit in Discord and
decided we could switch to in the future: using a GIN index. The minor
issue is that Postgres doesn't tokenize paths in a useful way (each path
is a single token, so it won't match against partial components). We can
solve that by tokenizing it ourselves. For example:

```
immich=# with vecs as (select to_tsvector('simple', array_to_string(string_to_array('upload/library/sushain/2015/2015-08-09/IMG_275.JPG', '/'), ' ')) as vec)  select * from vecs where vec @@ phraseto_tsquery('simple', array_to_string(string_to_array('library/sushain', '/'), ' '));
                                      vec
-------------------------------------------------------------------------------
 '-08':6 '-09':7 '2015':4,5 'img_275.jpg':8 'library':2 'sushain':3 'upload':1
(1 row)
```

The query is also tokenized with the 'split-by-slash-join-with-space'
strategy. This strategy results in `IMG_275.JPG`, `2015`, `sushain` and
`library/sushain` matching. However, `08` and `IMG_275` do not match: the
former because the stored token is `-08`, and the latter because the
`img_275.jpg` token must be matched exactly.
This commit is contained in:
Sushain Cherivirala
2024-01-15 12:40:28 -08:00
committed by GitHub
parent f160969894
commit 7fc1954e2a
5 changed files with 108 additions and 9 deletions

View File

@@ -127,6 +127,7 @@ export class AssetEntity {
livePhotoVideoId!: string | null;
// Indexed so that asset search can match this column with a plain equality
// comparison. The search strips any file extension from the user's query
// before comparing, so values here are presumably stored without an
// extension — TODO confirm against the code that populates this column.
@Column({ type: 'varchar' })
@Index()
originalFileName!: string;
@Column({ type: 'varchar', nullable: true })

View File

@@ -0,0 +1,13 @@
import { MigrationInterface, QueryRunner } from 'typeorm';
/**
 * Migration that adds an index on the `"originalFileName"` column of the
 * `"assets"` table, and removes it again on rollback.
 */
export class AddOriginalFileNameIndex1705306747072 implements MigrationInterface {
  name = 'AddOriginalFileNameIndex1705306747072';

  /**
   * Applies the migration by creating the index.
   *
   * @param queryRunner - Runner used to execute the raw SQL statement.
   */
  public async up(queryRunner: QueryRunner): Promise<void> {
    const createIndexSql = `CREATE INDEX "IDX_4d66e76dada1ca180f67a205dc" ON "assets" ("originalFileName") `;
    await queryRunner.query(createIndexSql);
  }

  /**
   * Reverts the migration by dropping the index created in `up`.
   *
   * @param queryRunner - Runner used to execute the raw SQL statement.
   */
  public async down(queryRunner: QueryRunner): Promise<void> {
    const dropIndexSql = `DROP INDEX "public"."IDX_4d66e76dada1ca180f67a205dc"`;
    await queryRunner.query(dropIndexSql);
  }
}

View File

@@ -24,7 +24,8 @@ import { Injectable } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import _ from 'lodash';
import { DateTime } from 'luxon';
import { And, FindOptionsRelations, FindOptionsWhere, In, IsNull, LessThan, Not, Repository } from 'typeorm';
import path from 'path';
import { And, Brackets, FindOptionsRelations, FindOptionsWhere, In, IsNull, LessThan, Not, Repository } from 'typeorm';
import { AssetEntity, AssetJobStatusEntity, AssetType, ExifEntity, SmartInfoEntity } from '../entities';
import { DummyValue, GenerateSql } from '../infra.util';
import { Chunked, ChunkedArray, OptionalBetween, paginate } from '../infra.utils';
@@ -820,9 +821,13 @@ export class AssetRepository implements IAssetRepository {
.innerJoin('exif', 'e', 'asset."id" = e."assetId"')
.leftJoin('smart_info', 'si', 'si."assetId" = asset."id"')
.andWhere(
`(e."exifTextSearchableColumn" || COALESCE(si."smartInfoTextSearchableColumn", to_tsvector('english', '')))
@@ PLAINTO_TSQUERY('english', :query)`,
{ query },
new Brackets((qb) => {
qb.where(
`(e."exifTextSearchableColumn" || COALESCE(si."smartInfoTextSearchableColumn", to_tsvector('english', '')))
@@ PLAINTO_TSQUERY('english', :query)`,
{ query },
).orWhere('asset."originalFileName" = :path', { path: path.parse(query).name });
}),
)
.addOrderBy('asset.fileCreatedAt', 'DESC')
.limit(numResults)

View File

@@ -765,11 +765,14 @@ WHERE
AND "asset"."ownerId" IN ($1)
AND "asset"."isArchived" = $2
AND (
e."exifTextSearchableColumn" || COALESCE(
si."smartInfoTextSearchableColumn",
to_tsvector('english', '')
)
) @@ PLAINTO_TSQUERY('english', $3)
(
e."exifTextSearchableColumn" || COALESCE(
si."smartInfoTextSearchableColumn",
to_tsvector('english', '')
)
) @@ PLAINTO_TSQUERY('english', $3)
OR asset."originalFileName" = $4
)
)
AND ("asset"."deletedAt" IS NULL)
ORDER BY