import { Injectable } from '@nestjs/common';
import { createHash } from 'node:crypto';
import { createReadStream } from 'node:fs';
import { stat } from 'node:fs/promises';
import { Writable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import { JOBS_LIBRARY_PAGINATION_SIZE } from 'src/constants';
import { StorageCore } from 'src/cores/storage.core';
import { OnEvent, OnJob } from 'src/decorators';
import {
  ImmichWorker,
  IntegrityReportType,
  JobName,
  JobStatus,
  QueueName,
  StorageFolder,
  SystemMetadataKey,
} from 'src/enum';
import { ArgOf } from 'src/repositories/event.repository';
import { BaseService } from 'src/services/base.service';
import { IIntegrityMissingFilesJob, IIntegrityOrphanedFilesJob } from 'src/types';
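
/**
 * Background integrity checks for files on disk: orphaned files (on disk but not
 * referenced in the database), missing files (referenced but gone from disk), and
 * checksum verification of original assets.
 */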
@Injectable()
export class IntegrityService extends BaseService {
  // private backupLock = false;

  @OnEvent({ name: 'ConfigInit', workers: [ImmichWorker.Microservices] })
  async onConfigInit({
    newConfig: {
      backup: { database },
    },
  }: ArgOf<'ConfigInit'>) {
    // this.backupLock = await this.databaseRepository.tryLock(DatabaseLock.BackupDatabase);
    // if (this.backupLock) {
    //   this.cronRepository.create({
    //     name: 'backupDatabase',
    //     expression: database.cronExpression,
    //     onTick: () => handlePromiseError(this.jobRepository.queue({ name: JobName.DatabaseBackup }), this.logger),
    //     start: database.enabled,
    //   });
    // }

    setTimeout(() => {
      this.jobRepository.queue({
        name: JobName.IntegrityOrphanedFilesQueueAll,
        data: {},
      });

      this.jobRepository.queue({
        name: JobName.IntegrityMissingFilesQueueAll,
        data: {},
      });

      this.jobRepository.queue({
        name: JobName.IntegrityChecksumFiles,
        data: {},
      });
    }, 1000);
  }
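
  // Placeholder: re-syncs the cron schedule on config changes once the
  // commented-out cron wiring in onConfigInit is enabled.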
  @OnEvent({ name: 'ConfigUpdate', server: true })
  async onConfigUpdate({ newConfig: { backup } }: ArgOf<'ConfigUpdate'>) {
    // if (!this.backupLock) {
    //   return;
    // }

    // this.cronRepository.update({
    //   name: 'backupDatabase',
    //   expression: backup.database.cronExpression,
    //   start: backup.database.enabled,
    // });
  }
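
  /**
   * Walks the storage folders on disk and fans the discovered paths out into
   * batched IntegrityOrphanedFiles jobs, one batch per page of walked files.
   */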
  @OnJob({ name: JobName.IntegrityOrphanedFilesQueueAll, queue: QueueName.BackgroundTask })
  async handleOrphanedFilesQueueAll(): Promise<JobStatus> {
    this.logger.log(`Scanning for orphaned files...`);

    const assetPaths = this.storageRepository.walk({
      pathsToCrawl: [StorageFolder.EncodedVideo, StorageFolder.Library, StorageFolder.Upload].map((folder) =>
        StorageCore.getBaseFolder(folder),
      ),
      includeHidden: false,
      take: JOBS_LIBRARY_PAGINATION_SIZE,
    });

    const assetFilePaths = this.storageRepository.walk({
      pathsToCrawl: [StorageCore.getBaseFolder(StorageFolder.Thumbnails)],
      includeHidden: false,
      take: JOBS_LIBRARY_PAGINATION_SIZE,
    });
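
    // Merge both walks into one stream, tagging each batch with the table
    // ('asset' vs 'asset_file') its paths should be checked against.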
    async function* paths() {
      for await (const batch of assetPaths) {
        yield ['asset', batch] as const;
      }

      for await (const batch of assetFilePaths) {
        yield ['asset_file', batch] as const;
      }
    }

    let total = 0;
    for await (const [batchType, batchPaths] of paths()) {
      await this.jobRepository.queue({
        name: JobName.IntegrityOrphanedFiles,
        data: {
          type: batchType,
          paths: batchPaths,
        },
      });

      const count = batchPaths.length;
      total += count;
      this.logger.log(`Queued orphan check of ${count} file(s) (${total} so far)`);
    }

    return JobStatus.Success;
  }
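
  /**
   * Checks one batch of on-disk paths against the database; any path that no
   * asset (or asset file) references is recorded as an OrphanFile report.
   */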
  @OnJob({ name: JobName.IntegrityOrphanedFiles, queue: QueueName.BackgroundTask })
  async handleOrphanedFiles({ type, paths }: IIntegrityOrphanedFilesJob): Promise<JobStatus> {
    this.logger.log(`Processing batch of ${paths.length} files to check if they are orphaned.`);

    const orphanedFiles = new Set<string>(paths);
    if (type === 'asset') {
      const assets = await this.assetJobRepository.getAssetPathsByPaths(paths);
      for (const { originalPath, encodedVideoPath } of assets) {
        orphanedFiles.delete(originalPath);
        if (encodedVideoPath) {
          orphanedFiles.delete(encodedVideoPath);
        }
      }
    } else {
      const assets = await this.assetJobRepository.getAssetFilePathsByPaths(paths);
      for (const { path } of assets) {
        orphanedFiles.delete(path);
      }
    }

    await this.integrityReportRepository.create(
      [...orphanedFiles].map((path) => ({
        type: IntegrityReportType.OrphanFile,
        path,
      })),
    );

    this.logger.log(`Processed ${paths.length} file(s) and found ${orphanedFiles.size} orphaned file(s).`);
    return JobStatus.Success;
  }
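
  /**
   * Streams all asset paths from the database and fans them out into batched
   * IntegrityMissingFiles jobs of JOBS_LIBRARY_PAGINATION_SIZE paths each.
   */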
  @OnJob({ name: JobName.IntegrityMissingFilesQueueAll, queue: QueueName.BackgroundTask })
  async handleMissingFilesQueueAll(): Promise<JobStatus> {
    this.logger.log(`Scanning for missing files...`);

    const assetPaths = this.assetJobRepository.streamAssetPaths();

    async function* chunk<T>(generator: AsyncIterableIterator<T>, n: number) {
      let chunk: T[] = [];
      for await (const item of generator) {
        chunk.push(item);

        if (chunk.length === n) {
          yield chunk;
          chunk = [];
        }
      }

      if (chunk.length) {
        yield chunk;
      }
    }

    let total = 0;
    for await (const batchPaths of chunk(assetPaths, JOBS_LIBRARY_PAGINATION_SIZE)) {
      await this.jobRepository.queue({
        name: JobName.IntegrityMissingFiles,
        data: {
          paths: batchPaths,
        },
      });

      total += batchPaths.length;
      this.logger.log(`Queued missing check of ${batchPaths.length} file(s) (${total} so far)`);
    }

    return JobStatus.Success;
  }
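
  /**
   * stat()s each path in the batch: files that exist again have any stale
   * report cleared, files that are gone get a MissingFile report.
   */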
  @OnJob({ name: JobName.IntegrityMissingFiles, queue: QueueName.BackgroundTask })
  async handleMissingFiles({ paths }: IIntegrityMissingFilesJob): Promise<JobStatus> {
    this.logger.log(`Processing batch of ${paths.length} files to check if they are missing.`);

    const results = await Promise.all(
      paths.map((file) =>
        stat(file.path)
          .then(() => ({ ...file, exists: true }))
          .catch(() => ({ ...file, exists: false })),
      ),
    );

    const outdatedReports = results
      .filter(({ exists, reportId }) => exists && reportId)
      .map(({ reportId }) => reportId!);

    if (outdatedReports.length) {
      await this.integrityReportRepository.deleteByIds(outdatedReports);
    }

    const missingFiles = results.filter(({ exists }) => !exists);
    if (missingFiles.length) {
      await this.integrityReportRepository.create(
        missingFiles.map(({ path }) => ({
          type: IntegrityReportType.MissingFile,
          path,
        })),
      );
    }

    this.logger.log(`Processed ${paths.length} file(s) and found ${missingFiles.length} missing file(s).`);
    return JobStatus.Success;
  }
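
  /**
   * Incrementally re-hashes original files and compares them against the stored
   * checksums. Bounded by a time budget and a percentage of all assets per run;
   * progress is checkpointed so the next run resumes where this one stopped.
   */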
  @OnJob({ name: JobName.IntegrityChecksumFiles, queue: QueueName.BackgroundTask })
  async handleChecksumFiles(): Promise<JobStatus> {
    const timeLimit = 60 * 60 * 1000; // 1 hour per run
    const percentageLimit = 1.0; // fraction of all assets to process per run

    this.logger.log(
      `Checking file checksums... (will run for up to ${(timeLimit / (60 * 60 * 1000)).toFixed(2)} hours or until ${(percentageLimit * 100).toFixed(2)}% of assets are processed)`,
    );

    let processed = 0;
    const startedAt = Date.now();
    const { count } = await this.assetJobRepository.getAssetCount();
    const checkpoint = await this.systemMetadataRepository.get(SystemMetadataKey.IntegrityChecksumCheckpoint);
    let startMarker: Date | undefined = checkpoint?.date ? new Date(checkpoint.date) : undefined;
    let endMarker: Date | undefined; // todo

    const printStats = () => {
      const averageTime = ((Date.now() - startedAt) / processed).toFixed(2);
      const completionProgress = ((processed / count) * 100).toFixed(2);
      this.logger.log(
        `Processed ${processed} files so far... (avg. ${averageTime} ms/asset, ${completionProgress}% of all assets)`,
      );
    };

    let lastCreatedAt: Date | undefined;
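
    // Two passes at most: first from the checkpoint to the newest asset, then
    // (if the run wraps around) from the beginning back up to the checkpoint.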
    finishEarly: do {
      this.logger.log(
        `Processing assets in range [${startMarker?.toISOString() ?? 'beginning'}, ${endMarker?.toISOString() ?? 'end'}]`,
      );

      const assets = this.assetJobRepository.streamAssetChecksums(startMarker, endMarker);
      endMarker = startMarker;
      startMarker = undefined;
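
      // Stream each file through a SHA-1 hash without buffering it in memory,
      // then compare the digest against the checksum stored for the asset.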
      for await (const { originalPath, checksum, createdAt, reportId } of assets) {
        try {
          const hash = createHash('sha1');
          await pipeline([
            createReadStream(originalPath),
            new Writable({
              write(chunk, _encoding, callback) {
                hash.update(chunk);
                callback();
              },
            }),
          ]);

          if (checksum.equals(hash.digest())) {
            if (reportId) {
              await this.integrityReportRepository.deleteById(reportId);
            }
          } else {
            throw new Error('File failed checksum');
          }
        } catch (error) {
          if ((error as { code?: string }).code === 'ENOENT') {
            if (reportId) {
              await this.integrityReportRepository.deleteById(reportId);
            }

            // missing file; handled by the missing files job
            continue;
          }

          this.logger.warn(`Failed to process a file: ${error}`);

          await this.integrityReportRepository.create({
            path: originalPath,
            type: IntegrityReportType.ChecksumFail,
          });
        }

        processed++;
        if (processed % 100 === 0) {
          printStats();
        }

        if (Date.now() > startedAt + timeLimit || processed > count * percentageLimit) {
          this.logger.log('Reached stop criteria.');
          lastCreatedAt = createdAt;
          break finishEarly;
        }
      }
    } while (endMarker);
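
    // Persist the resume point. When the run covered everything, lastCreatedAt
    // is undefined, which clears the checkpoint so the next run starts over.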
    await this.systemMetadataRepository.set(SystemMetadataKey.IntegrityChecksumCheckpoint, {
      date: lastCreatedAt?.toISOString(),
    });

    printStats();

    if (lastCreatedAt) {
      this.logger.log(`Finished checksum job, will continue from ${lastCreatedAt.toISOString()}.`);
    } else {
      this.logger.log(`Finished checksum job, covered all assets.`);
    }

    return JobStatus.Success;
  }
}