import { Injectable } from '@nestjs/common';
import { createHash } from 'node:crypto';
import { createReadStream } from 'node:fs';
import { stat } from 'node:fs/promises';
import { basename } from 'node:path';
import { Readable, Writable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import { JOBS_LIBRARY_PAGINATION_SIZE } from 'src/constants';
import { StorageCore } from 'src/cores/storage.core';
import { OnEvent, OnJob } from 'src/decorators';
import { AuthDto } from 'src/dtos/auth.dto';
import {
  IntegrityGetReportDto,
  IntegrityReportResponseDto,
  IntegrityReportSummaryResponseDto,
} from 'src/dtos/integrity.dto';
import {
  AssetStatus,
  CacheControl,
  DatabaseLock,
  ImmichWorker,
  IntegrityReportType,
  JobName,
  JobStatus,
  QueueName,
  StorageFolder,
  SystemMetadataKey,
} from 'src/enum';
import { ArgOf } from 'src/repositories/event.repository';
import { BaseService } from 'src/services/base.service';
import {
  IIntegrityDeleteReportJob,
  IIntegrityJob,
  IIntegrityMissingFilesJob,
  IIntegrityOrphanedFilesJob,
  IIntegrityPathWithChecksumJob,
  IIntegrityPathWithReportJob,
} from 'src/types';
import { ImmichFileResponse } from 'src/utils/file';
import { handlePromiseError } from 'src/utils/misc';

/**
 * Orphan Files:
 *   Files are detected in /data/encoded-video, /data/library, /data/upload
 *     Checked against the asset table
 *   Files are detected in /data/thumbs
 *     Checked against the asset_file table
 *
 *   * Can perform download or delete of files
 *
 * Missing Files:
 *   Paths are queried from asset (originalPath, encodedVideoPath), asset_file (path)
 *   Check whether files exist on disk
 *
 *   * Reports must include origin (asset or asset_file) & ID for further action
 *   * Can perform trash (asset) or delete (asset_file)
 *
 * Checksum Mismatch:
 *   Paths & checksums are queried from asset (originalPath, checksum)
 *   Check whether files match checksum, missing files ignored
 *
 *   * Reports must include origin (as above) for further action
 *   * Can perform download or trash (asset)
 */
@Injectable()
export class IntegrityService extends BaseService {
  private integrityLock = false;
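
  /**
   * Only the worker that wins the IntegrityCheck database lock registers the
   * cron jobs below, so each check is scheduled by exactly one process.
   */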
  @OnEvent({ name: 'ConfigInit', workers: [ImmichWorker.Microservices] })
  async onConfigInit({
    newConfig: {
      integrityChecks: { orphanedFiles, missingFiles, checksumFiles },
    },
  }: ArgOf<'ConfigInit'>) {
    this.integrityLock = await this.databaseRepository.tryLock(DatabaseLock.IntegrityCheck);
    if (this.integrityLock) {
      this.cronRepository.create({
        name: 'integrityOrphanedFiles',
        expression: orphanedFiles.cronExpression,
        onTick: () =>
          handlePromiseError(
            this.jobRepository.queue({ name: JobName.IntegrityOrphanedFilesQueueAll, data: {} }),
            this.logger,
          ),
        start: orphanedFiles.enabled,
      });
      this.cronRepository.create({
        name: 'integrityMissingFiles',
        expression: missingFiles.cronExpression,
        onTick: () =>
          handlePromiseError(
            this.jobRepository.queue({ name: JobName.IntegrityMissingFilesQueueAll, data: {} }),
            this.logger,
          ),
        start: missingFiles.enabled,
      });
      this.cronRepository.create({
        name: 'integrityChecksumFiles',
        expression: checksumFiles.cronExpression,
        onTick: () =>
          handlePromiseError(this.jobRepository.queue({ name: JobName.IntegrityChecksumFiles, data: {} }), this.logger),
        start: checksumFiles.enabled,
      });
    }

    // debug: run on boot
    setTimeout(() => {
      void this.jobRepository.queue({
        name: JobName.IntegrityOrphanedFilesQueueAll,
        data: {},
      });
      void this.jobRepository.queue({
        name: JobName.IntegrityMissingFilesQueueAll,
        data: {},
      });
      void this.jobRepository.queue({
        name: JobName.IntegrityChecksumFiles,
        data: {},
      });
    }, 1000);
  }
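
  /** Keeps the cron schedules in sync with config changes (lock holder only). */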
  @OnEvent({ name: 'ConfigUpdate', server: true })
  onConfigUpdate({
    newConfig: {
      integrityChecks: { orphanedFiles, missingFiles, checksumFiles },
    },
  }: ArgOf<'ConfigUpdate'>) {
    if (!this.integrityLock) {
      return;
    }

    this.cronRepository.update({
      name: 'integrityOrphanedFiles',
      expression: orphanedFiles.cronExpression,
      start: orphanedFiles.enabled,
    });
    this.cronRepository.update({
      name: 'integrityMissingFiles',
      expression: missingFiles.cronExpression,
      start: missingFiles.enabled,
    });
    this.cronRepository.update({
      name: 'integrityChecksumFiles',
      expression: checksumFiles.cronExpression,
      start: checksumFiles.enabled,
    });
  }

  getIntegrityReportSummary(): Promise<IntegrityReportSummaryResponseDto> {
    return this.integrityRepository.getIntegrityReportSummary();
  }

  async getIntegrityReport(dto: IntegrityGetReportDto): Promise<IntegrityReportResponseDto> {
    return this.integrityRepository.getIntegrityReports({ page: dto.page || 1, size: dto.size || 100 }, dto.type);
  }
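
  /**
   * Streams all reports of the given type as CSV. Only the path field is
   * quoted, with embedded quotes doubled (CSV-style), e.g. /data/a"b.jpg
   * becomes "/data/a""b.jpg".
   */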
  getIntegrityReportCsv(type: IntegrityReportType): Readable {
    const items = this.integrityRepository.streamIntegrityReports(type);

    // very rudimentary csv serialiser
    async function* generator() {
      yield 'id,type,assetId,fileAssetId,path\n';
      for await (const item of items) {
        // no expectation of particularly bad filenames,
        // but they could potentially have a newline or quote character
        yield `${item.id},${item.type},${item.assetId},${item.fileAssetId},"${item.path.replaceAll('"', '""')}"\n`;
      }
    }

    return Readable.from(generator());
  }

  async getIntegrityReportFile(id: string): Promise<ImmichFileResponse> {
    const { path } = await this.integrityRepository.getById(id);
    return new ImmichFileResponse({
      path,
      fileName: basename(path),
      contentType: 'application/octet-stream',
      cacheControl: CacheControl.PrivateWithoutCache,
    });
  }
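
  /**
   * Resolves a single report: an asset-backed report trashes the asset (and
   * emits AssetTrashAll), a file-asset-backed report deletes the derived file,
   * and a report with neither unlinks the orphaned path from disk.
   */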
  async deleteIntegrityReport(auth: AuthDto, id: string): Promise<void> {
    const { path, assetId, fileAssetId } = await this.integrityRepository.getById(id);
    if (assetId) {
      await this.assetRepository.updateAll([assetId], {
        deletedAt: new Date(),
        status: AssetStatus.Trashed,
      });
      await this.eventRepository.emit('AssetTrashAll', {
        assetIds: [assetId],
        userId: auth.user.id,
      });
      await this.integrityRepository.deleteById(id);
    } else if (fileAssetId) {
      await this.assetRepository.deleteFiles([{ id: fileAssetId }]);
    } else {
      await this.storageRepository.unlink(path);
      await this.integrityRepository.deleteById(id);
    }
  }
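
  /**
   * First re-validates existing orphan reports, then (unless refreshOnly)
   * walks the storage folders and fans the discovered paths out into batched
   * IntegrityOrphanedFiles jobs.
   */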
  @OnJob({ name: JobName.IntegrityOrphanedFilesQueueAll, queue: QueueName.IntegrityCheck })
  async handleOrphanedFilesQueueAll({ refreshOnly }: IIntegrityJob = {}): Promise<JobStatus> {
    this.logger.log(`Checking for out of date orphaned file reports...`);
    const reports = this.integrityRepository.streamIntegrityReportsWithAssetChecksum(IntegrityReportType.OrphanFile);

    let total = 0;
    for await (const batchReports of chunk(reports, JOBS_LIBRARY_PAGINATION_SIZE)) {
      await this.jobRepository.queue({
        name: JobName.IntegrityOrphanedFilesRefresh,
        data: {
          items: batchReports,
        },
      });
      total += batchReports.length;
      this.logger.log(`Queued report check of ${batchReports.length} report(s) (${total} so far)`);
    }

    if (refreshOnly) {
      this.logger.log('Refresh complete.');
      return JobStatus.Success;
    }

    this.logger.log(`Scanning for orphaned files...`);
    const assetPaths = this.storageRepository.walk({
      pathsToCrawl: [StorageFolder.EncodedVideo, StorageFolder.Library, StorageFolder.Upload].map((folder) =>
        StorageCore.getBaseFolder(folder),
      ),
      includeHidden: false,
      take: JOBS_LIBRARY_PAGINATION_SIZE,
    });
    const assetFilePaths = this.storageRepository.walk({
      pathsToCrawl: [StorageCore.getBaseFolder(StorageFolder.Thumbnails)],
      includeHidden: false,
      take: JOBS_LIBRARY_PAGINATION_SIZE,
    });

    // tag each batch with the table it should be checked against
    async function* paths() {
      for await (const batch of assetPaths) {
        yield ['asset', batch] as const;
      }
      for await (const batch of assetFilePaths) {
        yield ['asset_file', batch] as const;
      }
    }

    total = 0;
    for await (const [batchType, batchPaths] of paths()) {
      await this.jobRepository.queue({
        name: JobName.IntegrityOrphanedFiles,
        data: {
          type: batchType,
          paths: batchPaths,
        },
      });
      const count = batchPaths.length;
      total += count;
      this.logger.log(`Queued orphan check of ${count} file(s) (${total} so far)`);
    }

    return JobStatus.Success;
  }
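
  /**
   * Checks a batch of on-disk paths against the asset or asset_file table and
   * records every path without a matching row as an OrphanFile report.
   */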
  @OnJob({ name: JobName.IntegrityOrphanedFiles, queue: QueueName.IntegrityCheck })
  async handleOrphanedFiles({ type, paths }: IIntegrityOrphanedFilesJob): Promise<JobStatus> {
    this.logger.log(`Processing batch of ${paths.length} files to check if they are orphaned.`);
    const orphanedFiles = new Set<string>(paths);
    if (type === 'asset') {
      const assets = await this.integrityRepository.getAssetPathsByPaths(paths);
      for (const { originalPath, encodedVideoPath } of assets) {
        orphanedFiles.delete(originalPath);
        if (encodedVideoPath) {
          orphanedFiles.delete(encodedVideoPath);
        }
      }
    } else {
      const assets = await this.integrityRepository.getAssetFilePathsByPaths(paths);
      for (const { path } of assets) {
        orphanedFiles.delete(path);
      }
    }

    if (orphanedFiles.size > 0) {
      await this.integrityRepository.create(
        [...orphanedFiles].map((path) => ({
          type: IntegrityReportType.OrphanFile,
          path,
        })),
      );
    }

    this.logger.log(`Processed ${paths.length} file(s) and found ${orphanedFiles.size} orphaned file(s).`);
    return JobStatus.Success;
  }
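
  /**
   * Deletes orphan reports whose file has since disappeared; a failing stat()
   * means the path is gone and the report is out of date.
   */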
  @OnJob({ name: JobName.IntegrityOrphanedFilesRefresh, queue: QueueName.IntegrityCheck })
  async handleOrphanedRefresh({ items }: IIntegrityPathWithReportJob): Promise<JobStatus> {
    this.logger.log(`Processing batch of ${items.length} reports to check if they are out of date.`);
    const results = await Promise.all(
      items.map(({ reportId, path }) =>
        stat(path)
          .then(() => void 0)
          .catch(() => reportId),
      ),
    );
    const reportIds = results.filter(Boolean) as string[];

    if (reportIds.length > 0) {
      await this.integrityRepository.deleteByIds(reportIds);
    }

    this.logger.log(`Processed ${items.length} paths and found ${reportIds.length} report(s) out of date.`);
    return JobStatus.Success;
  }
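
  /**
   * Streams every known asset/asset_file path out of the database and fans
   * them into batched IntegrityMissingFiles jobs (or, with refreshOnly, only
   * re-validates existing reports).
   */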
  @OnJob({ name: JobName.IntegrityMissingFilesQueueAll, queue: QueueName.IntegrityCheck })
  async handleMissingFilesQueueAll({ refreshOnly }: IIntegrityJob = {}): Promise<JobStatus> {
    if (refreshOnly) {
      this.logger.log(`Checking for out of date missing file reports...`);
      const reports = this.integrityRepository.streamIntegrityReportsWithAssetChecksum(IntegrityReportType.MissingFile);

      let total = 0;
      for await (const batchReports of chunk(reports, JOBS_LIBRARY_PAGINATION_SIZE)) {
        await this.jobRepository.queue({
          name: JobName.IntegrityMissingFilesRefresh,
          data: {
            items: batchReports,
          },
        });
        total += batchReports.length;
        this.logger.log(`Queued report check of ${batchReports.length} report(s) (${total} so far)`);
      }

      this.logger.log('Refresh complete.');
      return JobStatus.Success;
    }

    this.logger.log(`Scanning for missing files...`);
    const assetPaths = this.integrityRepository.streamAssetPaths();

    let total = 0;
    for await (const batchPaths of chunk(assetPaths, JOBS_LIBRARY_PAGINATION_SIZE)) {
      await this.jobRepository.queue({
        name: JobName.IntegrityMissingFiles,
        data: {
          items: batchPaths,
        },
      });
      total += batchPaths.length;
      this.logger.log(`Queued missing check of ${batchPaths.length} file(s) (${total} so far)`);
    }

    return JobStatus.Success;
  }
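
  /**
   * stat()s a batch of database paths: paths that exist again have their stale
   * reports deleted, paths that do not get a MissingFile report with the
   * originating asset/asset_file id attached.
   */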
  @OnJob({ name: JobName.IntegrityMissingFiles, queue: QueueName.IntegrityCheck })
  async handleMissingFiles({ items }: IIntegrityMissingFilesJob): Promise<JobStatus> {
    this.logger.log(`Processing batch of ${items.length} files to check if they are missing.`);

    const results = await Promise.all(
      items.map((item) =>
        stat(item.path)
          .then(() => ({ ...item, exists: true }))
          .catch(() => ({ ...item, exists: false })),
      ),
    );

    // a file that exists again invalidates its previous report
    const outdatedReports = results
      .filter(({ exists, reportId }) => exists && reportId)
      .map(({ reportId }) => reportId!);

    if (outdatedReports.length > 0) {
      await this.integrityRepository.deleteByIds(outdatedReports);
    }

    const missingFiles = results.filter(({ exists }) => !exists);
    if (missingFiles.length > 0) {
      await this.integrityRepository.create(
        missingFiles.map(({ path, assetId, fileAssetId }) => ({
          type: IntegrityReportType.MissingFile,
          path,
          assetId,
          fileAssetId,
        })),
      );
    }

    this.logger.log(`Processed ${items.length} file(s) and found ${missingFiles.length} missing file(s).`);
    return JobStatus.Success;
  }
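
  /** Deletes missing-file reports whose path exists on disk again. */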
  @OnJob({ name: JobName.IntegrityMissingFilesRefresh, queue: QueueName.IntegrityCheck })
  async handleMissingRefresh({ items: paths }: IIntegrityPathWithReportJob): Promise<JobStatus> {
    this.logger.log(`Processing batch of ${paths.length} reports to check if they are out of date.`);
    const results = await Promise.all(
      paths.map(({ reportId, path }) =>
        stat(path)
          .then(() => reportId)
          .catch(() => void 0),
      ),
    );
    const reportIds = results.filter(Boolean) as string[];
    if (reportIds.length > 0) {
      await this.integrityRepository.deleteByIds(reportIds);
    }
    this.logger.log(`Processed ${paths.length} paths and found ${reportIds.length} report(s) out of date.`);
    return JobStatus.Success;
  }
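
  /**
   * Verifies original-file checksums in createdAt order. A run is bounded by
   * the configured time and percentage limits; the createdAt of the last
   * processed asset is persisted as a checkpoint so the next run resumes
   * there, wrapping around to the start once the end is reached.
   */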
  @OnJob({ name: JobName.IntegrityChecksumFiles, queue: QueueName.IntegrityCheck })
  async handleChecksumFiles({ refreshOnly }: IIntegrityJob = {}): Promise<JobStatus> {
    if (refreshOnly) {
      this.logger.log(`Checking for out of date checksum file reports...`);
      const reports = this.integrityRepository.streamIntegrityReportsWithAssetChecksum(
        IntegrityReportType.ChecksumFail,
      );

      let total = 0;
      for await (const batchReports of chunk(reports, JOBS_LIBRARY_PAGINATION_SIZE)) {
        await this.jobRepository.queue({
          name: JobName.IntegrityChecksumFilesRefresh,
          data: {
            items: batchReports.map(({ path, reportId, checksum }) => ({
              path,
              reportId,
              checksum: checksum?.toString('hex'),
            })),
          },
        });
        total += batchReports.length;
        this.logger.log(`Queued report check of ${batchReports.length} report(s) (${total} so far)`);
      }

      this.logger.log('Refresh complete.');
      return JobStatus.Success;
    }

    const {
      integrityChecks: {
        checksumFiles: { timeLimit, percentageLimit },
      },
    } = await this.getConfig({ withCache: true });

    this.logger.log(
      `Checking file checksums... (will run for up to ${(timeLimit / (60 * 60 * 1000)).toFixed(2)} hours or until ${(percentageLimit * 100).toFixed(2)}% of assets are processed)`,
    );

    let processed = 0;
    const startedAt = Date.now();
    const { count } = await this.integrityRepository.getAssetCount();
    const checkpoint = await this.systemMetadataRepository.get(SystemMetadataKey.IntegrityChecksumCheckpoint);
    let startMarker: Date | undefined = checkpoint?.date ? new Date(checkpoint.date) : undefined;
    let endMarker: Date | undefined;

    const printStats = () => {
      const averageTime = ((Date.now() - startedAt) / processed).toFixed(2);
      const completionProgress = ((processed / count) * 100).toFixed(2);
      this.logger.log(
        `Processed ${processed} files so far... (avg. ${averageTime} ms/asset, ${completionProgress}% of all assets)`,
      );
    };

    let lastCreatedAt: Date | undefined;
    finishEarly: do {
      this.logger.log(
        `Processing assets in range [${startMarker?.toISOString() ?? 'beginning'}, ${endMarker?.toISOString() ?? 'end'}]`,
      );
      const assets = this.integrityRepository.streamAssetChecksums(startMarker, endMarker);
      // after the first pass (checkpoint -> end), wrap around and cover (beginning -> checkpoint)
      endMarker = startMarker;
      startMarker = undefined;

      for await (const { originalPath, checksum, createdAt, assetId, reportId } of assets) {
        processed++;
        try {
          // hash the file on disk and compare against the stored checksum
          const hash = createHash('sha1');
          await pipeline([
            createReadStream(originalPath),
            new Writable({
              write(chunk, _encoding, callback) {
                hash.update(chunk);
                callback();
              },
            }),
          ]);

          if (checksum.equals(hash.digest())) {
            if (reportId) {
              await this.integrityRepository.deleteById(reportId);
            }
          } else {
            throw new Error('File failed checksum');
          }
        } catch (error) {
          if ((error as { code?: string }).code === 'ENOENT') {
            if (reportId) {
              await this.integrityRepository.deleteById(reportId);
            }
            // missing file; handled by the missing files job
            continue;
          }

          this.logger.warn('Failed to process a file: ' + error);
          await this.integrityRepository.create({
            path: originalPath,
            type: IntegrityReportType.ChecksumFail,
            assetId,
          });
        }

        if (processed % 100 === 0) {
          printStats();
        }

        if (Date.now() > startedAt + timeLimit || processed > count * percentageLimit) {
          this.logger.log('Reached stop criteria.');
          lastCreatedAt = createdAt;
          break finishEarly;
        }
      }
    } while (endMarker);

    await this.systemMetadataRepository.set(SystemMetadataKey.IntegrityChecksumCheckpoint, {
      date: lastCreatedAt?.toISOString(),
    });

    printStats();
    if (lastCreatedAt) {
      this.logger.log(`Finished checksum job, will continue from ${lastCreatedAt.toISOString()}.`);
    } else {
      this.logger.log(`Finished checksum job, covered all assets.`);
    }
    return JobStatus.Success;
  }
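
  /**
   * Re-hashes files behind ChecksumFail reports and deletes reports that are
   * out of date: the checksum now matches, the stored checksum is missing, or
   * the file itself is gone (which the missing files job covers instead).
   */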
  @OnJob({ name: JobName.IntegrityChecksumFilesRefresh, queue: QueueName.IntegrityCheck })
  async handleChecksumRefresh({ items: paths }: IIntegrityPathWithChecksumJob): Promise<JobStatus> {
    this.logger.log(`Processing batch of ${paths.length} reports to check if they are out of date.`);
    const results = await Promise.all(
      paths.map(async ({ reportId, path, checksum }) => {
        if (!checksum) {
          return reportId;
        }
        try {
          const hash = createHash('sha1');
          await pipeline([
            createReadStream(path),
            new Writable({
              write(chunk, _encoding, callback) {
                hash.update(chunk);
                callback();
              },
            }),
          ]);
          if (Buffer.from(checksum, 'hex').equals(hash.digest())) {
            return reportId;
          }
        } catch (error) {
          if ((error as { code?: string }).code === 'ENOENT') {
            return reportId;
          }
        }
      }),
    );
    const reportIds = results.filter(Boolean) as string[];
    if (reportIds.length > 0) {
      await this.integrityRepository.deleteByIds(reportIds);
    }
    this.logger.log(`Processed ${paths.length} paths and found ${reportIds.length} report(s) out of date.`);
    return JobStatus.Success;
  }
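
  /**
   * Bulk-resolves reports, grouped by which reference they carry: assetId
   * reports trash the asset, fileAssetId reports delete the derived file, and
   * reports with neither unlink the path from disk.
   */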
  @OnJob({ name: JobName.IntegrityReportDelete, queue: QueueName.IntegrityCheck })
  async handleDeleteIntegrityReport({ type }: IIntegrityDeleteReportJob): Promise<JobStatus> {
    this.logger.log(`Deleting all entries for ${type ?? 'all types of'} integrity report`);
    let properties;
    switch (type) {
      case IntegrityReportType.ChecksumFail: {
        properties = ['assetId'] as const;
        break;
      }
      case IntegrityReportType.MissingFile: {
        properties = ['assetId', 'fileAssetId'] as const;
        break;
      }
      case IntegrityReportType.OrphanFile: {
        properties = [void 0] as const;
        break;
      }
      default: {
        properties = [void 0, 'assetId', 'fileAssetId'] as const;
        break;
      }
    }

    for (const property of properties) {
      const reports = this.integrityRepository.streamIntegrityReportsByProperty(property, type);
      for await (const report of chunk(reports, JOBS_LIBRARY_PAGINATION_SIZE)) {
        // todo: queue sub-job here instead?
        switch (property) {
          case 'assetId': {
            const ids = report.map(({ assetId }) => assetId!);
            await this.assetRepository.updateAll(ids, {
              deletedAt: new Date(),
              status: AssetStatus.Trashed,
            });
            await this.eventRepository.emit('AssetTrashAll', {
              assetIds: ids,
              userId: '', // ???
            });
            await this.integrityRepository.deleteByIds(report.map(({ id }) => id));
            break;
          }
          case 'fileAssetId': {
            await this.assetRepository.deleteFiles(report.map(({ fileAssetId }) => ({ id: fileAssetId! })));
            break;
          }
          default: {
            await Promise.all(report.map(({ path }) => this.storageRepository.unlink(path).catch(() => void 0)));
            await this.integrityRepository.deleteByIds(report.map(({ id }) => id));
            break;
          }
        }
      }
    }

    this.logger.log('Finished deleting integrity report.');
    return JobStatus.Success;
  }
}
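
/**
 * Batches an async iterator into arrays of at most n items. Usage sketch
 * (stream here stands in for any AsyncIterableIterator):
 *
 *   for await (const batch of chunk(stream, 100)) {
 *     // batch is T[] with length <= 100
 *   }
 */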
async function* chunk<T>(generator: AsyncIterableIterator<T>, n: number) {
  let chunk: T[] = [];
  for await (const item of generator) {
    chunk.push(item);
    if (chunk.length === n) {
      yield chunk;
      chunk = [];
    }
  }
  if (chunk.length > 0) {
    yield chunk;
  }
}