Last active
June 20, 2020 10:50
-
-
Save fredeil/439e3ccfccc4e1a61259fcf2b9cf1acf to your computer and use it in GitHub Desktop.
Cleaning up orphaned chunks in GridFS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using MongoDB.Bson; | |
using MongoDB.Driver; | |
using Serilog; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Threading; | |
using System.Threading.Tasks; | |
namespace Mongo
{
    /// <summary>
    /// Console tool that deletes orphaned GridFS chunks: documents in
    /// <c>packages.chunks</c> whose <c>files_id</c> has no matching <c>_id</c>
    /// in <c>packages.files</c>.
    /// </summary>
    internal class Program
    {
        /// <summary>Number of chunk documents requested from the server per cursor batch.</summary>
        private const int ChunkBatchSize = 1000;

        /// <summary>
        /// Upper bound on the number of chunk documents inspected in a single run.
        /// NOTE(review): chunks beyond this limit are never examined; confirm the cap is intended.
        /// </summary>
        private const int MaxChunksToScan = 100_000;

        /// <summary>
        /// Scans the chunks collection, finds the file ids that have no document in the
        /// files collection, and deletes every chunk that references them.
        /// Pressing Ctrl+C cancels the run through a cancellation token.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        /// <returns>0 when the run completes; 1 when it fails or is cancelled.</returns>
        private static async Task<int> Main(string[] args)
        {
            Log.Logger = new LoggerConfiguration()
                .MinimumLevel.Debug()
                .WriteTo.Console()
                .WriteTo.File("mongo.txt")
                .CreateLogger();

            using var cts = new CancellationTokenSource();
            Console.CancelKeyPress += (sender, e) =>
            {
                // Keep the process alive so the finally block can log totals and flush.
                e.Cancel = true;
                cts.Cancel();
            };

            var sw = new Stopwatch();
            var numberOfChunksDeleted = 0UL;
            var exitCode = 0;

            try
            {
                // Setup lives inside the try so configuration errors are logged and
                // the logger is flushed instead of the process crashing unlogged.
                // The connection string is blank here and must be filled in before use.
                var mongoSettings = MongoClientSettings.FromConnectionString("");
                var mongoClient = new MongoClient(mongoSettings);
                var mongoDatabase = mongoClient.GetDatabase("packagesDb");
                var chunksCollection = mongoDatabase.GetCollection<BsonDocument>("packages.chunks");
                var filesCollection = mongoDatabase.GetCollection<BsonDocument>("packages.files");

                sw.Start();

                var referencedFileIds = await CollectReferencedFileIdsAsync(chunksCollection, cts.Token);

                foreach (var fileId in referencedFileIds)
                {
                    var fileFilter = Builders<BsonDocument>.Filter.Eq("_id", fileId);
                    var fileCount = await filesCollection.CountDocumentsAsync(fileFilter, null, cts.Token);
                    if (fileCount > 0)
                    {
                        // The owning file still exists; its chunks are not orphaned.
                        continue;
                    }

                    var deleteFilter = Builders<BsonDocument>.Filter.Eq("files_id", fileId);
                    var deleteResult = await chunksCollection.DeleteManyAsync(deleteFilter, null, cts.Token);

                    // Accumulated outside the try so partial progress is still reported on failure.
                    numberOfChunksDeleted += (ulong)deleteResult.DeletedCount;
                    Log.Information("{0},{1}", deleteResult.DeletedCount, fileId.ToString());
                }
            }
            catch (OperationCanceledException) when (cts.IsCancellationRequested)
            {
                // User-requested cancellation is not an error worth a stack trace.
                Log.Warning("Operation cancelled.");
                exitCode = 1;
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Something went wrong.");
                exitCode = 1;
            }
            finally
            {
                sw.Stop();
                Log.Information("Number of chunks deleted: {0}.", numberOfChunksDeleted);
                Log.Information("Elapsed time: {0} minutes.", sw.Elapsed.TotalMinutes);
                Log.CloseAndFlush();
            }

            return exitCode;
        }

        /// <summary>
        /// Reads up to <see cref="MaxChunksToScan"/> chunk documents (without their
        /// <c>data</c> payload) and returns the distinct <c>files_id</c> values they reference.
        /// </summary>
        /// <param name="chunksCollection">The GridFS chunks collection to scan.</param>
        /// <param name="cancellationToken">Token used to abort the scan.</param>
        /// <returns>The set of distinct file ids referenced by the scanned chunks.</returns>
        /// <exception cref="InvalidCastException">
        /// Thrown by <c>AsObjectId</c> when a chunk's <c>files_id</c> is not an ObjectId.
        /// </exception>
        private static async Task<HashSet<ObjectId>> CollectReferencedFileIdsAsync(
            IMongoCollection<BsonDocument> chunksCollection,
            CancellationToken cancellationToken)
        {
            var findOptions = new FindOptions<BsonDocument, BsonDocument>
            {
                BatchSize = ChunkBatchSize,
                Skip = 0,
                Limit = MaxChunksToScan,
                // Exclude the binary payload; only the ids are needed.
                Projection = Builders<BsonDocument>.Projection.Exclude("data")
            };

            using var cursor = await chunksCollection.FindAsync(
                Builders<BsonDocument>.Filter.Empty,
                findOptions,
                cancellationToken);

            var fileIds = new HashSet<ObjectId>();
            while (await cursor.MoveNextAsync(cancellationToken))
            {
                foreach (var gridFSChunk in cursor.Current)
                {
                    fileIds.Add(gridFSChunk.GetValue("files_id").AsObjectId);
                }
            }

            return fileIds;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See https://github.com/Dualog-students/GridFSCleaner