Last active
June 20, 2020 10:50
-
-
Save fredeil/439e3ccfccc4e1a61259fcf2b9cf1acf to your computer and use it in GitHub Desktop.
Cleaning up orphaned chunks in GridFS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using MongoDB.Bson; | |
using MongoDB.Driver; | |
using Serilog; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Threading; | |
using System.Threading.Tasks; | |
namespace Mongo
{
    /// <summary>
    /// Console tool that deletes orphaned GridFS chunks: documents in the chunks
    /// collection whose <c>files_id</c> has no matching <c>_id</c> in the files collection.
    /// </summary>
    internal class Program
    {
        private const string DatabaseName = "packagesDb";
        private const string ChunksCollectionName = "packages.chunks";
        private const string FilesCollectionName = "packages.files";

        // Number of chunk documents fetched per cursor round trip.
        private const int ChunkBatchSize = 1000;

        // Upper bound on the number of chunk documents inspected in a single run.
        // Chunks beyond this limit are not examined; re-run the tool to continue.
        private const int MaxChunksToScan = 100_000;

        /// <summary>
        /// Scans the chunks collection, and for every distinct <c>files_id</c> that has no
        /// document in the files collection, deletes all chunks carrying that id.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the MongoDB connection string. When omitted, an empty
        /// connection string is used, which the driver is expected to reject.
        /// </param>
        /// <returns>0 on success; 1 when the run failed or was cancelled with Ctrl+C.</returns>
        private static async Task<int> Main(string[] args)
        {
            Log.Logger = new LoggerConfiguration()
                .MinimumLevel.Debug()
                .WriteTo.Console()
                .WriteTo.File("mongo.txt")
                .CreateLogger();

            using var cts = new CancellationTokenSource();

            // Keep a reference to the handler so it can be detached before cts is disposed.
            ConsoleCancelEventHandler onCancelKeyPress = (sender, e) =>
            {
                // Suppress immediate process termination; unwind through the token instead.
                e.Cancel = true;
                cts.Cancel();
            };
            Console.CancelKeyPress += onCancelKeyPress;

            var sw = new Stopwatch();
            var numberOfChunksDeleted = 0UL;
            var exitCode = 0;

            try
            {
                // Client setup lives inside the try so that configuration errors are
                // logged and the log is flushed, instead of crashing the process.
                var connectionString = args.Length > 0 ? args[0] : "";
                var mongoSettings = MongoClientSettings.FromConnectionString(connectionString);
                var mongoClient = new MongoClient(mongoSettings);
                var mongoDatabase = mongoClient.GetDatabase(DatabaseName);
                var chunksCollection = mongoDatabase.GetCollection<BsonDocument>(ChunksCollectionName);
                var filesCollection = mongoDatabase.GetCollection<BsonDocument>(FilesCollectionName);

                sw.Start();

                var referencedFileIds = await CollectReferencedFileIdsAsync(chunksCollection, cts.Token);

                // Only existence matters, so stop counting at the first match.
                var existsOptions = new CountOptions { Limit = 1 };

                foreach (var fileId in referencedFileIds)
                {
                    var fileFilter = Builders<BsonDocument>.Filter.Eq("_id", fileId);
                    var matches = await filesCollection.CountDocumentsAsync(fileFilter, existsOptions, cts.Token);
                    if (matches > 0)
                    {
                        continue;
                    }

                    // NOTE(review): a file whose upload is still in progress may have chunks
                    // but no files document yet - confirm no uploads run concurrently.
                    var deleteFilter = Builders<BsonDocument>.Filter.Eq("files_id", fileId);
                    var deleteResult = await chunksCollection.DeleteManyAsync(deleteFilter, null, cts.Token);
                    numberOfChunksDeleted += (ulong)deleteResult.DeletedCount;
                    Log.Information("{0},{1}", deleteResult.DeletedCount, fileId.ToString());
                }
            }
            catch (OperationCanceledException) when (cts.IsCancellationRequested)
            {
                // Ctrl+C is an expected way to stop the tool, not an application error.
                Log.Warning("Cancelled by user.");
                exitCode = 1;
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Something went wrong.");
                exitCode = 1;
            }
            finally
            {
                Console.CancelKeyPress -= onCancelKeyPress;
                sw.Stop();
                Log.Information("Number of chunks deleted: {0}.", numberOfChunksDeleted);
                Log.Information("Elapsed time: {0} minutes.", sw.Elapsed.TotalMinutes);
                Log.CloseAndFlush();
            }

            return exitCode;
        }

        /// <summary>
        /// Reads up to <see cref="MaxChunksToScan"/> chunk documents (without their
        /// <c>data</c> payload) and returns the distinct <c>files_id</c> values they reference.
        /// </summary>
        /// <param name="chunksCollection">The GridFS chunks collection to scan.</param>
        /// <param name="cancellationToken">Token used to abort the scan.</param>
        /// <returns>The set of distinct <c>files_id</c> values seen in the scanned chunks.</returns>
        private static async Task<HashSet<ObjectId>> CollectReferencedFileIdsAsync(
            IMongoCollection<BsonDocument> chunksCollection,
            CancellationToken cancellationToken)
        {
            var findOptions = new FindOptions<BsonDocument, BsonDocument>
            {
                BatchSize = ChunkBatchSize,
                Limit = MaxChunksToScan,
                // The binary payload is not needed to identify orphans; skip transferring it.
                Projection = Builders<BsonDocument>.Projection.Exclude("data")
            };

            var referencedFileIds = new HashSet<ObjectId>();

            // The cursor is disposed as soon as the scan finishes, before any deletes start.
            using var cursor = await chunksCollection.FindAsync(
                Builders<BsonDocument>.Filter.Empty, findOptions, cancellationToken);

            while (await cursor.MoveNextAsync(cancellationToken))
            {
                foreach (var gridFSChunk in cursor.Current)
                {
                    referencedFileIds.Add(gridFSChunk.GetValue("files_id").AsObjectId);
                }
            }

            return referencedFileIds;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
That means it is taking longer than 10 minutes between calls to `MoveNextAsync()`, and the cursor is getting cleaned up by the server. If you add `NoCursorTimeout = true` to the `FindOptions<BsonDocument>`, it will prevent the `mongod` from cleaning up the cursor, but that means that unless the client cleans it up, it won't be cleaned up until a server restart. So just make sure all error paths call `Dispose` on the `chunksCursor`. The `using` statement should solve that for you automatically, but an explicit `try...finally` wouldn't hurt either.