Last active
June 20, 2020 10:50
-
-
Save fredeil/439e3ccfccc4e1a61259fcf2b9cf1acf to your computer and use it in GitHub Desktop.
Cleaning up orphaned chunks in GridFS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using MongoDB.Bson; | |
using MongoDB.Driver; | |
using Serilog; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Threading; | |
using System.Threading.Tasks; | |
namespace Mongo
{
    /// <summary>
    /// Console tool that removes orphaned GridFS chunks: documents in
    /// <c>packages.chunks</c> whose <c>files_id</c> has no matching document
    /// in <c>packages.files</c>.
    /// </summary>
    internal class Program
    {
        /// <summary>Number of chunk documents requested from the server per cursor batch.</summary>
        private const int ChunkBatchSize = 1000;

        /// <summary>Upper bound on the number of chunk documents examined in a single run.</summary>
        private const int MaxChunksPerRun = 100_000;

        /// <summary>
        /// Scans the chunks collection, determines which referenced files no longer
        /// exist, and deletes the chunks that belong to them.
        /// </summary>
        /// <param name="args">Command-line arguments (currently unused).</param>
        /// <returns>
        /// <c>0</c> when the run completed; <c>1</c> when it failed or was cancelled
        /// with Ctrl+C.
        /// </returns>
        private static async Task<int> Main(string[] args)
        {
            Log.Logger = new LoggerConfiguration()
                .MinimumLevel.Debug()
                .WriteTo.Console()
                .WriteTo.File("mongo.txt")
                .CreateLogger();

            using var cts = new CancellationTokenSource();

            // Turn Ctrl+C into a cooperative cancellation instead of killing the process,
            // so the summary in the finally block is still written.
            ConsoleCancelEventHandler onCancelKeyPress = (sender, e) =>
            {
                e.Cancel = true;
                cts.Cancel();
            };
            Console.CancelKeyPress += onCancelKeyPress;

            var sw = new Stopwatch();
            var numberOfChunksDeleted = 0UL;
            var exitCode = 0;

            try
            {
                // Created inside the try block so that a bad connection string is logged
                // and the log is flushed, rather than crashing the process unhandled.
                // NOTE: the connection string is an empty placeholder and must be filled in.
                var mongoSettings = MongoClientSettings.FromConnectionString("");
                var mongoClient = new MongoClient(mongoSettings);
                var mongoDatabase = mongoClient.GetDatabase("packagesDb");
                var chunksCollection = mongoDatabase.GetCollection<BsonDocument>("packages.chunks");
                var filesCollection = mongoDatabase.GetCollection<BsonDocument>("packages.files");

                var referencedFileIds = await CollectReferencedFileIdsAsync(chunksCollection, sw, cts.Token);

                // Only existence matters, so let the server stop counting at the first match.
                var existsOptions = new CountOptions { Limit = 1 };

                foreach (var fileId in referencedFileIds)
                {
                    var fileFilter = Builders<BsonDocument>.Filter.Eq("_id", fileId);
                    var fileCount = await filesCollection.CountDocumentsAsync(fileFilter, existsOptions, cts.Token);
                    if (fileCount > 0)
                    {
                        // The file still exists; its chunks are not orphaned.
                        continue;
                    }

                    var deleteFilter = Builders<BsonDocument>.Filter.Eq("files_id", fileId);
                    var deleteResult = await chunksCollection.DeleteManyAsync(deleteFilter, null, cts.Token);

                    // DeletedCount is only available for acknowledged writes.
                    var deletedCount = deleteResult.IsAcknowledged ? deleteResult.DeletedCount : 0L;
                    numberOfChunksDeleted += (ulong)deletedCount;
                    Log.Information("{0},{1}", deletedCount, fileId);
                }
            }
            catch (OperationCanceledException) when (cts.IsCancellationRequested)
            {
                Log.Warning("Cancelled by user.");
                exitCode = 1;
            }
            catch (Exception ex)
            {
                Log.Error(ex, "Something went wrong.");
                exitCode = 1;
            }
            finally
            {
                // Unsubscribe before cts is disposed so a late Ctrl+C cannot touch a disposed source.
                Console.CancelKeyPress -= onCancelKeyPress;

                sw.Stop();
                Log.Information("Number of chunks deleted: {0}.", numberOfChunksDeleted);
                Log.Information("Elapsed time: {0} minutes.", sw.Elapsed.TotalMinutes);
                Log.CloseAndFlush();
            }

            return exitCode;
        }

        /// <summary>
        /// Reads up to <see cref="MaxChunksPerRun"/> chunk documents (without their
        /// <c>data</c> payload) and returns the distinct <c>files_id</c> values they reference.
        /// </summary>
        /// <param name="chunksCollection">The GridFS chunks collection to scan.</param>
        /// <param name="sw">Stopwatch that is started once the cursor has been opened.</param>
        /// <param name="cancellationToken">Token used to abort the scan.</param>
        /// <returns>The set of distinct file ids referenced by the scanned chunks.</returns>
        private static async Task<HashSet<ObjectId>> CollectReferencedFileIdsAsync(
            IMongoCollection<BsonDocument> chunksCollection,
            Stopwatch sw,
            CancellationToken cancellationToken)
        {
            var findOptions = new FindOptions<BsonDocument, BsonDocument>
            {
                BatchSize = ChunkBatchSize,
                Limit = MaxChunksPerRun,
                // The binary payload is not needed to decide whether a chunk is orphaned.
                Projection = Builders<BsonDocument>.Projection.Exclude("data")
            };

            using var cursor = await chunksCollection.FindAsync(
                Builders<BsonDocument>.Filter.Empty, findOptions, cancellationToken);
            sw.Start();

            // The cursor is drained completely before any delete is issued so that the
            // time between getMore calls stays short; a cursor left idle while slow
            // deletes run can be cleaned up by the server ("cursor not found").
            var referencedFileIds = new HashSet<ObjectId>();
            while (await cursor.MoveNextAsync(cancellationToken))
            {
                foreach (var gridFSChunk in cursor.Current)
                {
                    referencedFileIds.Add(gridFSChunk.GetValue("files_id").AsObjectId);
                }
            }

            return referencedFileIds;
        }
    }
}
That means more than 10 minutes are passing between calls to `MoveNextAsync()`, so the server is cleaning up the idle cursor. If you add `NoCursorTimeout = true` to the `FindOptions<BsonDocument>`, it will prevent `mongod` from cleaning up the cursor, but that also means that unless the client cleans it up, it won't be cleaned up until the server restarts. So just make sure all error paths call `Dispose` on the `chunksCursor`. The `using` declaration should handle that for you automatically, but an explicit `try...finally` wouldn't hurt either.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
After the program has been running for a while, this exception occurs:
`MongoDB.Driver.MongoCommandException: Command getMore failed: cursor id 93648701887 not found.`
However, when I did my first "dry run", only counting the documents instead of deleting them, this did not happen.