Skip to content

Instantly share code, notes, and snippets.

@tilleps
Created January 11, 2025 04:57
Show Gist options
  • Save tilleps/5b38fc9d48cd8b100e818c32c345cbad to your computer and use it in GitHub Desktop.
Save tilleps/5b38fc9d48cd8b100e818c32c345cbad to your computer and use it in GitHub Desktop.
import { createWriteStream } from "node:fs";
import { mkdir } from "node:fs/promises";
import { dirname, join, resolve, sep } from "node:path";
import { pipeline } from "node:stream/promises";
import unzipper from "unzipper";
/**
 * Unzip an archive into a destination folder, skipping every entry whose path
 * matches one of the user-defined wildcard patterns.
 *
 * Any failure (unreadable archive, unsafe entry path, I/O error) is logged with
 * console.error and is NOT rethrown, so the returned promise always fulfils.
 *
 * @param {string} inputZip - Path of the zip archive to read.
 * @param {string} destDir - Folder the entries are extracted into.
 * @param {Array<string>} [ignorePatterns=[]] - Wildcard patterns (e.g. '*.txt')
 *   of entry paths to skip. Omitting it extracts every file entry.
 * @returns {Promise<void>} Fulfils once extraction has finished or failed.
 */
async function unzipAndFilter(inputZip, destDir, ignorePatterns = []) {
  try {
    const directory = await unzipper.Open.file(inputZip);
    // Only "File" entries carry data; folders are created on demand below.
    const files = directory.files.filter((file) => file.type === "File");
    // Use the filter function to skip the files matching the patterns
    const filteredFiles = filterFiles(files, ignorePatterns);

    // Entry paths come from the archive itself, so a crafted entry such as
    // "../../etc/passwd" could escape destDir ("zip slip"). Every target is
    // validated BEFORE anything is written, so a bad archive extracts nothing.
    const destRoot = resolve(destDir);
    const rootPrefix = destRoot.endsWith(sep) ? destRoot : destRoot + sep;
    const targets = filteredFiles.map((file) => {
      // join() normalises ".." segments and treats an absolute entry path as
      // relative to destRoot.
      const outputPath = join(destRoot, file.path);
      if (!outputPath.startsWith(rootPrefix)) {
        throw new Error(
          `Refusing to extract "${file.path}": it resolves outside of "${destDir}"`
        );
      }
      return { file, outputPath };
    });

    // Extract the filtered files
    const promises = targets.map(async ({ file, outputPath }) => {
      // Ensure the folder exists
      await mkdir(dirname(outputPath), { recursive: true });
      // pipeline() rejects on an error from either stream and destroys both;
      // a bare .pipe() would leave read-side errors unhandled.
      await pipeline(file.stream(), createWriteStream(outputPath));
    });

    // Wait for all the extractions to complete
    await Promise.all(promises);
    console.log('Extraction complete!');
  } catch (error) {
    console.error('Error during extraction:', error);
  }
}
/**
 * Convert a wildcard pattern (e.g. '*.txt') into an anchored regular expression.
 * The only wildcard is '*', which becomes '.*' (any run of characters,
 * including '/'). Every other character, including '?', is matched literally.
 * @param {string} pattern - The pattern with wildcard to convert.
 * @returns {RegExp} A regular expression that must match the whole input.
 */
function wildcardToRegex(pattern) {
  // Escape every regex metacharacter except the wildcard '*'. '?' must be in
  // this set: left unescaped it acts as a quantifier ("what?.md" would match
  // "wha.md"), and a leading '?' makes the RegExp constructor throw.
  const escapedPattern = pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
  // Replace the '*' wildcard with '.*' for regex matching
  const regexPattern = escapedPattern.replaceAll("*", ".*");
  // Anchor both ends so the pattern has to cover the entire path.
  return new RegExp(`^${regexPattern}$`);
}
/**
 * Compile each user-supplied wildcard pattern into a regular expression.
 * @param {Array} patterns - Wildcard patterns such as '*.txt' or '*test*'.
 * @returns {Array} One regular expression per pattern, in the same order.
 */
function convertPatternsToRegex(patterns) {
  const compiled = patterns.map(function (wildcard) {
    return wildcardToRegex(wildcard);
  });
  return compiled;
}
/**
 * Drop every file whose path matches at least one ignore pattern.
 * @param {Array} files - File objects exposing a `path` string (e.g., from unzipper).
 * @param {Array} patterns - Wildcard patterns to ignore (e.g., ['*.txt', '*test*']).
 * @returns {Array} The files whose path matches none of the patterns.
 */
function filterFiles(files, patterns) {
  // Compile once up front so each file only pays for the regex tests.
  const ignoreRegexes = convertPatternsToRegex(patterns);

  function isKept(entry) {
    // Kept only when every ignore regex fails to match the entry's path.
    return ignoreRegexes.every(function (ignoreRegex) {
      return !ignoreRegex.test(entry.path);
    });
  }

  return files.filter(isKept);
}
// Example run: the archive to read and the folder to extract into.
const inputFile = "./var/tmp/4d45124d2a284d573cd36d917c4d60ab";
const destPath = "./var/downloads/";

console.log("inputFile", inputFile);
console.log("destPath", destPath);

// User-defined patterns: entries matching any of these are skipped.
const ignorePatterns = ['*.txt', '*test*', 'folder/*'];

try {
  // Unzip the archive, leaving out the entries that match the patterns.
  await unzipAndFilter(inputFile, destPath, ignorePatterns);
} catch (error) {
  console.log("err", error);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment