Last active
August 2, 2020 22:51
-
-
Save n1ywb/c3809cb9ce3a7a2e05198a9b7209dbf5 to your computer and use it in GitHub Desktop.
scrounge a dir tree for wanted files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from sys import argv | |
from argparse import ArgumentParser | |
from os.path import join, exists | |
from os import link, symlink, makedirs | |
def _parse_args(argv=None):
    """Parse the command line.

    Positional arguments:
        input    -- text file listing one path per line.
        output   -- directory that receives the links (default: ``.``).
        basepath -- directory the listed paths are resolved against
                    (default: ``.``).

    ``--symbolic`` selects symbolic links instead of hard links.
    """
    parser = ArgumentParser()
    parser.add_argument('input')
    # nargs='?' is what makes the defaults reachable: a plain positional is
    # always required, so argparse would never consult its ``default``.
    parser.add_argument('output', nargs='?', default='.')
    parser.add_argument('basepath', nargs='?', default='.')
    parser.add_argument('--symbolic', action='store_true')
    return parser.parse_args(argv)


def main(argv=None):
    """Link every path listed in the input file into the output directory.

    Each listed path is resolved against the base path, and the link is named
    after the listed path with ``/`` replaced by ``_`` (keeping only the last
    255 characters).  Failures are reported per line and do not abort the run.

    argv -- argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = _parse_args(argv)
    output = args.output
    listing = args.input
    basepath = args.basepath
    symbolic = args.symbolic

    print("Input: %s" % listing)
    print("Output: %s" % output)
    print("Base path: %s" % basepath)

    if not exists(output):
        makedirs(output)

    with open(listing, 'r') as infile:
        for line in infile:
            line = line.strip()
            if not line:
                # A blank line would otherwise try to link basepath itself.
                continue

            if basepath:
                # Drop leading slashes so join() keeps basepath as the prefix.
                filepath = join(basepath, line.lstrip('/'))
            else:
                # An empty base path means the listed path is used verbatim.
                filepath = line

            # Flatten the listed path into a single file name; only the tail
            # is kept so the name stays within 255 characters.
            linkpath = join(output, line.replace('/', '_')[-255:])

            if symbolic:
                # Symbolic links point at the path with '/.git' removed,
                # i.e. the directory containing a listed .git entry.
                filepath = filepath.replace('/.git', '')
                make_link = symlink
            else:
                make_link = link

            try:
                make_link(filepath, linkpath)
            except (OSError, ValueError) as err:
                # Best effort: report the failure and carry on with the rest.
                print("Error linking %s to %s" % (filepath, linkpath))
                print(err)


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import linesep, walk | |
from os.path import join | |
import sys | |
import re | |
# Category name -> regular expression applied (case-insensitively) to each
# bare file or directory name found while walking the input trees.
file_types = dict(
    image=r'\.(jpg|jpeg|gif|png)$',
    git=r'^\.git$',
    pdf=r'\.pdf$',
    notebook=r'\.ipynb$',
    pyscript=r'\.py$',
    cs_data=r'\.dat$',
    # The alternatives must not carry their own '.', otherwise an extra
    # character is required between the dot and the extension.
    cs_program=r'\.(cri|cr6|dld)$',
)


def _matching_types(name):
    """Return the categories in ``file_types`` whose pattern matches ``name``."""
    return [
        file_type
        for file_type, pattern in file_types.items()
        if re.search(pattern, name, re.I)
    ]


def main(argv=None):
    """Walk the input trees and record matching paths per category.

    argv -- ``[output_dir, input_dir, ...]``; ``None`` means ``sys.argv[1:]``.

    For every category in ``file_types`` a file ``<category>.txt`` is written
    to the output directory, holding one matching path per line.  Both
    directory names and file names are tested against the patterns.
    """
    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        sys.exit("usage: OUTPUT_DIR [INPUT_DIR ...]")

    output = argv[0]
    roots = argv[1:]

    out_files = {}
    try:
        for file_type in file_types:
            out_files[file_type] = open(join(output, file_type + '.txt'), 'w')

        for root in roots:
            for dirpath, dirnames, filenames in walk(root):
                # Directories are included so entries such as '.git' are found.
                for name in dirnames + filenames:
                    for file_type in _matching_types(name):
                        fqp = join(dirpath, name)
                        print("Found\t%s\t%s" % (file_type, fqp))
                        # Text mode already translates '\n'; writing
                        # os.linesep would double the carriage return on
                        # Windows.
                        out_files[file_type].write(fqp + '\n')
    finally:
        # Close whatever was opened, even if the walk failed part-way.
        for handle in out_files.values():
            handle.close()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment