Specifically targeting darknet (YOLO), but easily adapted. Creates two lists, one for training and one for testing (train.txt and tests.txt), of all jpg files in each of a series of folders.
#!/bin/bash
#
# Build train/test image lists (e.g. for darknet/YOLO) for a set of run folders.
#
# For every run named in RUNS, all files whose name ends in ${IMGT} are
# collected recursively from ./${DATA}/<run>, shuffled, and split into two
# sorted, disjoint lists written inside that folder:
#   train.txt - the first (count - count/SPLIT) shuffled entries
#   tests.txt - the remaining count/SPLIT shuffled entries
# Every entry is written as ./${DATA}/<run>/<path>, i.e. relative to the
# directory the script is started from.
#
# Requires: find, shuf, sort, comm, head, tail, wc.

# Divisor for the test share: 5 means a 20:80 test:train split.
# NB: Bash only does integer arithmetic, so the test set holds
# floor(count / SPLIT) entries and the ratio is only approximate.
SPLIT=5
# File name suffix of the images to list (matched as "*${IMGT}").
IMGT="jpg"
# Directory (relative to the current directory) that holds the run folders.
DATA="data"
# Run folders to process.
RUNS=(r g b h s v rgb surf)

#######################################
# Write train.txt and tests.txt for a single run folder.
# Arguments:
#   $1 - data directory, relative to the current directory
#   $2 - run folder name inside the data directory
#   $3 - file name suffix to match (e.g. "jpg")
#   $4 - split divisor; count/$4 entries go to tests.txt
# Outputs:
#   Creates train.txt and tests.txt in ./$1/$2; warnings go to stderr.
#   The intermediate "shuffled" list is kept only if the overlap check fails.
# Returns:
#   0 on success, 1 if the run was skipped or the lists overlap.
#######################################
split_run() {
  local data_dir=$1 run=$2 ext=$3 split=$4
  local run_dir="./${data_dir}/${run}"
  local shuffled="${run_dir}/shuffled"
  local total test_len train_len

  # Guard against division by zero and non-numeric divisors.
  if ! [[ "$split" =~ ^[1-9][0-9]*$ ]]; then
    printf 'error: split divisor must be a positive integer, got "%s"\n' \
      "$split" >&2
    return 1
  fi

  # No cd is done anywhere, so a missing folder can never leave the script in
  # the wrong working directory for the following runs.
  if [[ ! -d "$run_dir" ]]; then
    printf 'warning: %s is not a directory, skipping\n' "$run_dir" >&2
    return 1
  fi

  # Searching from "./<data>/<run>" makes find emit the final path prefix
  # directly. Directories are excluded; symlinked images are still listed.
  if ! find "$run_dir" ! -type d -name "*${ext}" | shuf > "$shuffled"; then
    printf 'warning: could not build the image list for %s\n' "$run_dir" >&2
    return 1
  fi

  # Derive both sizes from the list that is actually split, so that
  # train_len + test_len always equals the number of listed files.
  total=$(wc -l < "$shuffled")
  test_len=$((total / split))
  train_len=$((total - test_len))
  if ((total == 0)); then
    printf 'warning: no *%s files found in %s\n' "$ext" "$run_dir" >&2
  fi

  head -n "$train_len" "$shuffled" | sort > "${run_dir}/train.txt"
  tail -n "$test_len" "$shuffled" | sort > "${run_dir}/tests.txt"

  # Sanity check: comm -12 prints the lines common to both sorted lists.
  if [[ -n "$(comm -12 "${run_dir}/train.txt" "${run_dir}/tests.txt")" ]]; then
    printf 'warning: train.txt and tests.txt in %s are not disjoint sets\n' \
      "$run" >&2
    return 1
  fi
  rm -f -- "$shuffled"
}

#######################################
# Process every run folder listed in RUNS.
# Globals:   SPLIT, IMGT, DATA, RUNS (read)
# Returns:   0 if every run succeeded, 1 otherwise.
#######################################
main() {
  local run status=0
  for run in "${RUNS[@]}"; do
    split_run "$DATA" "$run" "$IMGT" "$SPLIT" || status=1
  done
  return "$status"
}

main "$@"