Last active
October 27, 2021 16:29
-
-
Save semenko/0294f136438da1393565fe6b6ad0de6d to your computer and use it in GitHub Desktop.
Process 10x .mtx matrix, feature, and barcode files into a single CSV per sample
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# set -x
# Inspired by https://kb.10xgenomics.com/hc/en-us/articles/360023793031-How-can-I-convert-the-feature-barcode-matrix-from-Cell-Ranger-3-to-a-CSV-file-
#
# Convert every 10x sample in the current directory into a long-format CSV.
#
# For each GSM*_barcodes.tsv found, the sibling <prefix>_features.tsv and
# <prefix>_matrix.mtx are read and <prefix>.final_matrix.csv is written with
# one row per matrix entry and these columns (no header row is emitted):
#
#   barcode,feature_id,feature_name,umi_count
#
# Usage:       run without arguments from the directory holding the GSM* files.
# Exit status: 0 when every sample converted (or none were found), else 1.

set -uo pipefail

#######################################
# Convert one sample's barcodes/features/matrix triplet into a CSV.
# Arguments: $1 - sample prefix (file name without the _barcodes.tsv suffix)
# Outputs:   progress messages on stdout, errors on stderr;
#            writes <prefix>.final_matrix.csv
# Returns:   0 on success, 1 if an input is unreadable or any stage fails
#
# The body is a subshell "( ... )" on purpose: variables stay scoped without
# `local`, and the EXIT trap removes the scratch directory on every exit path
# without touching the caller's traps. Hence `exit`, not `return`, below.
#######################################
convert_sample() (
  prefix=$1
  barcodes="${prefix}_barcodes.tsv"
  features="${prefix}_features.tsv"
  matrix="${prefix}_matrix.mtx"
  output="${prefix}.final_matrix.csv"

  for input in "$barcodes" "$features" "$matrix"; do
    if [[ ! -r "$input" ]]; then
      printf 'error: cannot read %s\n' "$input" >&2
      exit 1
    fi
  done

  # Scratch space lives next to the data (as the intermediate files always
  # did) so large matrices do not fill a small /tmp, but under a unique name
  # so concurrent runs cannot clobber each other.
  workdir=$(mktemp -d ./tmp.mtx2csv.XXXXXX) || exit 1
  trap 'rm -rf -- "$workdir"' EXIT

  printf '\tSorting features, barcodes, & matrix files.\n'

  # Prefix each barcode / feature with its 1-based line number: that number
  # is the index the matrix file uses to refer to it. Sorting is by that
  # number *as text*, because join(1) needs lexically sorted input;
  # LC_ALL=C keeps sort and join in agreement regardless of the user's locale.
  awk -F '\t' 'BEGIN { OFS = "," } { print NR, $1 }' "$barcodes" \
    | LC_ALL=C sort -t, -k 1b,1 > "${workdir}/barcodes.numbered" &
  pids=("$!")

  awk -F '\t' 'BEGIN { OFS = "," } { print NR, $1, $2 }' "$features" \
    | LC_ALL=C sort -t, -k 1b,1 > "${workdir}/features.numbered" &
  pids+=("$!")

  # Matrix Market layout: a %%MatrixMarket banner, any number of % comment
  # lines, one "rows cols entries" size line, then the data. Skip by content
  # rather than by a fixed line count so files with more or fewer comment
  # lines are handled too. Data columns used: feature index, barcode index,
  # count.
  awk '
    BEGIN { OFS = "," }
    /^%/ { next }
    !size_line_seen { size_line_seen = 1; next }
    { print $1, $2, $3 }
  ' "$matrix" \
    | LC_ALL=C sort -t, -k 1b,1 > "${workdir}/feature_sorted_mtx" &
  pids+=("$!")

  printf '\t... (waiting for completion)\n'
  status=0
  for pid in "${pids[@]}"; do
    wait "$pid" || status=1
  done
  if ((status != 0)); then
    printf 'error: sorting inputs for %s failed\n' "$prefix" >&2
    exit 1
  fi

  printf '\tGenerating final matrix.\n'
  # echo "barcode,feature_id,feature_name,umi_count" > "${prefix}.final_matrix.csv"
  # Use join to replace line numbers with features, then with barcodes:
  #   1. join on feature index -> idx,feature_id,feature_name,barcode_idx,count
  #   2. drop the feature index, re-sort on barcode index (now column 3)
  #   3. join on barcode index -> idx,barcode,feature_id,feature_name,count
  #   4. drop the barcode index
  # The result is built in the scratch directory and moved into place only on
  # success, so a failed run never leaves a truncated CSV behind.
  LC_ALL=C join -t, -1 1 -2 1 \
    "${workdir}/features.numbered" "${workdir}/feature_sorted_mtx" \
    | cut -d, -f 2,3,4,5,6 \
    | LC_ALL=C sort -t, -k 3b,3 \
    | LC_ALL=C join -t, -1 1 -2 3 "${workdir}/barcodes.numbered" - \
    | cut -d, -f 2,3,4,5,6 \
      > "${workdir}/final_matrix.csv" || {
    printf 'error: building the matrix for %s failed\n' "$prefix" >&2
    exit 1
  }

  mv -- "${workdir}/final_matrix.csv" "$output" || exit 1
)

#######################################
# Convert every GSM*_barcodes.tsv sample in the current directory.
# Arguments: none
# Outputs:   progress on stdout, warnings/errors on stderr
# Returns:   0 if all samples converted (or none found), 1 if any failed
#######################################
main() {
  local barcode_file prefix
  local found=0 failed=0

  for barcode_file in GSM*_barcodes.tsv; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$barcode_file" ]] || continue
    found=1

    prefix="${barcode_file%_barcodes.tsv}" # Extract prefix before _barcodes.tsv
    echo "Working on $prefix..."
    if ! convert_sample "$prefix"; then
      printf 'error: failed to convert %s\n' "$prefix" >&2
      failed=1 # keep going: the remaining samples are independent
    fi
  done

  if ((found == 0)); then
    printf 'warning: no GSM*_barcodes.tsv files found in %s\n' "$PWD" >&2
  fi
  return "$failed"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment