ThomDietrich · August 12, 2020 22:46 · ThomDietrich · Aug 11, 2020
diff --git a/restic-benchmark-dedup.sh b/restic-benchmark-dedup.sh
 #!/bin/bash
 #
 # Benchmark how well restic deduplicates compressed tar archives.
 #
 # One random text file is added to the input folder per round. After each
 # addition the input folder is backed up directly, and additionally as a tar
 # archive compressed with several compressors (plain and "rsyncable"
 # variants), each into its own restic repository. The final repository sizes
 # show which compressor output deduplicates best.
 #
 # Requirements: restic, GNU tar, GNU time at /usr/bin/time, and the
 # compressors gzip, bzip2, xz, pigz and zstd (>= 1.3.8 for --rsyncable).
 #
 # Everything is created below ./temp_test_deduplication, which is wiped at
 # the start of every run.

 BASE="$(pwd)/temp_test_deduplication"
 SOURCE="$BASE/input"
 REPO_BASE="$BASE/repo"
 TEMP="$BASE/temp"
 NUM_FILES=16
 FILE_SIZE="8M"
 # Throwaway password: the repositories only ever hold random benchmark data.
 export RESTIC_PASSWORD="password123"

 ############################################################

 # Print an error message to stderr and abort the benchmark.
 die() {
   printf 'error: %s\n' "$*" >&2
   exit 1
 }

 # Archive $SOURCE with tar, back the archive up and empty $TEMP again.
 #   $1   - repository suffix; the repository is "$REPO_BASE-<suffix>"
 #   $2.. - tar option(s) selecting the compressor
 # A failing compressor only produces a warning so that the remaining
 # compressors are still benchmarked.
 backup_compressed() {
   local suffix=$1
   shift
   local repo="$REPO_BASE-$suffix"

   echo "------------------------------------------------------------"
   printf '\n%s\n' "$repo"
   /usr/bin/time -f "Compression took %e seconds" \
     tar -cv "$@" -f "$TEMP/archive.tar.z" "$SOURCE" \
     || printf 'warning: compression failed for %s\n' "$suffix" >&2
   echo
   restic --repo="$repo" backup "$TEMP"
   rm -rf -- "$TEMP" && mkdir -- "$TEMP"
 }

 # Fail early when a tool every step depends on is missing.
 for TOOL in restic tar; do
   command -v "$TOOL" >/dev/null || die "required tool not found: $TOOL"
 done
 [[ -x /usr/bin/time ]] || die "GNU time is required at /usr/bin/time"

 echo "Starting with clean folder..."
 # Removing $BASE also removes the input, temp and repository folders.
 rm -rf -- "${BASE:?}"
 mkdir -p -- "$SOURCE" "$TEMP" || die "cannot create working folders in $BASE"


 printf '\nInitializing restic repos...\n'
 for NAME in input gzip bzip2 xz rsyncable-gzip rsyncable-pigz rsyncable-zstd; do
   restic init --repo="$REPO_BASE-$NAME" \
     || die "cannot initialize repository $REPO_BASE-$NAME"
 done

 for ((n = 1; n <= NUM_FILES; n++)); do
   printf -v i '%03d' "$n"
   # Random 8-character file name, so files do not sort in creation order.
   INDEX=$(tr -dc 'a-z0-9' < /dev/urandom | head -c 8)
   echo "============================================================"
   echo "Adding file $i under $SOURCE/$INDEX.txt"
   tr -dc '[:alnum:] \n' < /dev/urandom | head -c "$FILE_SIZE" > "$SOURCE/$INDEX.txt"
   ls -lh "$SOURCE"

   # Baseline: back up the uncompressed input folder directly.
   REPO="$REPO_BASE-input"
   printf '\n%s\n' "$REPO"
   restic --repo="$REPO" backup "$SOURCE"

   # Compressors built into tar.
   for ALGO in gzip bzip2 xz; do
     backup_compressed "$ALGO" "--$ALGO"
   done

   # "rsyncable" variants, equivalent to: tar -cv ... | <compressor> --rsyncable
   backup_compressed rsyncable-gzip --use-compress-program="gzip --rsyncable"
   backup_compressed rsyncable-pigz --use-compress-program="pigz --rsyncable"
   # Attention: rsyncable introduced in https://github.com/facebook/zstd/releases/tag/v1.3.8
   backup_compressed rsyncable-zstd --use-compress-program="zstd --rsyncable"

 done
 rm -rf -- "$TEMP"
 printf '\nFinal repo sizes, compared to file input of %s of %s each:\n' "$NUM_FILES" "$FILE_SIZE"
 du -hs "$BASE"/*
	#!/bin/bash
	#
	# Benchmark how well restic deduplicates compressed tar archives.
	#
	# One random text file is added to the input folder per round. After each
	# addition the input folder is backed up directly, and additionally as a tar
	# archive compressed with several compressors (plain and "rsyncable"
	# variants), each into its own restic repository. The final repository sizes
	# show which compressor output deduplicates best.
	#
	# Requirements: restic, GNU tar, GNU time at /usr/bin/time, and the
	# compressors gzip, bzip2, xz, pigz and zstd (>= 1.3.8 for --rsyncable).
	#
	# Everything is created below ./temp_test_deduplication, which is wiped at
	# the start of every run.

	BASE="$(pwd)/temp_test_deduplication"
	SOURCE="$BASE/input"
	REPO_BASE="$BASE/repo"
	TEMP="$BASE/temp"
	NUM_FILES=16
	FILE_SIZE="8M"
	# Throwaway password: the repositories only ever hold random benchmark data.
	export RESTIC_PASSWORD="password123"

	############################################################

	# Print an error message to stderr and abort the benchmark.
	die() {
	  printf 'error: %s\n' "$*" >&2
	  exit 1
	}

	# Archive $SOURCE with tar, back the archive up and empty $TEMP again.
	#   $1   - repository suffix; the repository is "$REPO_BASE-<suffix>"
	#   $2.. - tar option(s) selecting the compressor
	# A failing compressor only produces a warning so that the remaining
	# compressors are still benchmarked.
	backup_compressed() {
	  local suffix=$1
	  shift
	  local repo="$REPO_BASE-$suffix"

	  echo "------------------------------------------------------------"
	  printf '\n%s\n' "$repo"
	  /usr/bin/time -f "Compression took %e seconds" \
	    tar -cv "$@" -f "$TEMP/archive.tar.z" "$SOURCE" \
	    || printf 'warning: compression failed for %s\n' "$suffix" >&2
	  echo
	  restic --repo="$repo" backup "$TEMP"
	  rm -rf -- "$TEMP" && mkdir -- "$TEMP"
	}

	# Fail early when a tool every step depends on is missing.
	for TOOL in restic tar; do
	  command -v "$TOOL" >/dev/null || die "required tool not found: $TOOL"
	done
	[[ -x /usr/bin/time ]] || die "GNU time is required at /usr/bin/time"

	echo "Starting with clean folder..."
	# Removing $BASE also removes the input, temp and repository folders.
	rm -rf -- "${BASE:?}"
	mkdir -p -- "$SOURCE" "$TEMP" || die "cannot create working folders in $BASE"


	printf '\nInitializing restic repos...\n'
	for NAME in input gzip bzip2 xz rsyncable-gzip rsyncable-pigz rsyncable-zstd; do
	  restic init --repo="$REPO_BASE-$NAME" \
	    || die "cannot initialize repository $REPO_BASE-$NAME"
	done

	for ((n = 1; n <= NUM_FILES; n++)); do
	  printf -v i '%03d' "$n"
	  # Random 8-character file name, so files do not sort in creation order.
	  # The pipes must be real (unescaped) pipes; an escaped pipe would be
	  # handed to the first command as a literal argument.
	  INDEX=$(tr -dc 'a-z0-9' < /dev/urandom | head -c 8)
	  echo "============================================================"
	  echo "Adding file $i under $SOURCE/$INDEX.txt"
	  tr -dc '[:alnum:] \n' < /dev/urandom | head -c "$FILE_SIZE" > "$SOURCE/$INDEX.txt"
	  ls -lh "$SOURCE"

	  # Baseline: back up the uncompressed input folder directly.
	  REPO="$REPO_BASE-input"
	  printf '\n%s\n' "$REPO"
	  restic --repo="$REPO" backup "$SOURCE"

	  # Compressors built into tar.
	  for ALGO in gzip bzip2 xz; do
	    backup_compressed "$ALGO" "--$ALGO"
	  done

	  # "rsyncable" variants, equivalent to: tar -cv ... | <compressor> --rsyncable
	  backup_compressed rsyncable-gzip --use-compress-program="gzip --rsyncable"
	  backup_compressed rsyncable-pigz --use-compress-program="pigz --rsyncable"
	  # Attention: rsyncable introduced in https://github.com/facebook/zstd/releases/tag/v1.3.8
	  backup_compressed rsyncable-zstd --use-compress-program="zstd --rsyncable"

	done
	rm -rf -- "$TEMP"
	printf '\nFinal repo sizes, compared to file input of %s of %s each:\n' "$NUM_FILES" "$FILE_SIZE"
	du -hs "$BASE"/*