Last active
May 2, 2020 07:54
-
-
Save ITler/0a82f3ff858b366c9a9344e07c3d206c to your computer and use it in GitHub Desktop.
Parse free springer book CSV sheet and download titles
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Parse the free Springer book CSV sheet and download the listed titles.
#
# The input file should be based on the downloadable Excel sheets, but saved
# as a CSV file (field_delimiter=; string_delimiter=<empty>).
#
# Positional arguments:
#   $1 - path to the CSV input file
#   $2 - first line of the input file that is processed (default: 2)
#   $3 - 1-based index of the column holding the book URL (default: 19)
INPUT_FILE="${1}"
INPUT_FILE_CONTENT_START_LINE="${2:-2}"
URL_COLUMN_IDX="${3:-19}"
#######################################
# Build the relative output path (without extension) for one book.
# Looks up the row(s) of the input file that contain the URL and prints
# "<field 9>/<field 1> (<field 2>) [<field 3>]". Every "/" inside the fields
# is replaced by "-" so that only the folder separator remains.
# Arguments:
#   $1 - path to the ';'-separated input file
#   $2 - book URL to look up
# Outputs:
#   one path per matching row on stdout
# Returns:
#   non-zero if no row contains the URL or the file cannot be read
#######################################
assemble_download_file_basename() {
  local input_file="${1}"
  local url="${2}"
  local line

  # -F: the URL is a literal string, not a regular expression (a '.' or '?'
  # in it must not match arbitrary characters). '--' keeps a pattern that
  # starts with '-' from being parsed as an option.
  line=$(grep -F -- "${url}" "${input_file}") || return

  printf '%s\n' "${line}" \
    | awk -F';' '{print $9"folderseparator"$1 " (" $2 ") [" $3 "]"}' \
    | sed 's,/,-,g;s,folderseparator,/,'
}
#######################################
# Download the EPUB edition of one book into ./downloaded_epubs.
# Arguments:
#   $1 - book URL as listed in the input file
#   $2 - target path below the download folder, without the ".epub" suffix
# Outputs:
#   writes "downloaded_epubs/<$2>.epub"; diagnostics go to stderr
# Returns:
#   0 on success, non-zero if the target folder cannot be created, the EPUB
#   URL cannot be resolved or the download itself fails
#######################################
download_epub() {
  local url="${1}"
  local file_name="${2}.epub"
  local download_folder=downloaded_epubs
  local target="${download_folder}/${file_name}"
  local epub_url

  # The target always contains at least one '/', so stripping the last path
  # component yields the folder that has to exist.
  if ! mkdir -p -- "${target%/*}"; then
    printf 'Cannot create folder for: %s\n' "${target}" >&2
    return 1
  fi

  epub_url=$(parse_epub_url "${url}")
  if [[ -z "${epub_url}" ]]; then
    printf 'Cannot resolve EPUB URL for: %s\n' "${url}" >&2
    return 1
  fi

  # -f: fail on HTTP errors instead of saving the server's error page as an
  # .epub file.
  if ! curl -fsL -o "${target}" "${epub_url}"; then
    printf 'Download failed: %s\n' "${epub_url}" >&2
    return 1
  fi
}
#######################################
# Derive the EPUB download URL for a book URL.
# Follows the redirects of the given URL, then rewrites the final URL by
# replacing its first "/book" with "/download/epub" and appending ".epub".
# Arguments:
#   $1 - book URL as listed in the input file
# Outputs:
#   the EPUB download URL on stdout
# Returns:
#   non-zero if curl fails or reports no effective URL
#######################################
parse_epub_url() {
  local url="${1}"
  local effective_url

  # -L follows redirects, -o /dev/null discards the body and
  # -w '%{url_effective}' prints the URL that was fetched last.
  effective_url=$(curl -Ls -o /dev/null -w '%{url_effective}' "${url}") || return
  [[ -n "${effective_url}" ]] || return 1

  printf '%s\n' "${effective_url}" \
    | sed -E 's,/book,/download/epub,;s/$/.epub/'
}
#######################################
# Print the URL column of every data row of the input file.
# Arguments:
#   $1 - path to the ';'-separated input file
#   $2 - number of the first line to process (lines before it are skipped)
#   $3 - 1-based index of the column to print
# Outputs:
#   one column value per processed line on stdout
# Returns:
#   2 if $2 or $3 is not a non-negative integer, otherwise awk's status
#######################################
parse_download_links() {
  local input_file="${1}"
  local input_file_content_start_line="${2}"
  local url_column_index="${3}"
  local number

  for number in "${input_file_content_start_line}" "${url_column_index}"; do
    if [[ ! "${number}" =~ ^[0-9]+$ ]]; then
      printf 'Not a non-negative integer: %s\n' "${number}" >&2
      return 2
    fi
  done

  # Pass the numbers with -v instead of splicing them into the program text,
  # so they can never be interpreted as awk code.
  awk -F';' \
    -v first="${input_file_content_start_line}" \
    -v col="${url_column_index}" \
    'NR >= first + 0 { print $(col + 0) }' "${input_file}"
}
#######################################
# Download every book listed in the input file.
# Arguments:
#   $1 - path to the ';'-separated input file
#   $2 - number of the first line to process
#   $3 - 1-based index of the column holding the book URL
# Outputs:
#   a "Downloading: <path>" line per book on stdout
#######################################
main() {
  local url
  local filename

  # Read the URLs line by line so they are neither word-split nor
  # glob-expanded. The list is fed through fd 3 so that commands inside the
  # loop cannot consume it via stdin.
  while IFS= read -r url <&3; do
    # Rows without a URL would otherwise match every line of the input file.
    [[ -n "${url}" ]] || continue
    filename=$(assemble_download_file_basename "${1}" "${url}")
    printf 'Downloading: %s\n' "${filename}"
    download_epub "${url}" "${filename}"
  done 3< <(parse_download_links "${1}" "${2}" "${3}")
}
# The expansion below must stay unquoted: it expands to nothing when
# __SOURCED__ is unset or empty and to the `return` builtin otherwise.
${__SOURCED__:+return} # to not run code after this line during shellspec tests

# Without an input file there is nothing to parse; show how to call the script.
if [[ -z "${INPUT_FILE}" ]]; then
  printf 'Usage: %s <input_file.csv> [content_start_line] [url_column_index]\n' \
    "${0##*/}" >&2
  exit 2
fi

main "${INPUT_FILE}" "${INPUT_FILE_CONTENT_START_LINE}" "${URL_COLUMN_IDX}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Prerequisites
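The input file must be based on the downloadable Excel sheet, saved as a CSV file (field delimiter ; and string delimiter empty)
Usage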
Download the script
Call this script
Downloaded files path would be
./downloaded_epubs/<Language>/<Title> (<Author>) [<ISBN>].epub
, which is parsed from the input file