alllexx88 · December 3, 2016 19:40
diff --git a/myrulib_to_calibre.sh b/myrulib_to_calibre.sh
 #!/bin/bash

 SOURCE_DB="myrulib.db"

 TARGET_DB="metadata_my.db"

 #an empty Calibre database, with books and series
 # create/update triggers removed
 EMPTY_DB="metadata_empty.db"

 LIB_ROOT="./"

 function esc_q {
 # double quotes  escape themselves in SQL
    sed 's/"/""/g'
 }

 function esc_sed {
 # sed special characters escape
    sed -e 's/\\/\\\\"/g' -e 's~[\.\*\^\$/]\|\[\|\]~\\&~g'
 }

 function my_cut {
    local i="$1"
    i=$((i - 1))
    sed -n "s/\([^¶]*¶\)\{$i\}\([^¶]*\).*/\2/p"
 }

 function get_date {
    local arg=""
    if [ -n "$1" ]; then
        arg="--date=$1"
    fi
    date -u $arg "+%Y-%m-%d %H:%M:%S.%N%z" | sed 's/[0-9]\{2\}+0000$/+0000/'
 }

 function get_stamp {
    get_date $(stat "${LIB_ROOT}$1" -c%y)
 }

 # turn comma-separated list of authors into
 # newline separated one. Also remove some trash
 # from authors names
 function sanitize_authors {
    sed \
 -e 's/, \?\([[:upper:]]\|d[ie][[:upper:]]\|v[ao]n \|d[ea] \|de[ls] \|te[nr]\?'`
 `' \|tom \|де \|ван \|ди \|фон \)/\n\1/g' | sed \
 -e 's/, \?[ae]t al$//'  -e 's/, ред$//' \
 -e 's/\([[:upper:]]\)\.\?[, ]*$/\1./'
 }

 #filter get ids from authors names (case-insensetive)
 function auth_ids {
    local auths=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
    (sed -n -e "/^[^¶]*¶\(${auths}\)\$/Ip" | my_cut 1) <<< "${AUTHORS_FULL_NAMES}"
 }

 #same as auth_ids, but for series
 function series_ids {
    local series=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
    (sed -n -e "/^[^¶]*¶\(${series}\)\$/Ip" | my_cut 1) <<< "${SERIES}"
 }

 #same as auth_ids, but for tags
 function tag_ids {
    local tags=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
    (sed -n -e "/^[^¶]*¶\(${tags}\)\$/Ip" | my_cut 1) <<< "${TAGS}"
 }

 #get hierarchical tag from file path, e.g.,
 # "M_Mathematics/MC_Calculus/MCat_Advanced calculus/<file>.pdf" ->
 # "Mathematics.Calculus.Advanced calculus"
 function tag_from_filepath {
    sed -e 's|^[^/]*_||' -e 's|/[^/]*_|/|g' -e 's|/[^/]*$||' -e 's|/|.|g'
 }

 #same as auth_ids, but for langs
 function lang_ids {
    local langs=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
    (sed -n -e "/^[^¶]*¶\(${langs}\)\$/Ip" | my_cut 1) <<< "${LANGS}"
 }

 #feed stdin to sqlite, as well as log to "$1"
 function feed_sql_and_log {
    exec 3>>"$1"
    rm -f "$1"
    tee /dev/fd/3 | sqlite3 "${TARGET_DB}" 
 }

 #the most painful part: process book entries and collect/feed the respective
 # books, books_authors_link, books_series_link, books_tags_link,
 # books_languages_link, comments SQL INSERT commands
 function books_insert {
 # BOOKS:
 #<id>¶<title>¶<auths>¶<series>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<desc>

    local book
    local book_count=$(echo "${BOOKS}" | wc -l)
    local n=$(echo -n "${book_count}" | wc -c)
    (while IFS='' read -r book || [ -n "$book" ]; do
        local book_id=$(echo "$book" | my_cut 1)
        (awk "{printf \"[%07.4f]%% Inserting book %${n}d out of ${book_count}"`
                  `"\n\", (\$0*100)/${book_count}, \$0}" >&2) <<< "${book_id}"
        local title=$(echo "$book" | my_cut 2 | esc_q)
        local authors=$(echo "$book" | my_cut 3 | sanitize_authors)
        local author_sort=$(echo "$authors" | \
                            sed ':a;N;$!ba;s/\([^\n]*\)\n/\1 \& /g;s/.*/\L&/' | esc_q)
        local series=$(echo "$book" | my_cut 4)
        local file_path=$(echo "$book" | my_cut 5)
        local file_type=$(echo "$book" | my_cut 6)
        local md5sum=$(echo "$book" | my_cut 7)
        local langs=$(echo "$book" | my_cut 8 | sed -e 's/,/\n/g')
        local year=$(echo "$book" | my_cut 9)
        local description=$(echo "$book" | my_cut 10 | esc_q | \
                            sed 's/\\newline/\n/g')
        local timestamp=$(get_stamp "${file_path}")
        local ISBN=$(echo "${file_path}" | \
                     sed -n 's|.*(ISBN \([0-9]*\))[^/]*$|\1|p')
        #book hierarchical classification, extracted from file path
        local tag=$(echo "${file_path}" | tag_from_filepath)

        #books INSERT
        echo -e "INSERT INTO books (id,title,sort,timestamp,pubdate,"`
                                  `"author_sort,isbn,path,uuid,has_cover,"`
                                  `"last_modified)\n"`
               `"VALUES (${book_id},\"$title\",\"$title\",\"$timestamp\","`
                    `"\"$year\",\"${author_sort}\",\"$ISBN\",\"${file_path}\""`
                    `",\"$md5sum\",0,\"$timestamp\");"

        #books_authors_link INSERT
        auth_ids "$authors" | sed -n -e 's/.\+/'`
             `"INSERT INTO books_authors_link (book,author)\n"`
             `"VALUES (${book_id},&);/p"

        #books_series_link INSERT
         series_ids "$series" | sed -n -e 's/.\+/'`
             `"INSERT INTO books_series_link (book,series)\n"`
             `"VALUES (${book_id},&);/p"

        #books_tags_link INSERT
         tag_ids "$tag" | sed -n -e 's/.\+/'`
             `"INSERT INTO books_tags_link (book,tag)\n"`
             `"VALUES (${book_id},&);/p"

        #books_languages_link INSERT
         lang_ids "$langs" | sed -n -e 's/.\+/'`
             `"INSERT INTO books_languages_link (book,lang_code,item_order)\n"`
             `"VALUES (${book_id},&,0);/p"

        #comments INSERT
         if [ -n "${description}" ]; then
             echo -e "INSERT INTO comments (book,text)\n"`
                    `"VALUES (${book_id},\"${description}\");"
         fi
    done <<< "${BOOKS}") | feed_sql_and_log "$1"
 }

 cp -f "${EMPTY_DB}" "${TARGET_DB}"

 # AUTHORS_FULL_NAMES: '<id>¶<name>' lines
 AUTHORS_FULL_NAMES=$(sqlite3 "${SOURCE_DB}" 'SELECT full_name FROM authors;' | \
                     sanitize_authors | sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')

 # TAGS: '<id>¶<tag>' lines
 TAGS=$(sqlite3 "${SOURCE_DB}" 'SELECT file_name FROM books;' | tag_from_filepath | \
       sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')

 # LANGS: '<id>¶<lang>' lines
 LANGS=$(sqlite3 "${SOURCE_DB}" 'SELECT DISTINCT lang FROM books;' | \
        sed -e 's/,/\n/g' | sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')

 # BOOKS:
 #'<id>¶<title>¶<authors>¶<sequence>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<description>'
 BOOKS=$(sqlite3  -separator '<S_e^Pa_r^At_oR>' "${SOURCE_DB}" \
    'SELECT printf("<S_e^Pa_r^At_oR>%s",books.title),authors.full_name,'`
                     `'sequences.value,books.file_name,books.file_type,'`
                `'books.md5sum,books.lang,books.year,books.description '`
   `'FROM books,authors,sequences '`
   `'WHERE books.id_author=authors.id AND books.id_sequence=sequences.id;' | \
  sed -e 's/<S_e^Pa_r^At_oR>/¶/g' | sed ':a;N;$!ba;s/\n\([^¶]\)/\\newline\1/g' | \
  awk '{printf("%d%s\n", NR,$0)}')

 # SERIES: '<id>¶<name>'
 SERIES=$(sqlite3 "${SOURCE_DB}" 'SELECT value FROM sequences;' | sed '/^\s*$/d' | \
         sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')

 #INSERT authors
 echo "${AUTHORS_FULL_NAMES}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
     `'INSERT INTO authors (id,name,sort)\n'`
     `'VALUES (\1,\"\2\",\"\L\2\");/' | \
     feed_sql_and_log authors_insert_command.txt

 #INSERT tags
 echo "${TAGS}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
     `'INSERT INTO tags (id,name)\nVALUES (\1,\"\2\");/' | \
     feed_sql_and_log tags_insert_command.txt

 #INSERT languages
 echo "${LANGS}" | sed 's/\([^¶]*\)¶\(.*\)/'`
     `'INSERT INTO languages (id,lang_code)\n'`
     `'VALUES (\1,\"\2\");/' | \
     feed_sql_and_log languages_insert_command.txt

 #INSERT series
 echo "${SERIES}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
     `'INSERT INTO series (id,name,sort)\n'`
     `'VALUES (\1,\"\2\",\"\2\");/' | \
     feed_sql_and_log series_insert_command.txt

 #INSERT books, books_authors_link, books_series_link,
 # books_tags_link, books_languages_link, comments
 books_insert books_insert_command.txt
	#!/bin/bash

	SOURCE_DB="myrulib.db"

	TARGET_DB="metadata_my.db"

	#an empty Calibre database, with books and series
	# create/update triggers removed
	EMPTY_DB="metadata_empty.db"

	LIB_ROOT="./"

	function esc_q {
	# double quotes escape themselves in SQL
	sed 's/"/""/g'
	}

	function esc_sed {
	# sed special characters escape
	sed -e 's/\\/\\\\"/g' -e 's~[\.\*\^\$/]\\|\[\\|\]~\\&~g'
	}

	function my_cut {
	local i="$1"
	i=$((i - 1))
	sed -n "s/\([^¶]¶\)\{$i\}\([^¶]\).*/\2/p"
	}

	function get_date {
	local arg=""
	if [ -n "$1" ]; then
	arg="--date=$1"
	fi
	date -u $arg "+%Y-%m-%d %H:%M:%S.%N%z" \| sed 's/[0-9]\{2\}+0000$/+0000/'
	}

	function get_stamp {
	get_date $(stat "${LIB_ROOT}$1" -c%y)
	}

	# turn comma-separated list of authors into
	# newline separated one. Also remove some trash
	# from authors names
	function sanitize_authors {
	sed \
	-e 's/, \?\([[:upper:]]\\|d[ie][[:upper:]]\\|v[ao]n \\|d[ea] \\|de[ls] \\|te[nr]\?'`
	`' \\|tom \\|де \\|ван \\|ди \\|фон \)/\n\1/g' \| sed \
	-e 's/, \?[ae]t al$//' -e 's/, ред$//' \
	-e 's/\([[:upper:]]\)\.\?[, ]*$/\1./'
	}

	#filter get ids from authors names (case-insensetive)
	function auth_ids {
	local auths=$(echo "$1" \| esc_sed \| sed ':a;N;$!ba;s/\n/\\\|/g')
	(sed -n -e "/^[^¶]*¶\(${auths}\)\$/Ip" \| my_cut 1) <<< "${AUTHORS_FULL_NAMES}"
	}

	#same as auth_ids, but for series
	function series_ids {
	local series=$(echo "$1" \| esc_sed \| sed ':a;N;$!ba;s/\n/\\\|/g')
	(sed -n -e "/^[^¶]*¶\(${series}\)\$/Ip" \| my_cut 1) <<< "${SERIES}"
	}

	#same as auth_ids, but for tags
	function tag_ids {
	local tags=$(echo "$1" \| esc_sed \| sed ':a;N;$!ba;s/\n/\\\|/g')
	(sed -n -e "/^[^¶]*¶\(${tags}\)\$/Ip" \| my_cut 1) <<< "${TAGS}"
	}

	#get hierarchical tag from file path, e.g.,
	# "M_Mathematics/MC_Calculus/MCat_Advanced calculus/<file>.pdf" ->
	# "Mathematics.Calculus.Advanced calculus"
	function tag_from_filepath {
	sed -e 's\|^[^/]_\|\|' -e 's\|/[^/]_\|/\|g' -e 's\|/[^/]*$\|\|' -e 's\|/\|.\|g'
	}

	#same as auth_ids, but for langs
	function lang_ids {
	local langs=$(echo "$1" \| esc_sed \| sed ':a;N;$!ba;s/\n/\\\|/g')
	(sed -n -e "/^[^¶]*¶\(${langs}\)\$/Ip" \| my_cut 1) <<< "${LANGS}"
	}

	#feed stdin to sqlite, as well as log to "$1"
	function feed_sql_and_log {
	exec 3>>"$1"
	rm -f "$1"
	tee /dev/fd/3 \| sqlite3 "${TARGET_DB}"
	}

	#the most painful part: process book entries and collect/feed the respective
	# books, books_authors_link, books_series_link, books_tags_link,
	# books_languages_link, comments SQL INSERT commands
	function books_insert {
	# BOOKS:
	#<id>¶<title>¶<auths>¶<series>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<desc>

	local book
	local book_count=$(echo "${BOOKS}" \| wc -l)
	local n=$(echo -n "${book_count}" \| wc -c)
	(while IFS='' read -r book \|\| [ -n "$book" ]; do
	local book_id=$(echo "$book" \| my_cut 1)
	(awk "{printf \"[%07.4f]%% Inserting book %${n}d out of ${book_count}"`
	`"\n\", (\$0*100)/${book_count}, \$0}" >&2) <<< "${book_id}"
	local title=$(echo "$book" \| my_cut 2 \| esc_q)
	local authors=$(echo "$book" \| my_cut 3 \| sanitize_authors)
	local author_sort=$(echo "$authors" \| \
	sed ':a;N;$!ba;s/\([^\n]\)\n/\1 \& /g;s/./\L&/' \| esc_q)
	local series=$(echo "$book" \| my_cut 4)
	local file_path=$(echo "$book" \| my_cut 5)
	local file_type=$(echo "$book" \| my_cut 6)
	local md5sum=$(echo "$book" \| my_cut 7)
	local langs=$(echo "$book" \| my_cut 8 \| sed -e 's/,/\n/g')
	local year=$(echo "$book" \| my_cut 9)
	local description=$(echo "$book" \| my_cut 10 \| esc_q \| \
	sed 's/\\newline/\n/g')
	local timestamp=$(get_stamp "${file_path}")
	local ISBN=$(echo "${file_path}" \| \
	sed -n 's\|.(ISBN \([0-9]\))[^/]*$\|\1\|p')
	#book hierarchical classification, extracted from file path
	local tag=$(echo "${file_path}" \| tag_from_filepath)

	#books INSERT
	echo -e "INSERT INTO books (id,title,sort,timestamp,pubdate,"`
	`"author_sort,isbn,path,uuid,has_cover,"`
	`"last_modified)\n"`
	`"VALUES (${book_id},\"$title\",\"$title\",\"$timestamp\","`
	`"\"$year\",\"${author_sort}\",\"$ISBN\",\"${file_path}\""`
	`",\"$md5sum\",0,\"$timestamp\");"

	#books_authors_link INSERT
	auth_ids "$authors" \| sed -n -e 's/.\+/'`
	`"INSERT INTO books_authors_link (book,author)\n"`
	`"VALUES (${book_id},&);/p"

	#books_series_link INSERT
	series_ids "$series" \| sed -n -e 's/.\+/'`
	`"INSERT INTO books_series_link (book,series)\n"`
	`"VALUES (${book_id},&);/p"

	#books_tags_link INSERT
	tag_ids "$tag" \| sed -n -e 's/.\+/'`
	`"INSERT INTO books_tags_link (book,tag)\n"`
	`"VALUES (${book_id},&);/p"

	#books_languages_link INSERT
	lang_ids "$langs" \| sed -n -e 's/.\+/'`
	`"INSERT INTO books_languages_link (book,lang_code,item_order)\n"`
	`"VALUES (${book_id},&,0);/p"

	#comments INSERT
	if [ -n "${description}" ]; then
	echo -e "INSERT INTO comments (book,text)\n"`
	`"VALUES (${book_id},\"${description}\");"
	fi
	done <<< "${BOOKS}") \| feed_sql_and_log "$1"
	}

	cp -f "${EMPTY_DB}" "${TARGET_DB}"

	# AUTHORS_FULL_NAMES: '<id>¶<name>' lines
	AUTHORS_FULL_NAMES=$(sqlite3 "${SOURCE_DB}" 'SELECT full_name FROM authors;' \| \
	sanitize_authors \| sort -uf \| awk '{printf("%d¶%s\n", NR,$0)}')

	# TAGS: '<id>¶<tag>' lines
	TAGS=$(sqlite3 "${SOURCE_DB}" 'SELECT file_name FROM books;' \| tag_from_filepath \| \
	sort -uf \| awk '{printf("%d¶%s\n", NR,$0)}')

	# LANGS: '<id>¶<lang>' lines
	LANGS=$(sqlite3 "${SOURCE_DB}" 'SELECT DISTINCT lang FROM books;' \| \
	sed -e 's/,/\n/g' \| sort -uf \| awk '{printf("%d¶%s\n", NR,$0)}')

	# BOOKS:
	#'<id>¶<title>¶<authors>¶<sequence>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<description>'
	BOOKS=$(sqlite3 -separator '<S_e^Pa_r^At_oR>' "${SOURCE_DB}" \
	'SELECT printf("<S_e^Pa_r^At_oR>%s",books.title),authors.full_name,'`
	`'sequences.value,books.file_name,books.file_type,'`
	`'books.md5sum,books.lang,books.year,books.description '`
	`'FROM books,authors,sequences '`
	`'WHERE books.id_author=authors.id AND books.id_sequence=sequences.id;' \| \
	sed -e 's/<S_e^Pa_r^At_oR>/¶/g' \| sed ':a;N;$!ba;s/\n\([^¶]\)/\\newline\1/g' \| \
	awk '{printf("%d%s\n", NR,$0)}')

	# SERIES: '<id>¶<name>'
	SERIES=$(sqlite3 "${SOURCE_DB}" 'SELECT value FROM sequences;' \| sed '/^\s*$/d' \| \
	sort -uf \| awk '{printf("%d¶%s\n", NR,$0)}')

	#INSERT authors
	echo "${AUTHORS_FULL_NAMES}" \| esc_q \| sed 's/\([^¶]\)¶\(.\)/'`
	`'INSERT INTO authors (id,name,sort)\n'`
	`'VALUES (\1,\"\2\",\"\L\2\");/' \| \
	feed_sql_and_log authors_insert_command.txt

	#INSERT tags
	echo "${TAGS}" \| esc_q \| sed 's/\([^¶]\)¶\(.\)/'`
	`'INSERT INTO tags (id,name)\nVALUES (\1,\"\2\");/' \| \
	feed_sql_and_log tags_insert_command.txt

	#INSERT languages
	echo "${LANGS}" \| sed 's/\([^¶]\)¶\(.\)/'`
	`'INSERT INTO languages (id,lang_code)\n'`
	`'VALUES (\1,\"\2\");/' \| \
	feed_sql_and_log languages_insert_command.txt

	#INSERT series
	echo "${SERIES}" \| esc_q \| sed 's/\([^¶]\)¶\(.\)/'`
	`'INSERT INTO series (id,name,sort)\n'`
	`'VALUES (\1,\"\2\",\"\2\");/' \| \
	feed_sql_and_log series_insert_command.txt

	#INSERT books, books_authors_link, books_series_link,
	# books_tags_link, books_languages_link, comments
	books_insert books_insert_command.txt