Created
August 29, 2023 19:31
-
-
Save olitreadwell/5b852c84b81d1824ec50ef41a1659877 to your computer and use it in GitHub Desktop.
Download Flatiron phase submissions: projects, videos, and blogs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

# Append a timestamped message to the shared run log.
# Globals:   processed_dir (read) - output directory; must be set first.
# Arguments: $1 - message text to record
log_message() {
  local message="$1"
  printf '%s - %s\n' "$(date +"%Y-%m-%d %H:%M:%S")" "$message" >> "${processed_dir}/process_log.txt"
}
# Extract the submitter's name from a saved HTML page's <title> tag.
# Takes the third ": "-separated field of the title line, strips the
# closing </title> tag, drops commas, and normalizes to lowercase
# snake_case (spaces -> underscores).
# Assumes titles look like "<title>X: Y: First Last</title>" — TODO confirm
# against the actual exported pages.
# Arguments: $1 - path to the HTML file
# Outputs:   normalized name on stdout
# Returns:   1 (after logging) when no name can be extracted
extract_name() {
  local file="$1"
  local name
  # Declaration split from assignment so the pipeline's status isn't masked.
  name=$(awk -F': ' '/<title>/{print $3}' "$file" \
    | sed 's/<\/title>//g' | tr -d ',' | tr ' ' '_' | tr '[:upper:]' '[:lower:]')
  if [ -z "$name" ]; then
    log_message "ERROR: Unable to extract name from $file."
    return 1
  fi
  echo "$name"
}
# Extract the first linked URL from a saved HTML page.
# Grabs the first double-quoted value on the first line containing
# '<a href' (i.e. the href attribute).
# Arguments: $1 - path to the HTML file
# Outputs:   the URL on stdout
# Returns:   1 (after logging) when no URL can be extracted
extract_url() {
  local file="$1"
  local url
  # Declaration split from assignment so awk's exit status isn't masked.
  url=$(awk -F'"' '/<a href/{print $2}' "$file")
  if [ -z "$url" ]; then
    log_message "ERROR: Unable to extract URL from $file."
    return 1
  fi
  echo "$url"
}
# Create a directory (including missing parents) if it doesn't exist.
# Arguments: $1 - directory path
# Side effects: logs INFO when the directory already exists, ERROR on failure.
create_directory() {
  local dir="$1"
  if [ ! -d "$dir" ]; then
    # -p creates missing parents (plain mkdir fails when the parent is absent)
    # and is a no-op if the directory appears between the test and the call.
    mkdir -p "$dir" || log_message "ERROR: Failed to create directory $dir."
  else
    log_message "INFO: Directory $dir already exists."
  fi
}
# Ensure a file exists (create it empty if necessary); log on failure.
# Arguments: $1 - file path
create_file() {
  local target="$1"
  if ! touch "$target"; then
    log_message "ERROR: Failed to create file $target."
  fi
}
# Rewrite an HTTPS GitHub URL as its SSH clone form, e.g.
#   https://github.com/user/repo -> git@github.com:user/repo.git
# Arguments: $1 - HTTPS repository URL
# Outputs:   SSH clone URL on stdout
convert_to_ssh() {
  local url="$1"
  local -a parts
  # Drop the scheme, then split host/owner/repo on '/'.
  IFS='/' read -r -a parts <<< "${url#https://}"
  echo "git@${parts[0]}:${parts[1]}/${parts[2]}.git"
}
# Write the CSV header row, but only for a brand-new file.
# Arguments: $1 - path to the CSV file
initialize_csv() {
  local csv_file="$1"
  # Guard clause: never clobber an existing file.
  if [ -f "$csv_file" ]; then
    return 0
  fi
  echo "name,project url,video url,blog url" > "$csv_file"
}
# Check whether a user already has a row in the CSV.
# Compares the first field exactly; the previous grep treated the name as a
# regex, so metacharacters in a name (e.g. '.') could match the wrong row.
# Arguments: $1 - user name (first CSV column)
#            $2 - path to the CSV file
# Returns:   0 if a row with that exact name exists, non-zero otherwise
user_exists_in_csv() {
  local name="$1"
  local csv_file="$2"
  awk -F, -v n="$name" '$1 == n { found = 1; exit } END { exit !found }' "$csv_file"
}
# Append a new row for a user, placing the URL in the column that matches
# the content type (project / video / blog) and leaving the rest empty.
# Arguments: $1 - name; $2 - content type; $3 - URL; $4 - CSV path
append_entry_to_csv() {
  local name="$1"
  local type="$2"
  local url="$3"
  local csv_file="$4"
  local row
  case "$type" in
    "GitHub Repo") row="$name,$url,," ;;
    "Video")       row="$name,,$url," ;;
    "Blog")        row="$name,,,$url" ;;
    *)
      log_message "Error: Unknown content type '$type' for user '$name'."
      return
      ;;
  esac
  echo "$row" >> "$csv_file"
}
# Update one column of an existing user's CSV row.
# Rewrites the file through a temp copy; the user's row (exact first-field
# match) gets the URL written into the column for this content type.
# Previously the three awk programs were duplicated per type and the
# '[ -f tmp ] && mv || log' chain both double-logged on an unknown type and
# logged the wrong message when mv itself failed.
# Arguments: $1 - name; $2 - content type; $3 - URL; $4 - CSV path
# Returns:   1 on unknown content type
update_entry_in_csv() {
  local name="$1"
  local type="$2"
  local url="$3"
  local csv_file="$4"
  local tmp_file="${csv_file}.tmp"
  local col
  # Map content type to its CSV column: name,project,video,blog.
  case "$type" in
    "GitHub Repo") col=2 ;;
    "Video")       col=3 ;;
    "Blog")        col=4 ;;
    *)
      log_message "Error: Unknown content type '$type' for user '$name'."
      return 1
      ;;
  esac
  # $col is awk's dynamic field reference: assign url into column number col.
  if awk -F, -v name="$name" -v url="$url" -v col="$col" \
      'BEGIN {OFS=","} $1 == name {$col = url} 1' "$csv_file" > "$tmp_file"; then
    mv "$tmp_file" "$csv_file"
  else
    rm -f "$tmp_file"
    log_message "Error updating CSV for user '$name' and content type '$type'."
  fi
}
# Record a (name, type, url) link in the CSV: update the user's existing row
# if present, otherwise append a fresh one. Creates the CSV (with header)
# on first use.
# Arguments: $1 - name; $2 - content type; $3 - URL; $4 - CSV path
append_to_csv() {
  local user="$1"
  local content_type="$2"
  local link="$3"
  local csv_path="$4"

  initialize_csv "$csv_path"

  if user_exists_in_csv "$user" "$csv_path"; then
    update_entry_in_csv "$user" "$content_type" "$link" "$csv_path"
  else
    append_entry_to_csv "$user" "$content_type" "$link" "$csv_path"
  fi
}
# Append a "<type>: <url>" line to the user's notes file under processed_dir.
# Globals:   processed_dir (read)
# Arguments: $1 - name; $2 - content type; $3 - URL
append_url_to_notes() {
  local name="$1"
  local type="$2"
  local url="$3"
  local notes_file="${processed_dir}/${name}/${name}_notes.md"
  if ! echo "${type}: ${url}" >> "$notes_file"; then
    log_message "ERROR: Failed to append URL to notes for user $name."
  fi
}
# Clone a user's GitHub repo (via SSH) into their processed directory.
# Globals:   processed_dir (read)
# Arguments: $1 - name; $2 - HTTPS GitHub URL
# Returns:   1 if git is not installed; logs on clone failure.
process_github_repo() {
  local name="$1"
  local url="$2"
  local ssh_url
  # Split decl/assign so convert_to_ssh's status isn't masked by 'local'.
  ssh_url=$(convert_to_ssh "$url")
  local dir_name="${processed_dir}/${name}"
  local dir_for_repo="${dir_name}/$(basename "${ssh_url%.git}")"
  if ! command -v git &> /dev/null; then
    log_message "ERROR: Git is not installed."
    return 1
  fi
  # Quote both args: unquoted expansions break on spaces/globs in the path.
  git clone "$ssh_url" "$dir_for_repo" || log_message "ERROR: Failed to clone GitHub repo from $url for user $name."
}
# Process every saved HTML submission page in a directory.
# For each file: extract the user's name and link, create the user's folder
# and notes file, record the link in notes and the shared CSV, and clone the
# repo when the link is a GitHub HTTPS URL.
# Globals:   processed_dir (read)
# Arguments: $1 - directory containing *.html files
#            $2 - content type label: "Blog", "Video", or "GitHub Repo"
process_content() {
  local dir="$1"
  local type="$2"
  local csv_file="${processed_dir}/all_links.csv"
  local file name url dir_name file_name
  for file in "$dir"/*.html; do
    # Without nullglob an empty directory yields the literal pattern and the
    # old code logged a spurious warning for it; skip non-existent matches.
    [ -e "$file" ] || continue
    name=$(extract_name "$file")
    url=$(extract_url "$file")
    if [[ -z "$name" || -z "$url" ]]; then
      log_message "WARNING: Skipping file due to error: $file"
      continue
    fi
    dir_name="${processed_dir}/${name}"
    file_name="${dir_name}/${name}_notes.md"
    create_directory "$dir_name"
    create_file "$file_name"
    append_url_to_notes "$name" "$type" "$url"
    append_to_csv "$name" "$type" "$url" "$csv_file"
    # Only attempt a clone for actual GitHub HTTPS links.
    if [[ $type == "GitHub Repo" && $url == https://github.com* ]]; then
      process_github_repo "$name" "$url"
    fi
  done
}
# --- Script entry point ------------------------------------------------------
# Usage: <script> <repos_dir> <videos_dir> <blogs_dir>
# Each argument is a directory of saved HTML submission pages for one type.

# Directories for repos, videos, and blogs
repos_dir="$1"
videos_dir="$2"
blogs_dir="$3"

# Check all three inputs, naming each missing directory (to stderr) instead
# of only emitting a generic message.
missing=0
for input_dir in "$repos_dir" "$videos_dir" "$blogs_dir"; do
  if [ ! -d "$input_dir" ]; then
    echo "Directory does not exist: '$input_dir'" >&2
    missing=1
  fi
done
if [ "$missing" -ne 0 ]; then
  echo "One or more directories do not exist" >&2
  exit 1
fi

# All output (log, per-user notes, clones, CSV) lands under ./processed.
processed_dir="./processed"

# Ensure processed directory exists
create_directory "$processed_dir"

# Initialize CSV with headers
initialize_csv "${processed_dir}/all_links.csv"

# Process HTML files in the blogs, videos, and repos directories
process_content "$blogs_dir" "Blog"
process_content "$videos_dir" "Video"
process_content "$repos_dir" "GitHub Repo"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
credit to @thompsonplyler for originally writing this
created with the help of ChatGPT4