Created
December 11, 2016 02:10
-
-
Save indisoluble/c6726c36ca91751d162b5ba1a9b61fa8 to your computer and use it in GitHub Desktop.
CSV: <Time>,<BeatsPerMinute> => CSV: <Time>,<BeatsPerMinute>,<WorkingOut>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Given:
- CSV file with path: <INPUT_DIR>/heartRate-<YYYYMMDD>.csv
- First line of CSV file: <COLUMN_TIME>,<COLUMN_HEARTRATE>
- Rest of the lines: <Timestamp with format YYYY-MM-DD HH:mm:ss>,<Beats per minute>
Then:
- Create a new CSV file in: <OUTPUT_DIR>/heartRate-<YYYYMMDD>.csv
- First line of CSV file: <COLUMN_TIME>,<COLUMN_HEARTRATE>,<COLUMN_WORKINGOUT>
- Rest of the lines: <Timestamp with format YYYY-MM-DD HH:mm:ss>,<Beats per minute>,<True or False>
The new column in the new CSV file indicates whether, at each moment, the
person whose heart rate is recorded in this file was working out or not.
The values for the new column will be set to True between
<WORKOUT_INIT_HOUR>:<WORKOUT_INIT_MIN>:<WORKOUT_INIT_SEC> and
<WORKOUT_END_HOUR>:<WORKOUT_END_MIN>:<WORKOUT_END_SEC>.
TIP: To set all values to False, set
<WORKOUT_INIT_HOUR>:<WORKOUT_INIT_MIN>:<WORKOUT_INIT_SEC> to 02:00:00 and
<WORKOUT_END_HOUR>:<WORKOUT_END_MIN>:<WORKOUT_END_SEC> to 01:00:00.
NOTICE: This code plots 2 graphs: the first one is based on the data in the
input file, and the second one is equal to the first but colors each dot with
a different color depending on the value in the third column.
TIP: Execute the first half of the script to find the range where the person
is exercising, then update
<WORKOUT_INIT_HOUR>:<WORKOUT_INIT_MIN>:<WORKOUT_INIT_SEC> and
<WORKOUT_END_HOUR>:<WORKOUT_END_MIN>:<WORKOUT_END_SEC> and execute all the
code.
"""
# Imports
from datetime import datetime as dt
from os import makedirs
from os.path import join

import matplotlib.pyplot as plt
import pandas as pd
# Constants

# Column names. The first two are the header names of the input CSV; the
# third is the label column added to the output CSV.
COLUMN_TIME = 'Time'
COLUMN_HEARTRATE = 'BPM'
COLUMN_WORKINGOUT = 'WorkingOut'

# Day of the recording. It selects the file name and is the date used for
# both ends of the workout window.
YEAR = 2016
MONTH = 12
DAY = 10

# Workout window (time of day). With the start later than the end, as here,
# no timestamp falls inside the window and every row is labelled False.
WORKOUT_INIT_HOUR = 2
WORKOUT_INIT_MIN = 0
WORKOUT_INIT_SEC = 0
WORKOUT_END_HOUR = 1
WORKOUT_END_MIN = 0
WORKOUT_END_SEC = 0

# Timestamp layout used for both reading and writing the CSV files.
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

# The same file name is used in the input and in the output directory.
INPUT_DIR = './Raw'
OUTPUT_DIR = './Processed'
FILENAME = 'heartRate-{:d}{:02d}{:02d}.csv'.format(YEAR, MONTH, DAY)
# Read dataset
input_file = join(INPUT_DIR, FILENAME)
dataset = pd.read_csv(input_file)
# Convert the timestamp column in one vectorized call. This replaces the
# `date_parser` callback of read_csv, which is deprecated since pandas 2.0
# and parsed every row with a Python-level strptime call.
dataset[COLUMN_TIME] = pd.to_datetime(dataset[COLUMN_TIME],
                                      format=DATE_FORMAT)
# Show the resulting column types; a bare `dataset.dtypes` expression
# displays nothing when the file is run as a script.
print(dataset.dtypes)
# Plot dataset: heart rate over time, drawn as a black line with a black
# marker on every sample.
raw_times = dataset[COLUMN_TIME].values
raw_heart_rates = dataset[COLUMN_HEARTRATE].values
plt.plot(raw_times, raw_heart_rates, color='black')
plt.scatter(raw_times, raw_heart_rates, color='black')
plt.title('{} vs {} ({})'.format(COLUMN_TIME, COLUMN_HEARTRATE, FILENAME))
plt.xlabel(COLUMN_TIME)
plt.ylabel(COLUMN_HEARTRATE)
plt.show()
# Label dataset
# Both ends of the workout window are placed on the recording day.
init_time = dt(YEAR, MONTH, DAY,
               WORKOUT_INIT_HOUR, WORKOUT_INIT_MIN, WORKOUT_INIT_SEC)
end_time = dt(YEAR, MONTH, DAY,
              WORKOUT_END_HOUR, WORKOUT_END_MIN, WORKOUT_END_SEC)
# A row is labelled True only when its timestamp is strictly inside the
# window; samples exactly at init_time or end_time are labelled False.
timestamps = dataset[COLUMN_TIME]
dataset[COLUMN_WORKINGOUT] = (timestamps.gt(init_time) &
                              timestamps.lt(end_time))
# Plot dataset again: same black line, but each sample is coloured by its
# label (blue = not working out, red = working out).
plt.plot(dataset[COLUMN_TIME].values,
         dataset[COLUMN_HEARTRATE].values,
         color='black')
working_out = dataset[COLUMN_WORKINGOUT]
# Blue markers are drawn first and red markers second, as before.
for label_mask, marker_color in ((~working_out, 'blue'),
                                 (working_out, 'red')):
    labelled_rows = dataset[label_mask]
    plt.scatter(labelled_rows[COLUMN_TIME].values,
                labelled_rows[COLUMN_HEARTRATE].values,
                color=marker_color)
plt.title('{} vs {} ({})'.format(COLUMN_TIME, COLUMN_HEARTRATE, FILENAME))
plt.xlabel(COLUMN_TIME)
plt.ylabel(COLUMN_HEARTRATE)
plt.show()
# Export dataset
# to_csv does not create missing directories, so make sure the output
# directory exists before writing (no error if it is already there).
makedirs(OUTPUT_DIR, exist_ok=True)
output_file = join(OUTPUT_DIR, FILENAME)
# Write without the index column and with timestamps in the same layout
# as the input file.
dataset.to_csv(output_file, index=False, date_format=DATE_FORMAT)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment