This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import pandas as pd | |
from loguru import logger | |
from rapidfuzz import process | |
DOWNLOADS_DIR = Path.home() / "Downloads" | |
def get_closest_match( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def detailed_column_summary(df: pd.DataFrame) -> pd.DataFrame: | |
summary = [] | |
for ix, col in enumerate(df.columns): | |
unique_vals = df[col].unique() | |
summary.append( | |
{ | |
"column_index": ix, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional | |
import numpy as np | |
import pandas as pd | |
import holidays | |
def get_holidays( | |
years: Optional[list[int]] = None, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
host: Tommy-Lees-MacBook-Air.local | |
Building DAG of jobs... | |
Retrieving .snakemake/storage/http/zenodo.org/records/13757228/files/shipdensity_global.zip from storage. | |
Retrieving .snakemake/storage/http/zenodo.org/records/14144752/files/converters.csv from storage. | |
Retrieving .snakemake/storage/http/zenodo.org/records/14144752/files/lines.csv from storage. | |
Retrieving .snakemake/storage/http/zenodo.org/records/14144752/files/transformers.csv from storage. | |
Retrieving .snakemake/storage/http/zenodo.org/records/14144752/files/links.csv from storage. | |
Retrieving .snakemake/storage/http/zenodo.org/records/14144752/files/map.html from storage. | |
Retrieving .snakemake/storage/http/gisco-services.ec.europa.eu/distribution/v2/nuts/download/ref-nuts-2021-01m.geojson.zip from storage. | |
Retrieving .snakemake/storage/http/zenodo.org/records/14144752/files/buses.csv from storage. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from geopandas import GeoDataFrame | |
from typing import Union | |
import pandas as pd | |
def geodf_to_wkt_csv(gdf: GeoDataFrame, output_file: Union[str, None] = None) -> pd.DataFrame: | |
""" | |
Converts a GeoDataFrame's geometry column to WKT format and optionally saves it to a CSV file. | |
Args: | |
gdf (GeoDataFrame): The input GeoDataFrame to be converted. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# get the middle output are shapefile (MSOA) | |
curl --header 'Host: borders.ukdataservice.ac.uk' --user-agent 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0' --header 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' --header 'Accept-Language: en-US,en;q=0.5' --referer 'https://statistics.ukdataservice.ac.uk/' --header 'Upgrade-Insecure-Requests: 1' --header 'Sec-Fetch-Dest: document' --header 'Sec-Fetch-Mode: navigate' --header 'Sec-Fetch-Site: same-site' --header 'Sec-Fetch-User: ?1' 'https://borders.ukdataservice.ac.uk/ukborders/easy_download/prebuilt/shape/infuse_msoa_lyr_2011.zip' --output 'infuse_msoa_lyr_2011.zip' | |
# postcode to lsoa msoa | |
# https://geoportal.statistics.gov.uk/datasets/15469f4d3a484c37a41b441212667c22/about | |
curl --header 'Host: www.arcgis.com' --user-agent 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0' --header 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' --h |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
if __name__ == "__main__": | |
s3_client = boto3.client("s3") | |
bucket_name = BUCKET_NAME | |
object_key = "cpi_adjusted_2023.csv" | |
file_name = (downloads / "cpi_adjusted_2023.csv").expanduser().resolve().as_posix() | |
# Upload file with proper metadata |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import dask.dataframe as dd | |
import geopandas as gpd | |
from pyproj import Transformer | |
from dask.diagnostics import ProgressBar | |
def transform_coordinates(partition: dd.DataFrame) -> dd.DataFrame: | |
# Define the transformer: EPSG:27700 (BNG) to EPSG:4326 (WGS84) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Position | Bib# | Time | Surname | First Name | Category | Nationality | Affiliation | year | gender | hours | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 18 | 0 days 20:33:50 | Bunn | Spencer | MV45 | GBR | 2018 | M | 20.563888888888886 | ||
1 | 2 | 26 | 0 days 20:33:50 | Clark | Douglas | MSnr | GBR | 2018 | M | 20.563888888888886 | ||
2 | 3 | 15 | 0 days 20:45:41 | Browne | Kevin | MSnr | GBR | 2018 | M | 20.76138888888889 | ||
3 | 4 | 22 | 0 days 21:33:05 | Chataway | Matthew | MSnr | GBR | Mornington Chasers | 2018 | M | 21.551388888888887 | |
4 | 5 | 3 | 0 days 21:41:46 | Antrobus | Richard | MSnr | GBR | 2018 | M | 21.69611111111111 | ||
5 | 6 | 86 | 0 days 22:13:28 | Rees | Tom | MV45 | GBR | 2018 | M | 22.224444444444444 | ||
6 | 7 | 83 | 0 days 22:47:04 | Prosser | Clare | FV45 | GBR | Town & Country Harriers | 2018 | F | 22.784444444444443 | |
7 | 8 | 70 | 0 days 22:47:05 | McLaughlin | James | MV50 | IRL | Hook Norton Harriers | 2018 | M | 22.78472222222222 | |
8 | 9 | 9 | 0 days 22:58:50 | Bennett | James | MV40 | GBR | Transition Running | 2018 | M | 22.980555555555554 |
We can make this file beautiful and searchable if this error is corrected: It looks like row 8 should actually have 20 columns, instead of 2 in line 7.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
,Position,Bib#,Time,Name,Category,Affiliation,Aggs,Birdlip,Painswick,Coaley,Wotton,Horton,Tormarton,Cold Ashton,Weston,start_date,year,gender,hours | |
0,1.0,69,0 days 20:25:08,"Migliuolo, Tommaso",MSnr,,16:39,18:40,20:28,23:01,01:45,04:00,05:32,06:40,07:56,2019-06-29,2019,M,20.41888888888889 | |
1,2.0,93,0 days 20:34:28,"Sherley-Dale, Zen",MSnr,,16:38,18:51,20:37,23:21,02:05,04:29,05:54,06:52,08:10,2019-06-29,2019,M,20.574444444444445 | |
2,3.0,13,0 days 20:59:50,"Chapman, Toby",MSnr,Bournemouth AC,16:39,18:51,20:37,23:20,02:05,04:29,05:10,07:07,08:32,2019-06-29,2019,M,20.99722222222222 | |
3,4.0,76,0 days 21:16:53,"Nieuwenhuys, Greg",MSnr,,16:50,19:11,21:15,00:01,02:35,04:40,06:13,07:14,08:48,2019-06-29,2019,M,21.28138888888889 | |
4,5.0,55,0 days 21:40:46,"Lang, Mike",MV50,Corsham Running Club,17:03,19:17,21:15,00:04,02:40,04:57,06:30,07:40,09:01,2019-06-29,2019,M,21.679444444444446 | |
5,6.0,64,0 days 22:15:10,"Mendes, Mauricio",MV40,,16:38,19:00,20:53,23:55,02:40,04:57,06:59,08:09,09:46,2019-06-29,2019,M,22.25277777777778 | |
6,7.0 |
NewerOlder