USGS Terrain and RF Prop Processing Script for Dallas, Texas in HEAVY.AI
# -*- coding: utf-8 -*-
# RF Propagation & Terrain Modeling with Heavy.AI
# Copyright (c) 2025 Patrick Wilson
#
# Licensed under the Creative Commons Attribution 4.0 International License (CC BY 4.0)
# You may obtain a copy of the license at
# https://creativecommons.org/licenses/by/4.0/
#
# You are free to share, adapt, and build upon this work, even for commercial purposes,
# as long as you give appropriate credit.
#
# NO WARRANTIES. THE SCRIPT IS PROVIDED "AS IS".
"""
# HeavyAI Terrain and RF Prop Processing Script
This script performs a series of geospatial data processing steps using HeavyAI,
including:
- Loading LiDAR and DEM (Digital Elevation Model) data.
- Processing data strictly within the 'coppell_box' bounding box.
- Enriching data with UTM coordinates and join keys.
- Generating a Digital Surface Model (DSM).
- Integrating Overture buildings data.
- Performing KMeans clustering on building heights (conditionally using HeavyML or scikit-learn based on architecture).
- Identifying candidate sites for antennas using a CTE approach.
- Simulating RF propagation.
Make sure required libraries are installed:
# pip install geopandas boto3 pyproj heavyai pandas scikit-learn matplotlib folium overturemaps sqlalchemy
"""
import os
import sys
import subprocess
import pandas as pd
import geopandas as gpd
import boto3
from pyproj import Transformer # only Transformer is used below; pyproj's legacy transform() is deprecated
from sklearn.cluster import KMeans
# import matplotlib.pyplot as plt # Commented out as plotting requires interactive environment or saving to file
# import folium # Commented out as plotting requires interactive environment or saving to file
import math # Added for math.floor/ceil
import platform # Added to check system architecture
# --- Configuration ---
# HeavyAI connection parameters (Update with your details)
HEAVYAI_USER = 'admin'
HEAVYAI_PASSWORD = 'HyperInteractive' # Use environment variables or secure methods in production
HEAVYAI_HOST = 'heavydb' # Use 'localhost' or specific IP
HEAVYAI_PORT = 6274
HEAVYAI_DBNAME = 'dallas_greenfield_demo' # Database name
HEAVYAI_PROTOCOL = 'binary'
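# As noted above, hard-coding credentials is only suitable for a local demo. A minimal
# sketch of reading them from environment variables instead (the variable names below are
# illustrative, not required by the heavyai library); uncomment to use:
# HEAVYAI_USER = os.environ.get('HEAVYAI_USER', HEAVYAI_USER)
# HEAVYAI_PASSWORD = os.environ.get('HEAVYAI_PASSWORD', HEAVYAI_PASSWORD)
# HEAVYAI_HOST = os.environ.get('HEAVYAI_HOST', HEAVYAI_HOST)
# HEAVYAI_PORT = int(os.environ.get('HEAVYAI_PORT', str(HEAVYAI_PORT)))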
# Destination directory for downloaded/processed files (Adjust path as needed)
# Ensure this directory exists and has appropriate permissions (e.g., 777)
DESTINATION_DIR = '/var/lib/heavyai/import/dallas/' # Original path from notebook
# Example for local execution: DESTINATION_DIR = './data/dallas/'
# Define the bounding box in latitude/longitude to be used exclusively
# Updated coordinates based on user request
coppell_box_small = {
'y_min': 32.91298062550842,
'y_max': 32.96810526045711,
'x_min': -97.01634084824887,
'x_max': -96.95096678966328
}
print(f"Using Exclusive Bounding Box (Small): {coppell_box_small}")
# Define the number of clusters for KMeans
NUM_KMEANS_CLUSTERS = 25
print(f"Number of KMeans clusters set to: {NUM_KMEANS_CLUSTERS}")
# --- Helper Function to Execute and Print SQL ---
def execute_sql(conn_or_cursor, sql, params=None, commit=False, ignore_errors=False):
"""Executes SQL query and prints it first."""
print("--- Executing SQL ---")
# Replace placeholders for printing if params are provided
if params:
readable_sql = sql
for key, value in params.items():
# Basic replacement for readability, might not cover all edge cases
readable_sql = readable_sql.replace(f":{key}", repr(value))
print(readable_sql)
else:
print(sql)
print("---------------------")
try:
target_cursor = None
if hasattr(conn_or_cursor, 'cursor'): # It's a connection object
target_cursor = conn_or_cursor.cursor()
else: # It's a cursor object
target_cursor = conn_or_cursor
if params:
result = target_cursor.execute(sql, params)
else:
result = target_cursor.execute(sql)
if commit and hasattr(conn_or_cursor, 'commit'):
conn_or_cursor.commit()
return target_cursor # Return the cursor for fetching results if needed
except Exception as e:
print(f"SQL Execution Error: {e}")
if not ignore_errors:
# Decide if you want to raise the error or exit
# raise e
sys.exit(f"Stopping script due to SQL error: {e}")
else:
print("Ignoring error as requested.")
return None # Indicate error occurred but was ignored
# --- Helper Function to Check if Table Exists ---
def table_exists(cursor, table_name):
"""Checks if a table exists in the database."""
# Using information_schema might depend on database version/permissions
# An alternative is to try a simple SELECT and catch the error
sql = f"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = '{table_name.lower()}'"
print(f"--- Checking if table exists: {table_name} ---")
print(sql)
print("------------------------------------------")
try:
result_cursor = cursor.execute(sql)
count = result_cursor.fetchone()[0]
exists = count > 0
print(f"Table '{table_name}' exists: {exists}")
return exists
except Exception as e:
print(f"Error checking if table exists '{table_name}': {e}")
# Fallback check: Try selecting limit 1
try:
cursor.execute(f"SELECT * FROM {table_name} LIMIT 1;")
print(f"Table '{table_name}' exists (fallback check).")
return True
except Exception:
print(f"Table '{table_name}' does not exist (fallback check).")
return False
# --- Main Script Logic ---
def main():
"""Main function to run the geospatial processing pipeline."""
# --- 1. Establish HeavyAI Connection ---
print("\n--- 1. Establishing HeavyAI Connection ---")
try:
# Import heavyai dynamically to check version first
import heavyai
print(f"Using heavyai python library version {heavyai.__version__}")
from heavyai import connect
con = connect(
user=HEAVYAI_USER,
password=HEAVYAI_PASSWORD,
host=HEAVYAI_HOST,
port=HEAVYAI_PORT,
dbname=HEAVYAI_DBNAME,
protocol=HEAVYAI_PROTOCOL
)
cursor = con.cursor()
print("HeavyAI connection successful.")
except ImportError:
print("Error: heavyai library not found. Please install it (`pip install heavyai`).")
sys.exit(1)
except Exception as e:
print(f"Error connecting to HeavyAI: {e}")
sys.exit(1)
# --- 2. Setup Directories and S3 Client ---
print("\n--- 2. Setting up Directories and S3 Client ---")
try:
os.makedirs(DESTINATION_DIR, exist_ok=True)
# The notebook used `!sudo chmod 777`. Python's os.chmod uses octal.
# Be cautious with 777 permissions. Adjust as needed for your environment.
# os.chmod(DESTINATION_DIR, 0o777) # Might require root/sudo privileges to run the script
print(f"Ensured destination directory exists: {DESTINATION_DIR}")
# Add recursive chmod if needed, e.g., using subprocess or walking the tree
s3_client = boto3.client('s3')
print("Boto3 S3 client initialized.")
except PermissionError as e:
print(f"PermissionError setting up directory {DESTINATION_DIR}: {e}. Try creating manually or running with sufficient permissions.")
sys.exit(1)
except Exception as e:
print(f"Error during setup: {e}")
import traceback
traceback.print_exc() # Print the full traceback for debugging
sys.exit(1)
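# Related to the chmod note in step 2 above: if the HeavyDB server runs as a different user
# and cannot read the downloaded files, one hedged sketch of a recursive permission change in
# pure Python (the 0o775 mode is illustrative; pick the least-permissive mode that works):
# for _root, _dirs, _files in os.walk(DESTINATION_DIR):
#     os.chmod(_root, 0o775)
#     for _name in _files:
#         os.chmod(os.path.join(_root, _name), 0o775)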
# --- 3. Create Bounding Box Table (Optional Visualization) ---
print("\n--- 3. Creating Bounding Box Table (Optional) ---")
sql_create_bbox_table = """
CREATE TABLE IF NOT EXISTS coppell_box (
id INTEGER,
geom GEOMETRY(POLYGON, 4326) ENCODING COMPRESSED(32)
);
"""
execute_sql(cursor, sql_create_bbox_table, commit=True)
# Define polygon based on the SMALL box
polygon_wkt = (
"POLYGON(("
f"{coppell_box_small['x_min']} {coppell_box_small['y_min']}, "
f"{coppell_box_small['x_max']} {coppell_box_small['y_min']}, "
f"{coppell_box_small['x_max']} {coppell_box_small['y_max']}, "
f"{coppell_box_small['x_min']} {coppell_box_small['y_max']}, "
f"{coppell_box_small['x_min']} {coppell_box_small['y_min']}"
"))"
)
bbox_data = {'id': [1], 'geom': [polygon_wkt]}
df_bbox = pd.DataFrame(bbox_data)
sql_insert_bbox = """
INSERT INTO coppell_box (id, geom)
SELECT :id, ST_GeomFromText(:geom, 4326)
"""
# Check if table is empty before inserting to avoid duplicates
try:
# Execute the query and get the cursor back
result_cursor = execute_sql(cursor, "SELECT COUNT(*) FROM coppell_box;")
# Fetch the result from the returned cursor
count = result_cursor.fetchone()[0]
if count == 0:
print("Inserting small bounding box polygon into coppell_box table.")
for _, row in df_bbox.iterrows():
# Pass commit=False here, commit once after loop
execute_sql(cursor, sql_insert_bbox, {"id": row["id"], "geom": row["geom"]}, commit=False)
con.commit() # Commit after the loop
else:
# Optionally, update the existing polygon if needed
print("Bounding box polygon already exists in coppell_box table. Skipping insert.")
# Example update (uncomment if needed):
# sql_update_bbox = "UPDATE coppell_box SET geom = ST_GeomFromText(:geom, 4326) WHERE id = :id;"
# execute_sql(cursor, sql_update_bbox, {"id": 1, "geom": polygon_wkt}, commit=True)
# print("Updated existing bounding box polygon.")
except Exception as e:
print(f"Error inserting or checking coppell_box: {e}")
# --- 4. Download LiDAR Data ---
print("\n--- 4. Downloading LiDAR Data (LAZ files) ---")
laz_files_dir = os.path.join(DESTINATION_DIR, 'laz_files')
os.makedirs(laz_files_dir, exist_ok=True)
print(f"LiDAR files will be downloaded to: {laz_files_dir}")
# List of S3 file URLs for LiDAR (These cover the original larger area,
# subsequent steps will filter based on the small bounding box)
s3_lidar_urls = [
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8944.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8945.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8947.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8948.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB9044.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB9045.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB9047.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB9048.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8644.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8645.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8647.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8648.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8744.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8745.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8747.laz",
"s3://usgs-lidar/Projects/TX_Pecos_Dallas_2018_D19/TX_Pecos_Dallas_B3_2018/LAZ/USGS_LPC_TX_Pecos_Dallas_2018_D19_14SPB8748.laz"
]
# Function to download using boto3 with requester-pays
def download_from_s3(s3_url, dest_dir, s3_boto_client):
try:
parts = s3_url.replace("s3://", "").split('/', 1)
bucket = parts[0]
key = parts[1]
file_name = os.path.basename(key)
destination_path = os.path.join(dest_dir, file_name)
# Check if file already exists
if os.path.exists(destination_path):
print(f"Skipping download, file already exists: {destination_path}")
return
print(f"Downloading {s3_url} to {destination_path} (requester-pays)...")
s3_boto_client.download_file(
bucket,
key,
destination_path,
ExtraArgs={"RequestPayer": "requester"}
)
print(f"Downloaded {file_name}")
except Exception as e:
print(f"Failed to download {s3_url}: {e}")
# Remove a partially downloaded file, if one was created before the failure
if 'destination_path' in locals() and os.path.exists(destination_path):
os.remove(destination_path)
# Download the files
for url in s3_lidar_urls:
download_from_s3(url, laz_files_dir, s3_client)
print("LiDAR file download process complete.")
# --- 5. Load DEM Data ---
print("\n--- 5. Loading DEM Data (Raster) ---")
dem_table_name = 'usgs_dem_1m_tx_coppell_2018'
dem_enriched_table_name = f"{dem_table_name}_enriched"
# Drop existing tables first (optional, prevents errors if re-running)
execute_sql(cursor, f"DROP TABLE IF EXISTS {dem_table_name};", commit=True)
execute_sql(cursor, f"DROP TABLE IF EXISTS {dem_enriched_table_name};", commit=True)
# List of S3 URLs for DEM TIFF files (These cover the original larger area,
# subsequent steps will filter based on the small bounding box)
s3_dem_urls = [
"https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/TX_Pecos_Dallas_2018_D19/TIFF/USGS_1M_14_x69y365_TX_Pecos_Dallas_2018_D19.tif",
"https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/1m/Projects/TX_Pecos_Dallas_2018_D19/TIFF/USGS_1M_14_x68y365_TX_Pecos_Dallas_2018_D19.tif"
]
# dem =["USGS_1M_14_x69y365_TX_Pecos_Dallas_2018_D19.tif", "USGS_1M_14_x68y365_TX_Pecos_Dallas_2018_D19.tif"]
# # Load DEM data using COPY FROM
# for x in dem:
# load_raster_query = f"""
# COPY {dem_table_name} FROM '{x}' WITH (
# source_type='raster_file',
# raster_point_type='point'
# );
# """
# execute_sql(con, load_raster_query, commit=True) # Use connection for COPY
# print(f"DEM data loading into '{dem_table_name}' complete.")
# Load DEM data using COPY FROM
for s3_url in s3_dem_urls:
load_raster_query = f"""
COPY {dem_table_name} FROM '{s3_url}' WITH (
source_type='raster_file',
raster_point_type='point',
s3_region='us-west-2'
);
"""
execute_sql(con, load_raster_query, commit=True) # Use connection for COPY
print(f"DEM data loading into '{dem_table_name}' complete.")
# --- 6. Enrich DEM Data ---
print("\n--- 6. Enriching DEM Data (Adding UTM Coords and Join Key) ---")
# Calculate UTM bounds based on the corners of coppell_box_small
# Define geographic bounding box (WGS84) - using small box corners
min_lon, min_lat = coppell_box_small['x_min'], coppell_box_small['y_min']
max_lon, max_lat = coppell_box_small['x_max'], coppell_box_small['y_max']
# Set up transformer from WGS84 to UTM Zone 14N (EPSG:32614)
try:
transformer_4326_to_32614 = Transformer.from_crs("EPSG:4326", "EPSG:32614", always_xy=True)
# Transform corner points to UTM
min_x_utm, min_y_utm = transformer_4326_to_32614.transform(min_lon, min_lat)
max_x_utm, max_y_utm = transformer_4326_to_32614.transform(max_lon, max_lat)
# Floor/Ceil to integers to ensure the bounding box fully contains the transformed area
min_x_utm_int = math.floor(min_x_utm)
max_x_utm_int = math.ceil(max_x_utm)
min_y_utm_int = math.floor(min_y_utm)
max_y_utm_int = math.ceil(max_y_utm)
print(f"Calculated UTM Bounding Box from Small Lat/Lon Box (EPSG:32614):")
print(f" X: {min_x_utm_int} to {max_x_utm_int}")
print(f" Y: {min_y_utm_int} to {max_y_utm_int}")
except Exception as e:
print(f"Error during coordinate transformation: {e}")
sys.exit(1)
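# EPSG:32614 (UTM zone 14N) is hard-coded for the Dallas/Coppell area. For reference, the UTM
# zone can be derived from longitude with the standard formula zone = floor((lon + 180) / 6) + 1,
# and the northern-hemisphere EPSG code is 32600 + zone. Illustrative check only:
# utm_zone = math.floor((coppell_box_small['x_min'] + 180.0) / 6.0) + 1   # -> 14 for Coppell
# utm_epsg = 32600 + utm_zone                                             # -> 32614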
# Format SQL query to create the enriched DEM table
sql_enrich_dem = f"""CREATE TABLE IF NOT EXISTS {dem_enriched_table_name} AS
SELECT
raster_point AS geom,
FLOOR(ST_X(ST_TRANSFORM(raster_point, 32614))) AS x_utm,
FLOOR(ST_Y(ST_TRANSFORM(raster_point, 32614))) AS y_utm,
band_1_1 AS dem,
(
CAST(FLOOR(ST_X(ST_TRANSFORM(raster_point, 32614))) AS TEXT) || '|' ||
CAST(FLOOR(ST_Y(ST_TRANSFORM(raster_point, 32614))) AS TEXT)
) AS join_key
FROM {dem_table_name}
WHERE
raster_point IS NOT NULL
AND ST_X(ST_TRANSFORM(raster_point, 32614)) BETWEEN {min_x_utm_int} AND {max_x_utm_int}
AND ST_Y(ST_TRANSFORM(raster_point, 32614)) BETWEEN {min_y_utm_int} AND {max_y_utm_int};
"""
execute_sql(con, sql_enrich_dem, commit=True) # Use connection for CREATE TABLE AS
print(f"Enriched DEM table '{dem_enriched_table_name}' created (filtered by small bounding box).")
# --- 7. Load LiDAR Data into HeavyAI ---
print("\n--- 7. Loading LiDAR Data into HeavyAI ---")
lidar_table_name = "usgs_3dep_lidar_tx_coppell_2018"
lidar_metadata_table_name = f"{lidar_table_name}_metadata" # Adjusted name slightly
# Drop existing tables (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {lidar_table_name};", commit=True)
execute_sql(cursor, f"DROP TABLE IF EXISTS {lidar_metadata_table_name};", commit=True)
# Create the main LiDAR table structure
sql_create_lidar_table = f"""
CREATE TABLE IF NOT EXISTS {lidar_table_name} (
x_utm DOUBLE,
y_utm DOUBLE,
clutter_type TEXT ENCODING DICT(32),
x DOUBLE,
y DOUBLE,
z DOUBLE,
intensity INTEGER,
return_num TINYINT,
num_returns TINYINT,
scan_direction_flag TINYINT,
edge_of_flight_line_flag TINYINT,
classification SMALLINT,
scan_angle_rank TINYINT,
x_tile INTEGER,
y_tile INTEGER,
zoom_level INTEGER,
geom GEOMETRY(POINT, 4326) ENCODING COMPRESSED(32)
);
"""
execute_sql(cursor, sql_create_lidar_table, commit=True)
# Generate LiDAR metadata using tf_point_cloud_metadata
heavyai_import_laz_path = '/var/lib/heavyai/import/dallas/laz_files/*' # Path as seen by HeavyAI server
print(f"Generating LiDAR metadata from files in: {heavyai_import_laz_path}")
sql_create_metadata_table = f"""CREATE TABLE IF NOT EXISTS {lidar_metadata_table_name} AS
SELECT *
FROM TABLE(
tf_point_cloud_metadata(
path => '{heavyai_import_laz_path}'
)
);
"""
execute_sql(con, sql_create_metadata_table, commit=True) # Use connection for CREATE TABLE AS
# Fetch metadata into a Pandas DataFrame
print("Fetching LiDAR metadata...")
try:
# Explicitly select columns to handle potential inconsistencies in tf_point_cloud_metadata output
metadata_df = pd.read_sql(f"SELECT file_path, x_min_4326, x_max_4326, y_min_4326, y_max_4326, srid FROM {lidar_metadata_table_name}", con=con)
print(f"Successfully fetched metadata for {len(metadata_df)} LAZ file(s).")
except Exception as e:
print(f"Error fetching LiDAR metadata (may indicate missing columns like 'srid'): {e}")
print("Attempting to fetch without SRID (will default to 4326 later)...")
try:
# Fallback: fetch without SRID
metadata_df = pd.read_sql(f"SELECT file_path, x_min_4326, x_max_4326, y_min_4326, y_max_4326 FROM {lidar_metadata_table_name}", con=con)
metadata_df['srid'] = None # Add placeholder column
print(f"Successfully fetched metadata (without SRID) for {len(metadata_df)} LAZ file(s).")
except Exception as e2:
print(f"Error fetching LiDAR metadata even without SRID: {e2}")
sys.exit(1)
# Load LiDAR points using tf_load_point_cloud for each file based on metadata
print("Loading LiDAR points for each file...")
processed_files = 0
total_inserted_points = 0
for index, row in metadata_df.iterrows():
try:
file_path_on_server = row['file_path']
print(f"\nProcessing file ({index+1}/{len(metadata_df)}): {os.path.basename(file_path_on_server)}")
print(f" Bounds (EPSG:4326): X={row['x_min_4326']:.6f} to {row['x_max_4326']:.6f}, Y={row['y_min_4326']:.6f} to {row['y_max_4326']:.6f}")
# Check if the file's bounds intersect the small bounding box at all
file_xmin, file_ymin, file_xmax, file_ymax = row['x_min_4326'], row['y_min_4326'], row['x_max_4326'], row['y_max_4326']
box_xmin, box_ymin, box_xmax, box_ymax = coppell_box_small['x_min'], coppell_box_small['y_min'], coppell_box_small['x_max'], coppell_box_small['y_max']
if not (file_xmax >= box_xmin and file_xmin <= box_xmax and file_ymax >= box_ymin and file_ymin <= box_ymax):
print(f" Skipping file: Does not intersect with the target bounding box.")
continue
# Get SRID from metadata, default to 4326 if missing or invalid
try:
# Check if 'srid' column exists and value is not None/NaN
if 'srid' in row and pd.notna(row['srid']):
source_srid = int(row['srid'])
else:
raise ValueError("SRID missing or invalid in metadata")
except (KeyError, ValueError, TypeError) as srid_err:
print(f" Warning: Could not determine SRID for {os.path.basename(file_path_on_server)} from metadata ({srid_err}). Defaulting to 4326.")
source_srid = 4326 # Default to WGS84
print(f" Using source SRID: {source_srid}")
# SQL to insert data from a single LAZ file, filtering by the small bounding box
# Re-added bounding box parameters to tf_load_point_cloud call
sql_insert_lidar = f"""
INSERT INTO {lidar_table_name}
SELECT
x_utm,
y_utm,
clutter_type,
x, y, z, intensity, return_num, num_returns,
scan_direction_flag, edge_of_flight_line_flag, classification, scan_angle_rank,
CAST(FLOOR(x_utm / 1000.0) AS INT) AS x_tile,
CAST(FLOOR(y_utm / 1000.0) AS INT) AS y_tile,
CAST(13 AS INT) AS zoom_level,
ST_SetSRID(ST_Point(x, y), 4326) AS geom
FROM (
SELECT
ST_X(ST_TRANSFORM(ST_SetSRID(ST_POINT(x, y), {source_srid}), 32614)) AS x_utm,
ST_Y(ST_TRANSFORM(ST_SetSRID(ST_POINT(x, y), {source_srid}), 32614)) AS y_utm,
CASE classification
WHEN 1 THEN 'Processed, but unclassified'
WHEN 2 THEN 'Bare earth'
WHEN 6 THEN 'Building'
WHEN 7 THEN 'Low noise'
WHEN 9 THEN 'Water'
WHEN 17 THEN 'Bridge deck'
WHEN 18 THEN 'High noise'
WHEN 20 THEN 'Ignored ground (typically breakline proximity)'
WHEN 21 THEN 'Snow (if present and identifiable)'
WHEN 22 THEN 'Temporal exclusion (typically nonfavored data in intertidal zones)'
ELSE 'Unknown'
END AS clutter_type,
tf_load.*
FROM TABLE(
tf_load_point_cloud(
path => '{file_path_on_server}',
out_srs => 'EPSG:4326',
use_cache => true,
x_min => {row['x_min_4326']},
x_max => {row['x_max_4326']},
y_min => {row['y_min_4326']},
y_max => {row['y_max_4326']}
)
) AS tf_load
) AS final_query
WHERE
x BETWEEN {coppell_box_small['x_min']} AND {coppell_box_small['x_max']}
AND y BETWEEN {coppell_box_small['y_min']} AND {coppell_box_small['y_max']};
"""
execute_sql(con, sql_insert_lidar, commit=True)
processed_files += 1
print(f" Processed and inserted data (filtered by small bbox) from {os.path.basename(file_path_on_server)}")
except Exception as e:
# Catch errors during the processing of a single file and continue
print(f"Error processing file {row.get('file_path', 'N/A')}: {e}")
print("Skipping this file and continuing...")
print(f"LiDAR data loading complete. Checked {len(metadata_df)} files, processed {processed_files} intersecting files.")
# --- 8. Generate Digital Surface Model (DSM) ---
print("\n--- 8. Generating 1m Digital Surface Model (DSM) from LiDAR ---")
dsm_table_name = 'usgs_dsm_1m_tx_coppell_2018'
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {dsm_table_name};", commit=True)
# Use tf_geo_rasterize to create a 1m DSM grid from the filtered LiDAR points
sql_create_dsm = f"""
CREATE TABLE IF NOT EXISTS {dsm_table_name} AS
SELECT
ST_SetSRID(ST_Point(x_lon, y_lat), 4326) AS location,
x AS x_utm,
y AS y_utm,
z AS z,
(CAST(FLOOR(x) AS TEXT) || '|' || CAST(FLOOR(y) AS TEXT)) AS join_key,
CAST(x_lon AS FLOAT) AS x_lon,
CAST(y_lat AS FLOAT) AS y_lat
FROM (
SELECT
x,
y,
z,
ST_X(ST_Transform(ST_SetSRID(ST_Point(x, y), 32614), 4326)) AS x_lon,
ST_Y(ST_Transform(ST_SetSRID(ST_Point(x, y), 32614), 4326)) AS y_lat
FROM TABLE(
tf_geo_rasterize(
raster => CURSOR(
SELECT x_utm AS x, y_utm AS y, z
FROM {lidar_table_name}
WHERE classification != 7 AND classification != 18 -- Filter out noise points
),
agg_type => 'MAX', -- Use MAX for DSM
bin_dim_meters => 1,
geographic_coords => FALSE,
neighborhood_fill_radius => 1,
fill_only_nulls => TRUE
)
)
) subquery;
"""
execute_sql(con, sql_create_dsm, commit=True) # Use connection for CREATE TABLE AS
print(f"DSM table '{dsm_table_name}' created (based on filtered LiDAR).")
# --- 9. Download and Load Overture Buildings Data ---
print("\n--- 9. Downloading and Loading Overture Buildings Data ---")
overture_geojson_filename = 'overture_buildings.geojson'
overture_geojson_path_local = os.path.join(DESTINATION_DIR, overture_geojson_filename)
overture_geojson_path_heavyai = f"/var/lib/heavyai/import/dallas/{overture_geojson_filename}" # Example path
overture_table_name = "overture_buildings"
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {overture_table_name};", commit=True)
# Download Overture data using the overturemaps CLI tool
bbox_str = (
f"{coppell_box_small['x_min']},{coppell_box_small['y_min']},"
f"{coppell_box_small['x_max']},{coppell_box_small['y_max']}"
)
overture_command = [
'overturemaps', 'download',
'--bbox', bbox_str,
'-f', 'geojson',
'--type', 'building',
'-o', overture_geojson_path_local
]
print(f"Running Overture download command: {' '.join(overture_command)}")
try:
if not os.path.exists(overture_geojson_path_local):
subprocess.run(overture_command, check=True)
print(f"Overture buildings GeoJSON downloaded to: {overture_geojson_path_local}")
else:
print(f"Skipping download, Overture file already exists: {overture_geojson_path_local}")
print(f"Loading Overture data from '{overture_geojson_path_heavyai}' into table '{overture_table_name}'...")
sql_load_overture = f"""
COPY {overture_table_name}
FROM '{overture_geojson_path_heavyai}'
WITH (source_type='geo_file');
"""
execute_sql(con, sql_load_overture, commit=True) # Use connection for COPY
print(f"Overture buildings data loaded into '{overture_table_name}'.")
except FileNotFoundError:
print("Error: 'overturemaps' command not found. Make sure it's installed and in your PATH.")
sys.exit(1)
except subprocess.CalledProcessError as e:
print(f"Error during Overture download: {e}")
sys.exit(1)
except Exception as e:
print(f"Error loading Overture data into HeavyAI: {e}")
sys.exit(1)
# --- 10. Combine DSM, DEM, and Overture Buildings Data ---
print("\n--- 10. Combining DSM, DEM, and Overture Buildings Data ---")
combined_terrain_table = "usgs_overture_1m_terrain_tx_coppell" # Original name
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {combined_terrain_table};", commit=True)
# Create a table joining DSM, enriched DEM, and Overture buildings
sql_create_combined = f"""
CREATE TABLE IF NOT EXISTS {combined_terrain_table} AS
SELECT
dsm_table.location,
dsm_table.x_utm,
dsm_table.y_utm,
dsm_table.z AS dsm,
dem.dem,
(dsm_table.z - dem.dem) AS dsm_diff,
dsm_table.join_key,
ob.id AS building_id,
CASE
WHEN ob.geom IS NOT NULL THEN 'building'
ELSE 'unclassified'
END AS clutter_type
FROM {dsm_table_name} AS dsm_table
INNER JOIN {dem_enriched_table_name} AS dem
ON dsm_table.join_key = dem.join_key
LEFT JOIN {overture_table_name} AS ob
ON ST_Intersects(ob.geom, dsm_table.location)
WHERE dsm_table.z IS NOT NULL
AND dem.dem IS NOT NULL;
"""
execute_sql(con, sql_create_combined, commit=True) # Use connection for CREATE TABLE AS
print(f"Combined terrain table '{combined_terrain_table}' created.")
# --- 11. Aggregate Building Statistics ---
print("\n--- 11. Aggregating Per-Building Statistics ---")
buildings_aggregated_table = "overture_buildings_aggregated"
buildings_enriched_table = "overture_buildings_enriched"
# Drop existing tables (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {buildings_aggregated_table};", commit=True)
execute_sql(cursor, f"DROP TABLE IF EXISTS {buildings_enriched_table};", commit=True)
# Aggregate terrain stats per building footprint
sql_aggregate_buildings = f"""
CREATE TABLE IF NOT EXISTS {buildings_aggregated_table} AS
SELECT
b.id AS building_id,
b.class AS bldg_class,
b.subtype AS bldg_subtype,
b.num_floors,
CAST(MAX(t.dsm) AS FLOAT) AS roof_elevation,
CAST(MAX(t.dem) AS FLOAT) AS base_elevation,
CAST(MAX(t.dsm_diff) AS FLOAT) AS building_height,
CAST(MIN(t.dsm_diff) AS FLOAT) AS min_building_height,
CAST(AVG(t.dsm_diff) AS FLOAT) AS avg_building_height
FROM
{overture_table_name} b
JOIN
{combined_terrain_table} t
ON ST_Contains(b.geom, t.location)
WHERE
t.dsm IS NOT NULL
AND t.dem IS NOT NULL
AND t.dsm_diff IS NOT NULL
AND t.dsm_diff > 0
AND t.clutter_type = 'building'
GROUP BY
b.id, b.class, b.subtype, b.num_floors;
"""
execute_sql(con, sql_aggregate_buildings, commit=True) # Use connection for CREATE TABLE AS
print(f"Aggregated building stats table '{buildings_aggregated_table}' created.")
# Enrich aggregated building data with fallbacks, calculated fields, and centroids
sql_enrich_buildings = f"""
CREATE TABLE IF NOT EXISTS {buildings_enriched_table} AS
SELECT
agg.building_id as building_id,
b.class AS bldg_class,
b.subtype AS bldg_subtype,
b.num_floors AS num_floors_source,
COALESCE(
b.num_floors,
CASE
WHEN agg.avg_building_height <= 0 THEN 1
WHEN LOWER(b.class) IN ('house','terrace','apartments','static_caravan','residential','detached') THEN CEIL(agg.avg_building_height / 3.0)
WHEN LOWER(b.class) IN ('warehouse','office','retail','commercial','school','garage','industrial','hotel','church','shed','civic','public','post_office') THEN CEIL(agg.avg_building_height / 4.5)
WHEN LOWER(b.subtype) LIKE '%residential%' THEN CEIL(agg.avg_building_height / 3.0)
WHEN LOWER(b.subtype) LIKE '%commercial%' THEN CEIL(agg.avg_building_height / 4.5)
ELSE GREATEST(1, CEIL(agg.avg_building_height / 3.5))
END
) AS num_floors,
agg.roof_elevation,
agg.base_elevation,
agg.building_height,
agg.min_building_height,
agg.avg_building_height,
CAST(NULL AS TEXT) AS address,
ST_Area(ST_Transform(b.geom, 32614)) AS floor_sq_meters,
COALESCE(b.num_floors, 1) * ST_Area(ST_Transform(b.geom, 32614)) AS total_sq_meters,
CASE
WHEN LOWER(b.class) IN ('house','terrace','apartments','static_caravan','residential','detached') THEN 'residential'
WHEN LOWER(b.class) IN ('warehouse','office','retail','commercial','school','garage','industrial','hotel','church','shed','civic','public','post_office') THEN 'commercial'
WHEN LOWER(b.subtype) LIKE '%commercial%' THEN 'commercial'
WHEN LOWER(b.subtype) LIKE '%residential%' THEN 'residential'
WHEN ST_Area(ST_Transform(b.geom, 32614)) > 500 THEN 'commercial'
ELSE 'residential'
END AS bldg_use_category,
ST_X(ST_Centroid(b.geom)) AS lon,
ST_Y(ST_Centroid(b.geom)) AS lat,
ST_X(ST_Transform(ST_Centroid(b.geom), 32614)) AS x_utm,
ST_Y(ST_Transform(ST_Centroid(b.geom), 32614)) AS y_utm,
b.geom
FROM {buildings_aggregated_table} agg
JOIN {overture_table_name} b ON agg.building_id = b.id;
"""
execute_sql(con, sql_enrich_buildings, commit=True) # Use connection for CREATE TABLE AS
print(f"Enriched buildings table '{buildings_enriched_table}' created.")
# --- 12. Perform KMeans Clustering on Building Height ---
print("\n--- 12. Performing KMeans Clustering on Building Height ---")
kmeans_table_name = "building_kmeans" # Using a persistent table instead of temporary
kmeans_run_successfully = False # Flag to track if KMeans step creates the table
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {kmeans_table_name};", commit=True)
# Determine architecture
architecture = platform.machine().lower()
print(f"Detected architecture: {architecture}")
# Decide whether to attempt HeavyAI ML KMeans based on architecture
use_heavyai_kmeans = True # Default to trying HeavyAI ML
if architecture in ('x86_64', 'amd64'):
print("x86 architecture detected. Will attempt HeavyAI ML KMeans first.")
use_heavyai_kmeans = True
else:
print("Non-x86 architecture detected. Using scikit-learn for KMeans.")
use_heavyai_kmeans = False
# Option 1: Use HeavyAI's built-in KMeans UDTF (if x86 and available/licensed)
if use_heavyai_kmeans:
print("Attempting KMeans using HeavyAI UDTF...")
sql_kmeans_udtf = f"""
CREATE TABLE {kmeans_table_name} AS
SELECT
id,
cluster_id
FROM TABLE(
kmeans(
data => CURSOR(
SELECT
building_id AS id,
building_height
FROM {buildings_enriched_table}
WHERE building_height IS NOT NULL AND building_height > 0
),
num_clusters => {NUM_KMEANS_CLUSTERS},
num_iterations => 100
)
);
"""
try:
# Check if input data exists before running UDTF
check_cursor = execute_sql(cursor, f"SELECT COUNT(*) FROM {buildings_enriched_table} WHERE building_height IS NOT NULL AND building_height > 0;")
input_count = check_cursor.fetchone()[0]
if input_count >= NUM_KMEANS_CLUSTERS: # Check if enough points for requested clusters
execute_sql(con, sql_kmeans_udtf, commit=True) # Use connection for CREATE TABLE AS
print(f"KMeans clustering complete using HeavyAI UDTF. Results in '{kmeans_table_name}'.")
kmeans_run_successfully = True # Mark KMeans as successful
else:
print(f"Not enough building height data ({input_count} points) found for HeavyAI KMeans with {NUM_KMEANS_CLUSTERS} clusters. Skipping UDTF.")
use_heavyai_kmeans = False # Force fallback if not enough data
except Exception as e:
print(f"HeavyAI KMeans UDTF failed: {e}. Falling back to scikit-learn.")
use_heavyai_kmeans = False # Ensure fallback happens if UDTF fails even on x86
# Option 2: Fallback using scikit-learn (if ARM or if HeavyAI UDTF failed/skipped on x86)
if not use_heavyai_kmeans:
print("Performing KMeans using scikit-learn...")
try:
# Query the data needed for clustering
print("Fetching building heights for clustering...")
query_heights = f"SELECT building_id AS id, building_height FROM {buildings_enriched_table} WHERE building_height IS NOT NULL AND building_height > 0;"
# Add error handling for pd.read_sql if connection `con` is closed or invalid
if con is None: # Check if connection object exists
print("Error: HeavyAI connection object does not exist. Cannot fetch data.")
# elif getattr(con, 'closed', True): # Check if connection is closed (safer check)
# print("Error: HeavyAI connection is closed. Cannot fetch data for scikit-learn KMeans.")
else:
# Ensure connection is valid before reading
try:
# A simple query to check connection validity
con.execute("SELECT 1")
except Exception as conn_err:
print(f"Error: HeavyAI connection seems invalid: {conn_err}. Cannot fetch data.")
else:
data_df = pd.read_sql(query_heights, con=con)
if data_df.empty or len(data_df) < NUM_KMEANS_CLUSTERS: # Ensure enough data points for clustering
print(f"Not enough building height data ({len(data_df)} points) found for clustering with {NUM_KMEANS_CLUSTERS} clusters. Skipping KMeans.")
else:
print(f"Clustering {len(data_df)} buildings...")
X = data_df[['building_height']].values
num_clusters = NUM_KMEANS_CLUSTERS
# Adjust n_clusters if fewer unique heights than requested clusters
if len(data_df['building_height'].unique()) < num_clusters:
num_clusters = len(data_df['building_height'].unique())
print(f"Warning: Reducing number of clusters to {num_clusters} due to limited unique building heights.")
kmeans = KMeans(n_clusters=num_clusters, n_init=10, max_iter=100, init='k-means++', random_state=42)
data_df['cluster_id'] = kmeans.fit_predict(X)
print("KMeans prediction complete.")
# Create the results table in HeavyAI
sql_create_kmeans_table = f"""
CREATE TABLE IF NOT EXISTS {kmeans_table_name} (
id TEXT,
cluster_id INTEGER
);
"""
execute_sql(cursor, sql_create_kmeans_table, commit=True)
# Insert the results back into the database
print(f"Inserting KMeans results into '{kmeans_table_name}'...")
insert_count = 0
for _, row in data_df.iterrows():
sql_insert_kmeans = f"INSERT INTO {kmeans_table_name} (id, cluster_id) VALUES (:id, :cluster_id);"
execute_sql(cursor, sql_insert_kmeans, {"id": row['id'], "cluster_id": int(row['cluster_id'])}, commit=False)
insert_count += 1
con.commit()
print(f"Inserted {insert_count} rows into '{kmeans_table_name}'.")
kmeans_run_successfully = True # Mark KMeans as successful
except ImportError:
print("Error: scikit-learn not found. Cannot perform KMeans clustering.")
except Exception as e:
print(f"Error during scikit-learn KMeans clustering: {e}")
# --- 13. Identify Candidate Sites (Tallest Building per Cluster) ---
print("\n--- 13. Identifying Candidate Sites (Tallest Building per Cluster) ---")
candidate_sites_table = "candidate_sites"
candidate_sites_created = False # Flag to track if this step runs
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {candidate_sites_table};", commit=True)
# Check if KMeans ran successfully and created the table before proceeding
if kmeans_run_successfully and table_exists(cursor, kmeans_table_name):
# Find the building with the maximum height within each cluster using CTE
try:
sql_candidate_sites = f"""
CREATE TABLE {candidate_sites_table} AS
WITH max_heights AS (
SELECT
k.cluster_id,
MAX(b.building_height) AS max_height
FROM {kmeans_table_name} k
JOIN {buildings_enriched_table} b
ON k.id = b.building_id
WHERE b.building_height IS NOT NULL AND b.building_height > 0
GROUP BY k.cluster_id
)
SELECT
k.cluster_id,
b.building_id,
b.x_utm,
b.y_utm,
b.base_elevation,
b.building_height,
b.roof_elevation,
b.lon,
b.lat
FROM {kmeans_table_name} k
JOIN {buildings_enriched_table} b
ON k.id = b.building_id
JOIN max_heights mh
ON k.cluster_id = mh.cluster_id AND b.building_height = mh.max_height
ORDER BY k.cluster_id;
"""
execute_sql(con, sql_candidate_sites, commit=True) # Use connection for CREATE TABLE AS
print(f"Candidate sites table '{candidate_sites_table}' created using CTE method.")
candidate_sites_created = True # Mark as created
except Exception as e:
print(f"Error identifying candidate sites using CTE: {e}")
else:
print(f"Skipping candidate site identification because KMeans table '{kmeans_table_name}' was not created successfully or does not exist.")
# --- 14. Define Candidate Antennas ---
print("\n--- 14. Defining Candidate Antennas ---")
candidate_antennas_table = "candidate_antennas"
candidate_antennas_created = False # Flag
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {candidate_antennas_table};", commit=True)
# Only proceed if candidate sites were created
if candidate_sites_created and table_exists(cursor, candidate_sites_table):
# Create antenna definitions based on candidate sites
try:
sql_create_antennas = f"""
CREATE TABLE {candidate_antennas_table} AS
SELECT
cluster_id AS site_id,
'omnidirectional' AS antenna_type,
'IBC Generic 2600MHz EDT_0' AS antenna_type_name,
building_height + 5.0 AS height,
roof_elevation + 5.0 AS height_amsl,
40.0 AS power_watts,
2600.0 AS frequency_mhz,
0.0 AS azimuth,
0.0 AS downtilt,
x_utm,
y_utm,
lon AS longitude,
lat AS latitude
FROM {candidate_sites_table};
"""
execute_sql(con, sql_create_antennas, commit=True) # Use connection for CREATE TABLE AS
print(f"Candidate antennas table '{candidate_antennas_table}' created.")
candidate_antennas_created = True # Mark as created
except Exception as e:
print(f"Error defining candidate antennas: {e}")
else:
print(f"Skipping antenna definition because candidate sites table '{candidate_sites_table}' was not created or does not exist.")
# --- 15. Load Antenna Patterns ---
print("\n--- 15. Loading Antenna Patterns ---")
antenna_types_table = "candidate_antenna_types" # Renamed from notebook for clarity
antenna_types_s3_url = "s3://batteries-included/usgs_coppell_texas_rf_rprop/antenna_types.csv"
antenna_patterns_loaded = False # Flag
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {antenna_types_table};", commit=True)
# Define table structure
sql_create_antenna_types = f"""
CREATE TABLE IF NOT EXISTS {antenna_types_table} (
name TEXT ENCODING DICT(32),
gain_str TEXT,
manufacturer TEXT ENCODING DICT(32),
h_pattern_key DOUBLE[],
h_pattern_value DOUBLE[],
v_pattern_key DOUBLE[],
v_pattern_value DOUBLE[],
pattern_raw TEXT ENCODING DICT(32),
pattern_electrical_tilt INTEGER,
physical_antenna TEXT ENCODING DICT(32),
half_power_beamwidth DOUBLE,
min_frequency_mhz INTEGER,
max_frequency_mhz INTEGER,
pattern_electrical_azimuth DOUBLE,
frequency INTEGER,
h_width DOUBLE,
v_width DOUBLE,
front_to_back INTEGER,
tilt INTEGER,
model TEXT ENCODING DICT(32),
pattern_type TEXT ENCODING DICT(32),
comments TEXT,
gain DOUBLE
);
"""
execute_sql(cursor, sql_create_antenna_types, commit=True)
# Load data from S3 using COPY FROM
sql_load_antenna_types = f"""
COPY {antenna_types_table}
FROM '{antenna_types_s3_url}'
WITH (
header='true',
delimiter=',',
array_delimiter='|',
array_marker='[]',
s3_region='us-east-1'
);
"""
try:
execute_sql(con, sql_load_antenna_types, commit=True) # Use connection for COPY
print(f"Antenna patterns table '{antenna_types_table}' loaded successfully from S3.")
antenna_patterns_loaded = True # Mark as loaded
except Exception as e:
print(f"Error loading antenna patterns from S3: {e}")
# --- 16. Run RF Propagation Simulation ---
print("\n--- 16. Running RF Propagation Simulation ---")
rfp_results_table = "rfp_omni_static"
# Drop existing table (optional)
execute_sql(cursor, f"DROP TABLE IF EXISTS {rfp_results_table};", commit=True)
# Check if prerequisite tables were created/loaded successfully
if candidate_antennas_created and antenna_patterns_loaded and table_exists(cursor, combined_terrain_table):
try:
# Check counts again just before running
antennas_check_cursor = execute_sql(cursor, f"SELECT COUNT(*) FROM {candidate_antennas_table};")
antennas_count = antennas_check_cursor.fetchone()[0]
terrain_check_cursor = execute_sql(cursor, f"SELECT COUNT(*) FROM {combined_terrain_table};")
terrain_count = terrain_check_cursor.fetchone()[0]
patterns_check_cursor = execute_sql(cursor, f"SELECT COUNT(*) FROM {antenna_types_table};")
patterns_count = patterns_check_cursor.fetchone()[0]
if antennas_count > 0 and terrain_count > 0 and patterns_count > 0:
# Define the RF Propagation query using tf_rf_prop_max_signal
sql_run_rfp = f"""
CREATE TABLE {rfp_results_table} AS
SELECT
*,
ST_SetSRID(ST_Point(x, y), 4326) AS location
FROM TABLE(
tf_rf_prop_max_signal(
rf_sources => CURSOR(
SELECT
site_id AS rf_source_id,
longitude AS rf_source_x,
latitude AS rf_source_y,
height AS rf_source_z,
power_watts AS rf_source_signal_strength_watts,
frequency_mhz AS rf_source_signal_frequency_mhz,
azimuth AS rf_source_antenna_azimuth_degrees,
downtilt AS rf_source_antenna_downtilt_degrees,
antenna_type_name AS rf_source_antenna_type
FROM {candidate_antennas_table}
),
terrain_elevations => CURSOR(
SELECT
ST_X(location) AS x,
ST_Y(location) AS y,
dem AS ground_elevation_amsl_meters,
dem + GREATEST(0, dsm_diff) AS terrain_elevation_amsl_meters,
CASE
WHEN clutter_type = 'building' THEN 0.5
ELSE 0.1
END AS terrain_attenuation_dbm_per_meter
FROM {combined_terrain_table}
WHERE dem IS NOT NULL AND dsm_diff IS NOT NULL
),
antenna_patterns => CURSOR(
SELECT
name AS antenna_type,
gain AS antenna_gain,
h_pattern_key AS antenna_horizontal_degrees,
h_pattern_value AS antenna_horizontal_attenuation,
v_pattern_key AS antenna_vertical_degrees,
v_pattern_value AS antenna_vertical_attenuation
FROM {antenna_types_table}
),
rf_source_z_is_relative_to_terrain => true,
geographic_coords => true,
bin_dim_meters => 2.5,
assumed_receiver_height_agl => 1.5,
max_ray_travel_meters => 2500,
initial_rays_per_source => 360,
rays_per_bin_autosplit_threshold => 4,
min_receiver_signal_strength_dbm => -120,
default_source_height_agl_meters => 6,
ray_step_bin_multiple => 1,
loop_grain_size => 256
)
);
"""
execute_sql(con, sql_run_rfp, commit=True) # Use connection for CREATE TABLE AS
print(f"RF Propagation simulation complete. Results in '{rfp_results_table}'.")
else:
print("One or more input tables for RF Propagation are empty after final check. Skipping simulation.")
print(f" - Candidate Antennas: {antennas_count} rows")
print(f" - Combined Terrain: {terrain_count} rows")
print(f" - Antenna Patterns: {patterns_count} rows")
except Exception as e:
print(f"Error during RF Propagation simulation: {e}")
print("This likely means the RF Propagation UDTF is not available, not licensed, or encountered an internal error.")
else:
print("Skipping RF Propagation simulation because prerequisite steps (KMeans/Candidate Sites/Antenna Patterns) failed or were skipped.")
# --- 17. Cleanup and Close Connection ---
print("\n--- 17. Closing Connection ---")
if 'cursor' in locals() and cursor:
try:
if not cursor.closed:
cursor.close()
except Exception as e:
print(f"Error closing cursor: {e}") # Handle potential errors if cursor is already closed
if 'con' in locals() and con:
try:
# Check if connection is already closed before attempting to close
# This might require checking a specific attribute depending on the DBAPI driver
# For pyheavydb, checking if `con` is None might suffice after an error,
# but a more robust check might be needed. Let's assume `con.close()` handles being called multiple times.
con.close()
except Exception as e:
print(f"Error closing connection: {e}") # Handle potential errors if connection is already closed
print("HeavyAI connection closed.")
print("\n--- Script Finished ---")
# --- Script Entry Point ---
if __name__ == "__main__":
main()