import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import os
# Copied files for 2025 from
# /uufs/chpc.utah.edu/common/home/horel-group/archive/uunet_loggernet
# Where are the 2024 files now??
# Read the CSV files and extract column names and units
def read_weather_csv(file_path):
    # Read the header rows separately to extract column names and units
    with open(file_path, 'r') as file:
        lines = file.readlines()
    # Column names are on the second line, units on the third; strip the quotes
    column_names = [col.strip('"') for col in lines[1].strip().split(',')]
    units = {col: unit.strip('"') for col, unit in zip(column_names, lines[2].strip().split(','))}
    # Data start on the 5th line (skip the header, names, units, and aggregation rows).
    # low_memory=False avoids DtypeWarning on columns with mixed types.
    df = pd.read_csv(file_path, skiprows=4, names=column_names, low_memory=False)
    return df, units
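# For reference, the parser above assumes the Campbell Scientific TOA5 layout
# (an illustrative sketch; the actual station metadata differs):
#   line 0: "TOA5","station","logger",...   <- file/environment header (skipped)
#   line 1: "TIMESTAMP","RECORD",...        <- column names
#   line 2: "TS","RN",...                   <- units
#   line 3: "","","Avg",...                 <- aggregation row (skipped)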
# Read the 1 min CSV files
df1, units1 = read_weather_csv('./uupya_2024.dat')
df2, units2 = read_weather_csv('./uupya_2025.dat')
# Read the 15 min SoilVUE CSV files
df1_soil, units1_soil = read_weather_csv('./uupya_soil_2024.dat')
df2_soil, units2_soil = read_weather_csv('./uupya_soil_2025.dat')
# Read the 30 min flux CSV files
df1_flux, units1_flux = read_weather_csv('./uupya_flux_2024.dat')
df2_flux, units2_flux = read_weather_csv('./uupya_flux_2025.dat')
# Merge the dataframes
df = pd.concat([df1, df2], ignore_index=True)
df_soil = pd.concat([df1_soil, df2_soil], ignore_index=True)
df_flux = pd.concat([df1_flux, df2_flux], ignore_index=True)
# Replace 'NaN' string values with np.nan in all columns
df.replace('NaN', np.nan, inplace=True)
df_soil.replace('NaN', np.nan, inplace=True)
df_flux.replace('NaN', np.nan, inplace=True)
# Convert TIMESTAMP to datetime, treat the logger timestamps as UTC, convert to
# Denver time, index by time, and cast the data columns to float
def to_denver_time(frame):
    frame['TIMESTAMP'] = pd.to_datetime(frame['TIMESTAMP'], errors='coerce')
    frame = frame.dropna(subset=['TIMESTAMP'])  # Drop NaT values
    frame['TIMESTAMP'] = frame['TIMESTAMP'].dt.tz_localize('UTC').dt.tz_convert('America/Denver')
    frame = frame.set_index('TIMESTAMP')
    return frame.astype(float)
df = to_denver_time(df)
df_soil = to_denver_time(df_soil)
df_flux = to_denver_time(df_flux)
# Save the merged DataFrames; keep the index so the timestamps are written out
df.to_csv('uupya_2024_2025.csv')
df_soil.to_csv('uupya_soil_2024_2025.csv')
df_flux.to_csv('uupya_flux_2024_2025.csv')
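# With TIMESTAMP written as the index, the merged files can be read back with the
# timestamps reparsed (a minimal sketch; utc=True handles the mixed UTC offsets
# across daylight saving changes):
df_check = pd.read_csv('uupya_2024_2025.csv')
df_check['TIMESTAMP'] = pd.to_datetime(df_check['TIMESTAMP'], utc=True).dt.tz_convert('America/Denver')
print(df_check['TIMESTAMP'].min(), df_check['TIMESTAMP'].max())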
# Print the units dictionaries
print("Units (1 min):", units1)
print("Units (soil):", units1_soil)
print("Units (flux):", units1_flux)
[Output: units dictionaries for the 1 min, SoilVUE, and flux tables]
# Plot time series for 'shf_Avg(2)'
plt.figure(figsize=(10, 5))
plt.plot(df_flux['shf_Avg(2)'], label='shf_Avg(2)', color='b')
plt.xlabel('Time')
plt.ylabel('Soil heat flux (W/m$^2$)')
plt.legend()
plt.grid()
plt.xticks(rotation=45)
plt.show()
# Plot time series for 'shf_Avg(1)'
plt.figure(figsize=(10, 5))
plt.plot(df_flux['shf_Avg(1)'], label='shf_Avg(1)', color='b')
plt.xlabel('Time')
plt.ylabel('Soil heat flux (W/m$^2$)')
plt.legend()
plt.grid()
plt.show()
# Net radiation from the 1 min radiometer data: Rn = (SWin - SWout) + (LWin - LWout)
RN = df['SWin_Avg'] + df['LWin_Avg'] - (df['SWout_Avg'] + df['LWout_Avg'])
# Plot time series of 1 min net radiation
plt.figure(figsize=(10, 5))
plt.plot(RN, color='b')
plt.xlabel('Time')
plt.ylabel('Net radiation (W/m$^2$)')
plt.xticks(rotation=45)
plt.grid()
plt.show()
# Compute 30 min means of the 1 min data
df_30min = df.resample('30min').mean()
df_30min['RN'] = df_30min['SWin_Avg'] + df_30min['LWin_Avg'] - (df_30min['SWout_Avg'] + df_30min['LWout_Avg'])
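# Note: .mean() averages however many 1 min samples land in each half hour, so
# windows with many gaps still get a value. A minimal completeness check (the
# 25-of-30 threshold is an assumption, not from the source):
counts = df['SWin_Avg'].resample('30min').count()
print(f"{(counts < 25).sum()} of {len(counts)} half-hour windows have fewer than 25 samples")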
# Plot the 30 min net radiation
plt.figure(figsize=(10, 5))
plt.plot(df_30min['RN'], color='b')
plt.xlabel('Time')
plt.ylabel('Net radiation (W/m$^2$)')
plt.grid()
plt.show()
# Compute 30 min means of the 15 min soil data
df_soil_30min = df_soil.resample('30min').mean()
# Plot the SoilVUE 5 cm soil temperature
plt.figure(figsize=(10, 5))
plt.plot(df_soil_30min['T_05cm'], color='b')
plt.xlabel('Time')
plt.ylabel('5 cm soil temperature (°C)')
plt.grid()
plt.show()
# Available energy: net radiation minus the 30 min mean soil heat flux from plate 2
df_30min['RNG'] = df_30min['RN'] - df_30min['shf_Avg(2)']
plt.figure(figsize=(10, 5))
plt.plot(df_30min['RNG'], color='b')
plt.xlabel('Time')
plt.ylabel('Rn - G (W/m$^2$)')
plt.grid()
plt.show()
# Compare the averaging soil temperature probe with the SoilVUE 5 cm sensor
df_30min['Tsoil'] = df_30min['SoilTempAv_Avg(2)']
plt.figure(figsize=(10, 5))
plt.plot(df_30min['Tsoil'] - df_soil_30min['T_05cm'], color='b')
plt.xlabel('Time')
plt.ylabel('Temperature difference (°C)')
plt.grid()
plt.show()
# Keep periods with Rn > 20 W/m^2 (roughly daytime) and form daily means
dfc = df_30min[df_30min['RN'] > 20]
dfcd = dfc.resample('D').mean()
dfcd.index = dfcd.index.tz_localize(None)  # Make the index timezone naive
plt.figure(figsize=(10, 5))
plt.plot(dfcd['RNG'], color='b')
plt.xlabel('Time')
plt.ylabel('Daily mean Rn - G (W/m$^2$)')
plt.grid()
plt.show()
# Directory containing JSON files
directory = "/uufs/chpc.utah.edu/common/home/horel-group9/flux/level1"
# Current version is v25
# List JSON files matching the pattern, sorted so files are processed in order
json_files = sorted(f for f in os.listdir(directory) if f.endswith("_v25.json") and "uupyf_combo" in f)
# Initialize empty lists to store data
data_ec150 = []
data_li710 = []
table_name_ec150 = "table_ec150" # Adjust if the table name is different
table_name_li710 = "table_li710" # Adjust if the table name is different
# Process each JSON file
for file in json_files:
    file_path = os.path.join(directory, file)
    with open(file_path, 'r') as f:
        json_data = json.load(f)
    # Extract records from the specified tables
    if table_name_ec150 in json_data['data_tables']:
        data_ec150.extend(json_data['data_tables'][table_name_ec150]['records'])
    if table_name_li710 in json_data['data_tables']:
        data_li710.extend(json_data['data_tables'][table_name_li710]['records'])
# Convert lists to DataFrames
df_ec150 = pd.DataFrame(data_ec150)
df_li710 = pd.DataFrame(data_li710)
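# The archive files may overlap in time, so the concatenated records can contain
# duplicates; dropping them on TIMESTAMP is a cautious guard (an assumption about
# the archive, not stated in the source):
if 'TIMESTAMP' in df_ec150.columns:
    df_ec150 = df_ec150.drop_duplicates(subset='TIMESTAMP')
if 'TIMESTAMP' in df_li710.columns:
    df_li710 = df_li710.drop_duplicates(subset='TIMESTAMP')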
# Inspect the structure of the most recently read JSON file (json_data holds the
# last file loaded by the loop above)
tablenames = list(json_data['data_tables'].keys())
print("data_tables:")
for tablename in tablenames:
    print(f"  {tablename}:")
    # Access the table
    table = json_data['data_tables'][tablename]
    # Extract metadata
    metadata = table.get('metadata', {})
    print("    metadata:")
    for key, value in metadata.items():
        print(f"      {key}: {value}")
    # Extract variables
    variables = table.get('variables', {})
    print("    variables:")
    for var_name, var_details in variables.items():
        print(f"      {var_name}:")
        for detail_key, detail_value in var_details.items():
            print(f"        {detail_key}: {detail_value}")
    # Extract records
    records = table.get('records', [])
    print("    records:")
    if isinstance(records, list) and len(records) > 0:
        print(f"      Total Records: {len(records)}")
        print("      Example Record:")
        for key, value in records[0].items():
            print(f"        {key}: {value}")
    else:
        print("      No records found.")
if 'TIMESTAMP' in df_ec150.columns and 'LE' in df_ec150.columns:
    # Convert TIMESTAMP to datetime and LE to numeric
    df_ec150['TIMESTAMP'] = pd.to_datetime(df_ec150['TIMESTAMP'], errors='coerce')
    df_li710['TIMESTAMP'] = pd.to_datetime(df_li710['TIMESTAMP'], errors='coerce')
    df_ec150['LE'] = pd.to_numeric(df_ec150['LE'], errors='coerce')
    df_li710['LE'] = pd.to_numeric(df_li710['LE'], errors='coerce')
    df_ec150 = df_ec150.dropna(subset=['TIMESTAMP'])  # Drop NaT values
    df_ec150['TIMESTAMP'] = df_ec150['TIMESTAMP'].dt.tz_localize('UTC').dt.tz_convert('America/Denver')
    df_li710 = df_li710.dropna(subset=['TIMESTAMP'])  # Drop NaT values
    df_li710['TIMESTAMP'] = df_li710['TIMESTAMP'].dt.tz_localize('UTC').dt.tz_convert('America/Denver')
    # Sort by TIMESTAMP
    df_ec150 = df_ec150.sort_values('TIMESTAMP')
    df_li710 = df_li710.sort_values('TIMESTAMP')
    # Plot LE as a time series
    plt.figure(figsize=(12, 6))
    plt.plot(df_ec150['TIMESTAMP'], df_ec150['LE'], label='LE', marker='o', linestyle='-')
    #plt.plot(df_li710['TIMESTAMP'], df_li710['LE'], label='LE', marker='+', linestyle='--')
    plt.xlabel('Time')
    plt.ylim(-100., 400)
    plt.ylabel('LE (W/m$^2$)')
    plt.title('LE Time Series')
    plt.legend()
    plt.grid()
    plt.show()
else:
    print("The required 'TIMESTAMP' or 'LE' variable is not present in the data.")
# Filter for March 25, 2025
start_date = pd.Timestamp('2025-03-25', tz='America/Denver')
end_date = pd.Timestamp('2025-03-26', tz='America/Denver')
df_ec150_day = df_ec150[(df_ec150['TIMESTAMP'] >= start_date) & (df_ec150['TIMESTAMP'] < end_date)]
df_li710_day = df_li710[(df_li710['TIMESTAMP'] >= start_date) & (df_li710['TIMESTAMP'] < end_date)]
# Sort by TIMESTAMP
df_ec150_day = df_ec150_day.sort_values('TIMESTAMP')
df_li710_day = df_li710_day.sort_values('TIMESTAMP')
# Plot LE and H from both sensors for the selected day
plt.figure(figsize=(12, 6))
plt.plot(df_ec150_day['TIMESTAMP'], df_ec150_day['LE'].astype('float64'), label='EC150 LE', marker='o', linestyle='-',color='grey')
plt.plot(df_li710_day['TIMESTAMP'], df_li710_day['LE'].astype('float64'), label='LI-710 LE', marker='+', linestyle='--',color='grey')
plt.plot(df_ec150_day['TIMESTAMP'], df_ec150_day['H'].astype('float64'), label='EC150 H', marker='o', linestyle='-',color='orange')
plt.plot(df_li710_day['TIMESTAMP'], df_li710_day['H'].astype('float64'), label='LI-710 H', marker='+', linestyle='--',color='orange')
plt.xlabel('Time')
plt.ylabel('Flux (W/m$^2$)')
plt.title('Time Series for 2025-03-25')
plt.legend()
plt.grid()
plt.show()
# Convert numeric columns to proper data types
def convert_numeric(df):
    for col in df.columns:
        if col == 'TIMESTAMP':
            continue  # Leave the datetime column intact
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df
df_ec150 = convert_numeric(df_ec150)
df_li710 = convert_numeric(df_li710)
# Rename columns to distinguish sources
df_ec150 = df_ec150.add_prefix("ec_")
df_li710 = df_li710.add_prefix("li_")
# Convert timestamps to datetime
df_ec150['ec_TIMESTAMP'] = pd.to_datetime(df_ec150['ec_TIMESTAMP'], errors='coerce')
df_li710['li_TIMESTAMP'] = pd.to_datetime(df_li710['li_TIMESTAMP'], errors='coerce')
# Set ec_TIMESTAMP as index
df_ec150.set_index('ec_TIMESTAMP', inplace=True)
# Merge the two DataFrames on their timestamp indices; the inner join keeps times present in both
df_merged = pd.concat([df_ec150, df_li710.set_index('li_TIMESTAMP')], axis=1, join='inner')
# Keep records only when all QC conditions hold simultaneously:
# EC150 LE and H quality flags <= 6 and the LI-710 diagnostic below 128
df_merged = df_merged[(df_merged['ec_LE_QC'] <= 6) & (df_merged['ec_H_QC'] <= 6) & (df_merged['li_licor_diag'] < 128)]
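# A quick look at how much data survives the screening (illustrative only; the
# thresholds above follow the code, and their interpretation as "acceptable"
# flag values is assumed):
print(f"{len(df_merged)} half-hour records pass the QC filters")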
df_merged['ec_LE_H'] = df_merged['ec_LE']+df_merged['ec_H']
df_merged['li_LE_H'] = df_merged['li_LE']+df_merged['li_H']
# Remove really bad values; handle this better later
df_merged = df_merged[df_merged['li_LE_H'] <= 400]
plt.figure(figsize=(12, 6))
plt.plot(df_merged['ec_LE_H'], label='EC', marker='o', linestyle='-')
plt.plot(df_merged['li_LE_H'], label='LI', marker='_', linestyle='--')
plt.xlabel('Time')
plt.ylim(-100., 400)
plt.ylabel('LE + H (W/m$^2$)')
plt.title('LE + H Time Series')
plt.legend()
plt.grid()
plt.show()
dfmd = df_merged.resample('D').mean()
dfmd.index = dfmd.index.tz_localize(None)  # Make the index timezone naive
plt.figure(figsize=(15, 5))
plt.plot(dfmd.index, dfmd['ec_LE_H'], label='EC', marker='o', linestyle='-')
plt.plot(dfmd.index, dfmd['li_LE_H'], label='LI', marker='_', linestyle='--')
plt.xlabel('Time')
plt.ylabel('Daily mean LE + H (W/m$^2$)')
plt.legend()
plt.grid()
plt.show()
plt.figure(figsize=(15, 5))
plt.plot(dfmd.index, dfmd['ec_ET'], label='EC', marker='o', linestyle='-')
plt.plot(dfmd.index, dfmd['li_ET'], label='LI', marker='_', linestyle='--')
plt.xlabel('Time')
plt.ylabel('Daily mean ET')
plt.legend()
plt.grid()
plt.show()
# Percent difference of the LI-710 relative to the EC150 for ET, LE, and H
plt.figure(figsize=(15, 5))
dfmd['et_per'] = 100 * (dfmd['li_ET'] - dfmd['ec_ET']) / dfmd['ec_ET']
plt.plot(dfmd.index, dfmd['et_per'], marker='o', linestyle='-')
plt.xlabel('Time')
plt.ylabel('ET difference (%)')
plt.ylim(-150., 0)
plt.grid()
plt.show()
plt.figure(figsize=(15, 5))
dfmd['LE_per'] = 100 * (dfmd['li_LE'] - dfmd['ec_LE']) / dfmd['ec_LE']
plt.plot(dfmd.index, dfmd['LE_per'], marker='o', linestyle='-')
plt.xlabel('Time')
plt.ylabel('LE difference (%)')
plt.ylim(-150., 50)
plt.grid()
plt.show()
plt.figure(figsize=(15, 5))
dfmd['H_per'] = 100 * (dfmd['li_H'] - dfmd['ec_H']) / dfmd['ec_H']
plt.plot(dfmd.index, dfmd['H_per'], marker='o', linestyle='-')
plt.xlabel('Time')
plt.ylabel('H difference (%)')
plt.ylim(-150., 50)
plt.grid()
plt.show()
# Daily mean LE from each sensor
plt.figure(figsize=(15, 5))
plt.plot(dfmd.index, dfmd['ec_LE'], label='EC', marker='o', linestyle='-')
plt.plot(dfmd.index, dfmd['li_LE'], label='LI', marker='_', linestyle='--')
plt.xlabel('Time')
plt.ylabel('LE (W/m$^2$)')
plt.legend()
plt.grid()
plt.show()
# Daily mean H from each sensor
plt.figure(figsize=(15, 5))
plt.plot(dfmd.index, dfmd['ec_H'], label='EC', marker='o', linestyle='-')
plt.plot(dfmd.index, dfmd['li_H'], label='LI', marker='_', linestyle='--')
plt.xlabel('Time')
plt.ylabel('H (W/m$^2$)')
plt.legend()
plt.grid()
plt.show()
# Merge dfmd with dfcd on matching daily index times
df_pya_pyf = dfmd.merge(dfcd, left_index=True, right_index=True, how='inner')
# Energy balance closure fraction for each sensor: C = (LE + H) / (Rn - G)
df_pya_pyf['li_C'] = df_pya_pyf['li_LE_H'] / df_pya_pyf['RNG']
df_pya_pyf['ec_C'] = df_pya_pyf['ec_LE_H'] / df_pya_pyf['RNG']
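# Values of C near 1 mean the turbulent fluxes account for the available energy.
# A quick summary of the daily closure (an illustrative check, not in the source):
print(df_pya_pyf[['ec_C', 'li_C']].describe())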
plt.figure(figsize=(10, 5))
plt.plot(df_pya_pyf.index, df_pya_pyf['ec_C'], label='EC', marker='o', linestyle='-')
plt.plot(df_pya_pyf.index, df_pya_pyf['li_C'], label='LI', marker='+', linestyle='--')
plt.xlabel('Time')
plt.ylabel('Closure fraction')
plt.ylim(-0.5, 1.0)
plt.legend()
plt.grid()
plt.show()
# LE + H scaled by each sensor's closure fraction
plt.figure(figsize=(10, 5))
plt.plot(dfmd.index, dfmd['ec_LE_H'] / df_pya_pyf['ec_C'], label='EC', marker='o', linestyle='-')
plt.plot(dfmd.index, dfmd['li_LE_H'] / df_pya_pyf['li_C'], label='LI', marker='_', linestyle='--')
plt.xlabel('Time')
plt.legend()
plt.grid()
plt.show()
plt.figure(figsize=(10, 5))
plt.plot(dfmd.index, dfmd['ec_ET'], label='EC', marker='o', linestyle='-')
plt.plot(dfmd.index, dfmd['li_ET'] / df_pya_pyf['li_C'], label='LI corrected', marker='_', linestyle='--')
plt.xlabel('Time')
plt.ylim(-.05, 0.25)
plt.legend()
plt.grid()
plt.show()
# Convert latent heat flux (LE in W/m^2) to evaporation depth (mm per 30 min):
# E = 1000 mm/m * LE * dt / (L * rho_w)
L = 2.45e6  # Latent heat of vaporization (J/kg)
rho_w = 1000  # Density of water (kg/m^3)
seconds_per_30min = 30 * 60
df_merged['li_EC'] = 1000. * df_merged['li_LE'] * seconds_per_30min / (L * rho_w)
df_merged['ec_EC'] = 1000. * df_merged['ec_LE'] * seconds_per_30min / (L * rho_w)
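# Quick sanity check of the conversion (illustrative): LE = 100 W m^-2 sustained
# for 30 min evaporates 1000 * 100 * 1800 / (2.45e6 * 1000) ≈ 0.073 mm of water.
print(1000. * 100. * seconds_per_30min / (L * rho_w))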
# Scale every 30 min evaporation value by that day's closure correction factor
df_pya_pyf['date'] = df_pya_pyf.index.date  # Date key from the daily dataframe
df_merged['date'] = df_merged.index.date    # Date key from the 30 min timestamps
df_merged['dattim'] = df_merged.index       # Preserve the 30 min times through the merge
df_merged = df_merged.merge(df_pya_pyf[['ec_C', 'li_C', 'date']], on='date', how='left')
df_merged['ec_EC_cor'] = df_merged['ec_EC'] / df_merged['ec_C']
df_merged['li_EC_cor'] = df_merged['li_EC'] / df_merged['li_C']
df_merged = df_merged.set_index('dattim')
df_merged.drop(columns=['date'], inplace=True)  # Remove temporary column
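# Daily totals of the corrected and uncorrected evaporation (illustrative; assumes
# the 30 min values are mm per interval so the daily sum is mm/day):
daily_evap = df_merged[['ec_EC', 'li_EC', 'ec_EC_cor', 'li_EC_cor']].resample('D').sum()
print(daily_evap.describe())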
df_merged.index = df_merged.index.tz_localize(None)  # Make the index timezone naive
plt.figure(figsize=(10, 5))
plt.plot(df_merged.index, df_merged['ec_EC_cor'], label='ec_EC_cor', marker='o', linestyle='-')
plt.plot(df_merged.index, df_merged['li_EC_cor'], label='li_EC_cor', marker='_', linestyle='--')
plt.plot(df_merged.index, df_merged['ec_ET'], label='ec_ET', marker='o', color='cyan', linestyle='-')
plt.plot(df_merged.index, df_merged['li_ET'], label='li_ET', marker='_', color='red', linestyle='--')
plt.xlabel('Time')
plt.ylim(-1, 2)
plt.legend()
plt.grid()
plt.show()
plt.figure(figsize=(10, 5))
#plt.plot(df_merged.index,df_merged['ec_EC_cor'],label='ec_EC_cor', linestyle='-')
plt.plot(df_merged.index,df_merged['li_EC_cor'], label='li_EC_cor', linestyle='--')
plt.plot(df_merged.index,df_merged['ec_ET'],label='ec_ET', color='cyan',linestyle='-')
#plt.plot(df_merged.index,df_merged['li_ET'], label='li_ET', marker='_', color='red',linestyle='--')
plt.xlabel('Time')
plt.ylim(-1,2)
plt.legend()
plt.grid()
plt.show()
plt.figure(figsize=(10, 5))
# Filter for February and March
df_filtered = df_merged.loc[df_merged.index.month.isin([2, 3])]
# df_filtered = df_merged.loc[(df_merged.index.month == 2) & (df_merged.index.day == 26)]
plt.scatter(df_filtered['ec_ET'], df_filtered['li_EC_cor'], marker='o', s=1)
plt.scatter(df_filtered['ec_ET'], df_filtered['li_EC'], color='red', s=1)
plt.xlabel('ec_ET')
plt.ylabel('li_EC_cor (blue) and li_EC (red)')
plt.ylim(-.1, 0.5)
plt.grid()
plt.show()
plt.figure(figsize=(10, 5))
plt.scatter(df_merged['ec_ET'], df_merged['li_ET'], marker='o', s=1)
plt.xlabel('ec_ET')
plt.ylabel('li_ET')
plt.ylim(-.1, 0.5)
plt.grid()
plt.show()
# Convert wind direction to radians for polar plot
wind_dir_rad = np.radians(df_pya_pyf['WindDir'])
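# Caveat: WindDir in df_pya_pyf is an arithmetic daily mean, which misbehaves near
# the 0/360 degree wrap. A vector (circular) mean from the 30 min data is safer;
# a minimal sketch (assumes df_30min['WindDir'] exists, as in the 1 min table):
wd = np.radians(df_30min['WindDir'])
wind_dir_circ = np.degrees(np.arctan2(np.sin(wd).resample('D').mean(),
                                      np.cos(wd).resample('D').mean())) % 360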
# Create polar plot
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={'projection': 'polar'})
# Plot fetch distances at different footprint percentiles
ax.scatter(wind_dir_rad, df_pya_pyf['ec_FETCH_40'], s=5, color='b', alpha=0.5, label='FETCH_40')
ax.scatter(wind_dir_rad, df_pya_pyf['ec_FETCH_55'], s=5, color='cyan', alpha=0.5, label='FETCH_55')
ax.scatter(wind_dir_rad, df_pya_pyf['ec_FETCH_90'], s=5, color='orange', alpha=0.5, label='FETCH_90')
ax.scatter(wind_dir_rad, df_pya_pyf['ec_FETCH_MAX'], s=5, color='red', alpha=0.5, label='FETCH_MAX')
# Labels and legend
ax.set_theta_zero_location('N')  # Set 0 degrees at the top (North)
ax.set_theta_direction(-1)  # Set direction to clockwise
ax.set_xlabel("Wind Direction (degrees)")
ax.set_ylabel("Footprint (meters)")
ax.legend(loc="upper right", bbox_to_anchor=(1.1, 1.1))
plt.title("Wind Direction vs Footprint Fetch Distances")
plt.show()