#!/uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024/bin/python
# Run-mode flag. Within this file it is only referenced by the commented-out
# "copy files" step near the bottom (`if set_to_zero_for_script == 0:`).
# NOTE(review): the name suggests 0 means "running as a script" rather than
# interactively — confirm the intended meaning before relying on it.
set_to_zero_for_script = 0

####################
## Long history of calling/collecting ceilometer data.
## As of 2025-07-15: trying to cut out Synoptic and collect the data directly.
## Peter Whelan is taking over the ceil_sweep_v2 script, so talk to him about it.
## This script attempts to go to horel-group9, open the gzipped hex files
## (e.g. SITE.YYYYMMDDHHMM.dat.gz) and concatenate them into monthly (?) CSV flat files (level 1).
## Good luck.
####################


import sys
# Report which Python executable is running this file.
print(f'the python interpreter is at {sys.executable}')
# Environment notes:
#   miniconda interpreter outside envs/: /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/bin/python
#   list the available environments:     conda info --envs
#   activate the env named in the shebang: conda activate cjplotlyjune2024

# Report the installation prefix of the running interpreter (sys.prefix).
print(f'the modules are: {sys.prefix}')

# Captured output of the two print() calls above:
#   the python interpreter is at /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024/bin/python
#   the modules are: /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024

#import modules
import pandas as pd
import numpy as np
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import matplotlib.dates as mdates #format date axis
import json
#from urllib.request import urlretrieve
import urllib.request
import warnings
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

import os
import shutil
import pytz
import csv
import logging
import requests #to grab xml data ? not in this module
import plotly.express as px #plotly to make chart interactive? 
import plotly.graph_objs as go #from GPT mastermind 
from plotly.subplots import make_subplots 

import gzip #read hex for ceilometer data 
import re #cleaning gzip 
import cl2nc
from ceilopyter.readers.read_cl import read_cl_file

# jinja2 templating imports
from jinja2 import Environment, FileSystemLoader, Template

# Let pandas show up to 40 columns when a DataFrame is printed/displayed.
pd.set_option('display.max_columns',40)
# pytz time zone object for America/Denver (not used in the code visible in this file).
tz = pytz.timezone('America/Denver')

# cl2nc is already imported in the import list above; it is imported again here
# only so the names it exposes can be listed for exploration.
import cl2nc
print(dir(cl2nc))

['Dataset', 'NA_INT32', 'NA_INT64', 'NA_NETCDF', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '__version__', 'argparse', 'check', 'crc16', 'dt', 'fsdecode', 'fsencode', 'int_to_float', 'is_none', 'itertools', 'line1', 'line2', 'line20ct', 'line3', 'line3ct', 'line4', 'line4ct', 'line5', 'line6', 'line_time', 'log', 'logging', 'main', 'np', 'os', 'parse_float', 'parse_iso_time', 'postprocess', 're', 're_file_time', 're_his_time', 're_line1', 're_line1ct', 're_line2', 're_line20ct', 're_line2ct', 're_line3', 're_line3ct', 're_line4', 're_line4ct', 're_line5', 're_line6', 're_line_time_1', 're_line_time_2', 're_line_time_3', 're_none', 'read', 'read_dat', 'read_hex', 'read_hex_array', 'read_his', 'read_his_backscatter', 'read_his_period', 'read_his_time', 'read_int', 'read_str', 'signal', 'sys', 'traceback', 'write_output']

def directory_maker(name):
    """Ensure the directory ``name`` exists and return its path as a string.

    Missing parent directories are created too, so nested paths such as
    ``".../public_html/ceils/livepage"`` work on a fresh tree. Problems are
    reported on stdout rather than raised, and the path is returned either
    way, so a caller that strictly needs the folder should verify it.

    Args:
        name: Directory path. A relative path is resolved against the
            current working directory.

    Returns:
        The directory path as a ``str``.
    """
    directory_path = str(name)

    if os.path.exists(directory_path):
        print(f"Folder '{directory_path}' already exists.")
        return directory_path

    try:
        # makedirs (not mkdir) so that missing parents are created as well;
        # exist_ok=True covers another process creating the folder between
        # the existence check above and this call.
        os.makedirs(directory_path, exist_ok=True)
        print(f"Folder '{directory_path}' created.")
    except OSError as e:
        print(f"Error creating folder '{directory_path}': {e}")

    return directory_path

#directory_inputs = directory_maker("live_inputs")
# Relative output folder; created in the current working directory if it is missing.
directory_outputs = directory_maker("live_outputs")
# Root folder of the raw ceilometer files. The trailing slash is required:
# per-site paths below are built by plain string concatenation onto this value.
directory_lvl0s = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/'

# NOTE(review): presumably the destination for the level-1 CSV files mentioned in
# the file header, plus a scratch folder — neither is used in the code shown here.
directory_lvl1s = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/lvl1'
directory_tmp = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/lvl1/tmp'
#public_project_name = "ceils/livepage"
#directory_public = directory_maker(f"/uufs/chpc.utah.edu/common/home/u1426024/public_html/{public_project_name}")

# Captured output: Folder 'live_outputs' already exists.

# Ceilometer IDs as they appear in the raw data folder and file names
# (e.g. "2025_UUCLA/UUCLA.<timestamp>.dat.gz").
ceil_list = ['UUCLA','UUCLB','MMLCL']

# Maps each ceilometer ID above to its replacement ID (see the table below).
renamer = {
    'UUCLA': 'USDR1',
    'UUCLB': 'UUSYR',
    'MMLCL': 'MTMET'
}

# Mapping as of 2025-07-16.
# NOTE(review): columns look like  ceilometer ID = renamed ID, address, site label — confirm.

# UUCLA    =   USDR1     166.140.53.42   SODAR
# UUCLB    =   UUSYR     63.46.206.252   SYRACUSE
# MMLCL    =   MTMET     155.97.226.134  MTMET

# Pick one sample raw file to explore.
site = 'UUCLA'
# NOTE(review): looks like YYYYMMDDHHMM — confirm the convention (and time zone) with the data provider.
timestamp = '202506281859'
# NOTE(review): set independently of `timestamp`; the two must be kept in sync by hand.
year = '2025'
# Per-site, per-year folder: <lvl0 root><year>_<site>/ (relies on the root's trailing slash).
data_dir = f'{directory_lvl0s}{year}_{site}/'

# Full path of the gzip file: <data_dir><site>.<timestamp>.dat.gz
filename = f'{data_dir}{site}.{timestamp}.dat.gz'

print(filename)

# Captured output: /uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/2025_UUCLA/UUCLA.202506281859.dat.gz

# Exploratory pass: read the whole gzip file as bytes and decode it as UTF-8,
# substituting a replacement character for any undecodable byte.
with gzip.open(filename, 'rb') as f:
    text = f.read().decode('utf-8', errors='replace')  
    
# Remove every character that is not printable ASCII (0x20-0x7E) or a newline.
clean_text = re.sub(r'[^\x20-\x7E\n]', '', text)

lines = clean_text.strip().split('\n')
# Keep only the lines that contain a run of at least four digits.
data_lines = [line for line in lines if re.search(r'\d{4,}', line)]
# One row per kept line, in a single column named 'Line'.
df = pd.DataFrame(data_lines, columns=['Line'])

# NOTE(review): display() is neither defined nor imported in this file; it is
# available in IPython/Jupyter sessions — confirm before running as a plain script.
display(df)
# Bare expression: its value is only shown when it is the last expression of a notebook cell.
df.head(200)

# read_cl_file is already imported in the import list at the top of the file;
# it is imported again here but not called in this cell.
from ceilopyter.readers.read_cl import read_cl_file
# Read the gzip file as text lines (each line keeps its trailing newline).
# Undecodable bytes are substituted rather than raising.
# Note: `lines` is reassigned by the next read of the same file further down.
print(filename)
with gzip.open(filename, 'rt', encoding='utf-8', errors='replace') as f:
    lines = f.readlines()

# Captured output: /uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/2025_UUCLA/UUCLA.202506281859.dat.gz

# Read the gzip file as text and keep one string per line with the
# surrounding whitespace (including the newline) stripped off.
with gzip.open(filename, 'rt', encoding='utf-8', errors='replace') as f:
    lines = list(map(str.strip, f))

# # Group into 5-line blocks
# records = []
# for i in range(len(lines)):
#     if re.match(r'^\d+\.\d+', lines[i]):  # timestamp line
#         try:
#             ts = float(lines[i])
#             cl_id = lines[i+1]
#             status = lines[i+2]
#             header = lines[i+3]
#             profile = lines[i+4]
#             records.append((ts, cl_id, status, header, profile))
#         except IndexError:
#             continue  # incomplete block at end

# # Convert to DataFrame
# df_blocks = pd.DataFrame(records, columns=['timestamp', 'cl_id', 'status', 'header', 'profile'])

# # Parse profile into integer columns
# def decode_profile(profile_str):
#     width = 4  # each value is 4 hex chars (assumed)
#     return [int(profile_str[i:i+width], 16) for i in range(0, len(profile_str), width)]

# # Example: decode 1st profile
# decoded = decode_profile(df_blocks['profile'].iloc[0])
# print(decoded[:10])  # show first 10 bins

# Group the stripped text lines into ceilometer messages. A message starts on a
# line that begins with "<digits>.<digits>" (parsed as an epoch timestamp in
# seconds) and is followed by four more lines: id, status, header, profile.
from datetime import timezone  # the file header only imports datetime and timedelta

timestamp_line = re.compile(r'^\d+\.\d+')  # compiled once, reused for every line

records = []
for i, line in enumerate(lines):
    if not timestamp_line.match(line):
        continue
    ts = float(line)
    following = lines[i + 1:i + 5]
    if len(following) < 4:
        continue  # incomplete message at the end of the file
    cl_id, status, header, profile = following
    records.append((ts, cl_id, status, header, profile))

df = pd.DataFrame(records, columns=['timestamp', 'id', 'status', 'header', 'profile'])
# Keep the raw epoch value as a fixed two-decimal string.
df['timestamp'] = df['timestamp'].map(lambda x: f"{x:.2f}")
# Naive UTC datetimes, placed right after the raw timestamp column.
# datetime.utcfromtimestamp() is deprecated since Python 3.12; converting with an
# explicit UTC tzinfo and then dropping it produces the same naive values.
df.insert(
    1,
    'TIMESTAMP',
    df['timestamp'].apply(
        lambda x: datetime.fromtimestamp(float(x), tz=timezone.utc).replace(tzinfo=None)
    ),
)
display(df)

def decode_profile(profile_str, width=5):
    """Decode a hex-ASCII profile line into signed integers.

    Each sample is ``width`` hex characters, most significant digit first,
    interpreted as a two's-complement number of ``4 * width`` bits. The
    default of 5 characters (20 bits) matches the data in this file: the
    sample line starts 00012 00025 00022 00022 00027 ..., and 5-character
    groups give as many samples as the "0770" field of the header line
    (presumably the sample count). Splitting on 4 characters instead
    straddles sample boundaries and yields erratic values (1, 8192, 9472, ...).

    Args:
        profile_str: The hex string of one profile line.
        width: Hex characters per sample (values up to 8 fit the int32 result).

    Returns:
        A 1-D ``numpy`` int32 array with one entry per complete sample. A
        trailing group shorter than ``width`` (truncated line) is ignored.

    Raises:
        ValueError: If a group contains non-hexadecimal characters.
    """
    sign_bit = 1 << (4 * width - 1)
    wrap = 1 << (4 * width)
    usable = len(profile_str) - len(profile_str) % width
    raw_values = (int(profile_str[i:i + width], 16) for i in range(0, usable, width))
    return np.array(
        [value - wrap if value & sign_bit else value for value in raw_values],
        dtype=np.int32,
    )


# Extract the profile string from the first row (row 0)
profile_str = df.loc[0, 'profile']

# Split the string into complete 5-character chunks (each chunk = one 20-bit hex value)
chunks = [
    profile_str[i:i + 5]
    for i in range(0, len(profile_str) - len(profile_str) % 5, 5)
]

# Convert the hex chunks to signed 20-bit integers
profile_values = decode_profile(profile_str, width=5)

# One row per sample: running bin index and decoded value
profile_df = pd.DataFrame({
    'bin': np.arange(len(profile_values)),
    'signal': profile_values
})

display(profile_df)

# if set_to_zero_for_script == 0:
#     receive_directory = directory_hdrive_websitefiles


#     permission_mode = 0o775  # rwxrwxr-x

#     for ext in ['.html', '.csv']:
#         source_file_path = os.path.join(directory_outputs, f'{filename}{ext}')
#         destination_file_path = os.path.join(receive_directory, f'{filename}{ext}')


#         try:
#             # Ensure the receive directory exists
#             os.makedirs(receive_directory, exist_ok=True)

#             # Copy the file (overwrite if exists)
#             shutil.copy2(source_file_path, destination_file_path)

#             # Set file permissions
#             os.chmod(destination_file_path, permission_mode)

#             # Get file info
#             file_size = os.path.getsize(destination_file_path)
#             file_datetime = datetime.fromtimestamp(os.path.getmtime(destination_file_path))

#             print(f"Copied {destination_file_path}")
#             print(f"Size: {file_size} bytes")
#             print(f"Modified: {file_datetime}")

#         except FileNotFoundError:
#             print(f"File not found: {source_file_path}")
#         except PermissionError:
#             print("Permission denied.")
#         except shutil.Error as e:
#             print(f"Copy error: {str(e)}")

	Line
0	1751133601.49
1	CL020211
2	0W ///// ///// ///// 000080008000
3	00100 10 0770 099 +45 052 02 0012 L0016HN15 005
4	00012000250002200022000270002c000230001f000200...
...	...
1155	1751137185.39
1156	CL020211
1157	0W ///// ///// ///// 000080008000
1158	00100 10 0770 098 +47 052 02 0020 L0016HN15 008
1159	000210002b00027000260002a0002f0002700022000230...

	Line
0	1751133601.49
1	CL020211
2	0W ///// ///// ///// 000080008000
3	00100 10 0770 099 +45 052 02 0012 L0016HN15 005
4	00012000250002200022000270002c000230001f000200...
...	...
195	000230002800025000260002a0002e0002400020000220...
196	1751134209.62
197	CL020211
198	0W ///// ///// ///// 000080008000
199	00100 10 0770 098 +45 052 02 0013 L0016HN15 006

	timestamp	TIMESTAMP	id	status	header	profile
0	1751133601.49	2025-06-28 18:00:01.490	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 099 +45 052 02 0012 L0016HN15 005	00012000250002200022000270002c000230001f000200...
1	1751133617.49	2025-06-28 18:00:17.490	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 100 +45 052 02 0012 L0016HN15 007	00016000260002300022000260002c000230001e000200...
2	1751133633.49	2025-06-28 18:00:33.490	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 100 +44 052 02 0011 L0016HN15 007	00023000260002300024000280002d000230001e000200...
3	1751133649.49	2025-06-28 18:00:49.490	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 099 +45 052 02 0011 L0016HN15 005	00029000270002200023000280002c000230001e0001f0...
4	1751133665.47	2025-06-28 18:01:05.470	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 099 +45 052 02 0012 L0016HN15 005	00018000250002300022000270002c000230001e000200...
...	...	...	...	...	...	...
220	1751137121.41	2025-06-28 18:58:41.410	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 100 +47 052 02 0013 L0016HN15 007	0001d0002a00027000280002a0002f0002600021000220...
221	1751137137.39	2025-06-28 18:58:57.390	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 100 +47 052 02 0013 L0016HN15 005	000130002b0002600025000290002f0002600021000210...
222	1751137153.39	2025-06-28 18:59:13.390	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 100 +47 052 02 0016 L0016HN15 003	000130002a0002600026000290002f0002600021000230...
223	1751137169.39	2025-06-28 18:59:29.390	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 100 +47 052 02 0016 L0016HN15 006	000180002b00028000270002a0002f0002700022000230...
224	1751137185.39	2025-06-28 18:59:45.390	CL020211	0W ///// ///// ///// 000080008000	00100 10 0770 098 +47 052 02 0020 L0016HN15 008	000210002b00027000260002a0002f0002700022000230...

	bin	signal
0	0	1
1	1	8192
2	2	9472
3	3	544
4	4	34
...	...	...
958	958	32063
959	959	-1545
960	960	55
961	961	-24575
962	962	127

# LIBRARIES

# Make Directories

# Define and Rename Stations

# read files

# copy files to horel