In [1]:
#!/uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024/bin/python
set_to_zero_for_script = 0
In [2]:
####################
## lots of history calling/collecting ceilometer data 
## as of 2025-07-15 trying to cut out synoptic and collect direct 
## Peter Whelan is taking over the ceil_sweep_v2 script soooo.. talk to him
## this attempts to go to horel-group9, open the gz.tar hex files and concatenate into monthly (?) csv flat files (level1)
## good luck 
####################


import sys
print(f'the python interpreter is at {sys.executable}')
# /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/bin/python
# conda info --envs 
# conda activate cjplotlyjune2024

print(f'the modules are: {sys.prefix}')
the python interpreter is at /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024/bin/python
the modules are: /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024

LIBRARIES¶

In [3]:
#import modules
import pandas as pd
import numpy as np
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import matplotlib.dates as mdates #format date axis
import json
#from urllib.request import urlretrieve
import urllib.request
import warnings
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

import os
import shutil
import pytz
import csv
import logging
import requests #to grab xml data ? not in this module
import plotly.express as px #plotly to make chart interactive? 
import plotly.graph_objs as go #from GPT mastermind 
from plotly.subplots import make_subplots 

import gzip #read hex for ceilometer data 
import re #cleaning gzip 
import cl2nc
from ceilopyter.readers.read_cl import read_cl_file

#jina2 stuff ? 
from jinja2 import Environment, FileSystemLoader, Template

pd.set_option('display.max_columns',40)
tz = pytz.timezone('America/Denver')
In [4]:
import cl2nc
print(dir(cl2nc))
['Dataset', 'NA_INT32', 'NA_INT64', 'NA_NETCDF', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '__version__', 'argparse', 'check', 'crc16', 'dt', 'fsdecode', 'fsencode', 'int_to_float', 'is_none', 'itertools', 'line1', 'line2', 'line20ct', 'line3', 'line3ct', 'line4', 'line4ct', 'line5', 'line6', 'line_time', 'log', 'logging', 'main', 'np', 'os', 'parse_float', 'parse_iso_time', 'postprocess', 're', 're_file_time', 're_his_time', 're_line1', 're_line1ct', 're_line2', 're_line20ct', 're_line2ct', 're_line3', 're_line3ct', 're_line4', 're_line4ct', 're_line5', 're_line6', 're_line_time_1', 're_line_time_2', 're_line_time_3', 're_none', 'read', 'read_dat', 'read_hex', 'read_hex_array', 'read_his', 'read_his_backscatter', 'read_his_period', 'read_his_time', 'read_int', 'read_str', 'signal', 'sys', 'traceback', 'write_output']

Make Directories¶

In [5]:
def directory_maker(name):
    # Full directory path
    directory_path = f"{name}"

    # Check if the folder exists
    if not os.path.exists(directory_path):
        try:
            # Create the folder
            os.mkdir(directory_path)
            print(f"Folder '{directory_path}' created.")
        except OSError as e:
            print(f"Error creating folder '{directory_path}': {e}")
    else:
        print(f"Folder '{directory_path}' already exists.")

    # Return the directory path
    return directory_path

#directory_inputs = directory_maker("live_inputs")
directory_outputs = directory_maker("live_outputs")
directory_lvl0s = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/'

directory_lvl1s = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/lvl1'
directory_tmp = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/lvl1/tmp'
#public_project_name = "ceils/livepage"
#directory_public = directory_maker(f"/uufs/chpc.utah.edu/common/home/u1426024/public_html/{public_project_name}")
Folder 'live_outputs' already exists.

Define and Rename Stations¶

In [6]:
ceil_list = ['UUCLA','UUCLB','MMLCL']

renamer = {
    'UUCLA': 'USDR1',
    'UUCLB': 'UUSYR',
    'MMLCL': 'MTMET'
}

# 2025-07-16

# UUCLA    =   USDR1     166.140.53.42   SODAR
# UUCLB    =   UUSYR     63.46.206.252   SYRACUSE
# MMLCL    =   MTMET     155.97.226.134  MTMET

read files¶

In [7]:
site = 'UUCLA'
timestamp = '202506281859'
year = '2025'
data_dir = f'{directory_lvl0s}{year}_{site}/'

filename = f'{data_dir}{site}.{timestamp}.dat.gz'

print(filename)
/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/2025_UUCLA/UUCLA.202506281859.dat.gz
In [8]:
with gzip.open(filename, 'rb') as f:
    text = f.read().decode('utf-8', errors='replace')  
    
clean_text = re.sub(r'[^\x20-\x7E\n]', '', text)

lines = clean_text.strip().split('\n')
data_lines = [line for line in lines if re.search(r'\d{4,}', line)]
df = pd.DataFrame(data_lines, columns=['Line'])

display(df)
df.head(200)
Line
0 1751133601.49
1 CL020211
2 0W ///// ///// ///// 000080008000
3 00100 10 0770 099 +45 052 02 0012 L0016HN15 005
4 00012000250002200022000270002c000230001f000200...
... ...
1155 1751137185.39
1156 CL020211
1157 0W ///// ///// ///// 000080008000
1158 00100 10 0770 098 +47 052 02 0020 L0016HN15 008
1159 000210002b00027000260002a0002f0002700022000230...

1160 rows × 1 columns

Out[8]:
Line
0 1751133601.49
1 CL020211
2 0W ///// ///// ///// 000080008000
3 00100 10 0770 099 +45 052 02 0012 L0016HN15 005
4 00012000250002200022000270002c000230001f000200...
... ...
195 000230002800025000260002a0002e0002400020000220...
196 1751134209.62
197 CL020211
198 0W ///// ///// ///// 000080008000
199 00100 10 0770 098 +45 052 02 0013 L0016HN15 006

200 rows × 1 columns

In [9]:
from ceilopyter.readers.read_cl import read_cl_file
# Read ASCII lines from the gzip file
print(filename)
with gzip.open(filename, 'rt', encoding='utf-8', errors='replace') as f:
    lines = f.readlines()
/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/2025_UUCLA/UUCLA.202506281859.dat.gz
In [10]:
# Read lines
with gzip.open(filename, 'rt', encoding='utf-8', errors='replace') as f:
    lines = [line.strip() for line in f.readlines()]

# # Group into 5-line blocks
# records = []
# for i in range(len(lines)):
#     if re.match(r'^\d+\.\d+', lines[i]):  # timestamp line
#         try:
#             ts = float(lines[i])
#             cl_id = lines[i+1]
#             status = lines[i+2]
#             header = lines[i+3]
#             profile = lines[i+4]
#             records.append((ts, cl_id, status, header, profile))
#         except IndexError:
#             continue  # incomplete block at end

# # Convert to DataFrame
# df_blocks = pd.DataFrame(records, columns=['timestamp', 'cl_id', 'status', 'header', 'profile'])

# # Parse profile into integer columns
# def decode_profile(profile_str):
#     width = 4  # each value is 4 hex chars (assumed)
#     return [int(profile_str[i:i+width], 16) for i in range(0, len(profile_str), width)]

# # Example: decode 1st profile
# decoded = decode_profile(df_blocks['profile'].iloc[0])
# print(decoded[:10])  # show first 10 bins
In [15]:
records = []
for i in range(len(lines)):
    if re.match(r'^\d+\.\d+', lines[i]):
        try:
            ts = float(lines[i])
            cl_id = lines[i+1]
            status = lines[i+2]
            header = lines[i+3]
            profile = lines[i+4]
            records.append((ts, cl_id, status, header, profile))
        except IndexError:
            continue

df = pd.DataFrame(records, columns=['timestamp', 'id', 'status', 'header', 'profile'])
df['timestamp'] = df['timestamp'].map(lambda x: f"{x:.2f}")
df['TIMESTAMP'] = df['timestamp'].apply(lambda x: datetime.utcfromtimestamp(float(x)))
df.insert(1, 'TIMESTAMP', df.pop('TIMESTAMP'))
display(df)
timestamp TIMESTAMP id status header profile
0 1751133601.49 2025-06-28 18:00:01.490 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 099 +45 052 02 0012 L0016HN15 005 00012000250002200022000270002c000230001f000200...
1 1751133617.49 2025-06-28 18:00:17.490 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 100 +45 052 02 0012 L0016HN15 007 00016000260002300022000260002c000230001e000200...
2 1751133633.49 2025-06-28 18:00:33.490 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 100 +44 052 02 0011 L0016HN15 007 00023000260002300024000280002d000230001e000200...
3 1751133649.49 2025-06-28 18:00:49.490 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 099 +45 052 02 0011 L0016HN15 005 00029000270002200023000280002c000230001e0001f0...
4 1751133665.47 2025-06-28 18:01:05.470 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 099 +45 052 02 0012 L0016HN15 005 00018000250002300022000270002c000230001e000200...
... ... ... ... ... ... ...
220 1751137121.41 2025-06-28 18:58:41.410 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 100 +47 052 02 0013 L0016HN15 007 0001d0002a00027000280002a0002f0002600021000220...
221 1751137137.39 2025-06-28 18:58:57.390 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 100 +47 052 02 0013 L0016HN15 005 000130002b0002600025000290002f0002600021000210...
222 1751137153.39 2025-06-28 18:59:13.390 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 100 +47 052 02 0016 L0016HN15 003 000130002a0002600026000290002f0002600021000230...
223 1751137169.39 2025-06-28 18:59:29.390 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 100 +47 052 02 0016 L0016HN15 006 000180002b00028000270002a0002f0002700022000230...
224 1751137185.39 2025-06-28 18:59:45.390 CL020211 0W ///// ///// ///// 000080008000 00100 10 0770 098 +47 052 02 0020 L0016HN15 008 000210002b00027000260002a0002f0002700022000230...

225 rows × 6 columns

In [16]:
# Extract the profile string from the first row (row 0)
profile_str = df.loc[0, 'profile']

# Split the string into 4-character chunks (each chunk = one int16 hex value)
chunks = [profile_str[i:i+4] for i in range(0, len(profile_str), 4)]

# Convert hex strings to signed 16-bit integers
profile_values = np.array([
    int(c, 16) if int(c, 16) < 0x8000 else int(c, 16) - 0x10000
    for c in chunks
], dtype=np.int16)

# Convert to DataFrame
profile_df = pd.DataFrame({
    'bin': np.arange(len(profile_values)),
    'signal': profile_values
})

display(profile_df)
bin signal
0 0 1
1 1 8192
2 2 9472
3 3 544
4 4 34
... ... ...
958 958 32063
959 959 -1545
960 960 55
961 961 -24575
962 962 127

963 rows × 2 columns

copy files to horel¶

In [12]:
# if set_to_zero_for_script == 0:
#     receive_directory = directory_hdrive_websitefiles


#     permission_mode = 0o775  # rwxrwxr-x

#     for ext in ['.html', '.csv']:
#         source_file_path = os.path.join(directory_outputs, f'{filename}{ext}')
#         destination_file_path = os.path.join(receive_directory, f'{filename}{ext}')


#         try:
#             # Ensure the receive directory exists
#             os.makedirs(receive_directory, exist_ok=True)

#             # Copy the file (overwrite if exists)
#             shutil.copy2(source_file_path, destination_file_path)

#             # Set file permissions
#             os.chmod(destination_file_path, permission_mode)

#             # Get file info
#             file_size = os.path.getsize(destination_file_path)
#             file_datetime = datetime.fromtimestamp(os.path.getmtime(destination_file_path))

#             print(f"Copied {destination_file_path}")
#             print(f"Size: {file_size} bytes")
#             print(f"Modified: {file_datetime}")

#         except FileNotFoundError:
#             print(f"File not found: {source_file_path}")
#         except PermissionError:
#             print("Permission denied.")
#         except shutil.Error as e:
#             print(f"Copy error: {str(e)}")