In [1]:
#!/uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024/bin/python
set_to_zero_for_script = 0
In [2]:
####################
## lots of history calling/collecting ceilometer data
## as of 2025-07-15 trying to cut out synoptic and collect direct
## Peter Whelan is taking over the ceil_sweep_v2 script soooo.. talk to him
## this attempts to go to horel-group9, open the gz.tar hex files and concatenate into monthly (?) csv flat files (level1)
## good luck
####################
import sys
print(f'the python interpreter is at {sys.executable}')
# /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/bin/python
# conda info --envs
# conda activate cjplotlyjune2024
print(f'the modules are: {sys.prefix}')
the python interpreter is at /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024/bin/python the modules are: /uufs/chpc.utah.edu/common/home/u1426024/software/pkg/miniconda3/envs/cjplotlyjune2024
LIBRARIES¶
In [3]:
#import modules
import pandas as pd
import numpy as np
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import matplotlib.dates as mdates #format date axis
import json
#from urllib.request import urlretrieve
import urllib.request
import warnings
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import os
import shutil
import pytz
import csv
import logging
import requests #to grab xml data ? not in this module
import plotly.express as px #plotly to make chart interactive?
import plotly.graph_objs as go #from GPT mastermind
from plotly.subplots import make_subplots
import gzip #read hex for ceilometer data
import re #cleaning gzip
import cl2nc
from ceilopyter.readers.read_cl import read_cl_file
#jina2 stuff ?
from jinja2 import Environment, FileSystemLoader, Template
pd.set_option('display.max_columns',40)
tz = pytz.timezone('America/Denver')
In [4]:
import cl2nc
print(dir(cl2nc))
['Dataset', 'NA_INT32', 'NA_INT64', 'NA_NETCDF', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '__version__', 'argparse', 'check', 'crc16', 'dt', 'fsdecode', 'fsencode', 'int_to_float', 'is_none', 'itertools', 'line1', 'line2', 'line20ct', 'line3', 'line3ct', 'line4', 'line4ct', 'line5', 'line6', 'line_time', 'log', 'logging', 'main', 'np', 'os', 'parse_float', 'parse_iso_time', 'postprocess', 're', 're_file_time', 're_his_time', 're_line1', 're_line1ct', 're_line2', 're_line20ct', 're_line2ct', 're_line3', 're_line3ct', 're_line4', 're_line4ct', 're_line5', 're_line6', 're_line_time_1', 're_line_time_2', 're_line_time_3', 're_none', 'read', 'read_dat', 'read_hex', 'read_hex_array', 'read_his', 'read_his_backscatter', 'read_his_period', 'read_his_time', 'read_int', 'read_str', 'signal', 'sys', 'traceback', 'write_output']
Make Directories¶
In [5]:
def directory_maker(name):
# Full directory path
directory_path = f"{name}"
# Check if the folder exists
if not os.path.exists(directory_path):
try:
# Create the folder
os.mkdir(directory_path)
print(f"Folder '{directory_path}' created.")
except OSError as e:
print(f"Error creating folder '{directory_path}': {e}")
else:
print(f"Folder '{directory_path}' already exists.")
# Return the directory path
return directory_path
#directory_inputs = directory_maker("live_inputs")
directory_outputs = directory_maker("live_outputs")
directory_lvl0s = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/'
directory_lvl1s = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/lvl1'
directory_tmp = '/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/lvl1/tmp'
#public_project_name = "ceils/livepage"
#directory_public = directory_maker(f"/uufs/chpc.utah.edu/common/home/u1426024/public_html/{public_project_name}")
Folder 'live_outputs' already exists.
Define and Rename Stations¶
In [6]:
ceil_list = ['UUCLA','UUCLB','MMLCL']
renamer = {
'UUCLA': 'USDR1',
'UUCLB': 'UUSYR',
'MMLCL': 'MTMET'
}
# 2025-07-16
# UUCLA = USDR1 166.140.53.42 SODAR
# UUCLB = UUSYR 63.46.206.252 SYRACUSE
# MMLCL = MTMET 155.97.226.134 MTMET
read files¶
In [7]:
site = 'UUCLA'
timestamp = '202506281859'
year = '2025'
data_dir = f'{directory_lvl0s}{year}_{site}/'
filename = f'{data_dir}{site}.{timestamp}.dat.gz'
print(filename)
/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/2025_UUCLA/UUCLA.202506281859.dat.gz
In [8]:
with gzip.open(filename, 'rb') as f:
text = f.read().decode('utf-8', errors='replace')
clean_text = re.sub(r'[^\x20-\x7E\n]', '', text)
lines = clean_text.strip().split('\n')
data_lines = [line for line in lines if re.search(r'\d{4,}', line)]
df = pd.DataFrame(data_lines, columns=['Line'])
display(df)
df.head(200)
Line | |
---|---|
0 | 1751133601.49 |
1 | CL020211 |
2 | 0W ///// ///// ///// 000080008000 |
3 | 00100 10 0770 099 +45 052 02 0012 L0016HN15 005 |
4 | 00012000250002200022000270002c000230001f000200... |
... | ... |
1155 | 1751137185.39 |
1156 | CL020211 |
1157 | 0W ///// ///// ///// 000080008000 |
1158 | 00100 10 0770 098 +47 052 02 0020 L0016HN15 008 |
1159 | 000210002b00027000260002a0002f0002700022000230... |
1160 rows × 1 columns
Out[8]:
Line | |
---|---|
0 | 1751133601.49 |
1 | CL020211 |
2 | 0W ///// ///// ///// 000080008000 |
3 | 00100 10 0770 099 +45 052 02 0012 L0016HN15 005 |
4 | 00012000250002200022000270002c000230001f000200... |
... | ... |
195 | 000230002800025000260002a0002e0002400020000220... |
196 | 1751134209.62 |
197 | CL020211 |
198 | 0W ///// ///// ///// 000080008000 |
199 | 00100 10 0770 098 +45 052 02 0013 L0016HN15 006 |
200 rows × 1 columns
In [9]:
from ceilopyter.readers.read_cl import read_cl_file
# Read ASCII lines from the gzip file
print(filename)
with gzip.open(filename, 'rt', encoding='utf-8', errors='replace') as f:
lines = f.readlines()
/uufs/chpc.utah.edu/common/home/horel-group9/uunet/ceilometer/data/2025_UUCLA/UUCLA.202506281859.dat.gz
In [10]:
# Read lines
with gzip.open(filename, 'rt', encoding='utf-8', errors='replace') as f:
lines = [line.strip() for line in f.readlines()]
# # Group into 5-line blocks
# records = []
# for i in range(len(lines)):
# if re.match(r'^\d+\.\d+', lines[i]): # timestamp line
# try:
# ts = float(lines[i])
# cl_id = lines[i+1]
# status = lines[i+2]
# header = lines[i+3]
# profile = lines[i+4]
# records.append((ts, cl_id, status, header, profile))
# except IndexError:
# continue # incomplete block at end
# # Convert to DataFrame
# df_blocks = pd.DataFrame(records, columns=['timestamp', 'cl_id', 'status', 'header', 'profile'])
# # Parse profile into integer columns
# def decode_profile(profile_str):
# width = 4 # each value is 4 hex chars (assumed)
# return [int(profile_str[i:i+width], 16) for i in range(0, len(profile_str), width)]
# # Example: decode 1st profile
# decoded = decode_profile(df_blocks['profile'].iloc[0])
# print(decoded[:10]) # show first 10 bins
In [15]:
records = []
for i in range(len(lines)):
if re.match(r'^\d+\.\d+', lines[i]):
try:
ts = float(lines[i])
cl_id = lines[i+1]
status = lines[i+2]
header = lines[i+3]
profile = lines[i+4]
records.append((ts, cl_id, status, header, profile))
except IndexError:
continue
df = pd.DataFrame(records, columns=['timestamp', 'id', 'status', 'header', 'profile'])
df['timestamp'] = df['timestamp'].map(lambda x: f"{x:.2f}")
df['TIMESTAMP'] = df['timestamp'].apply(lambda x: datetime.utcfromtimestamp(float(x)))
df.insert(1, 'TIMESTAMP', df.pop('TIMESTAMP'))
display(df)
timestamp | TIMESTAMP | id | status | header | profile | |
---|---|---|---|---|---|---|
0 | 1751133601.49 | 2025-06-28 18:00:01.490 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 099 +45 052 02 0012 L0016HN15 005 | 00012000250002200022000270002c000230001f000200... |
1 | 1751133617.49 | 2025-06-28 18:00:17.490 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 100 +45 052 02 0012 L0016HN15 007 | 00016000260002300022000260002c000230001e000200... |
2 | 1751133633.49 | 2025-06-28 18:00:33.490 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 100 +44 052 02 0011 L0016HN15 007 | 00023000260002300024000280002d000230001e000200... |
3 | 1751133649.49 | 2025-06-28 18:00:49.490 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 099 +45 052 02 0011 L0016HN15 005 | 00029000270002200023000280002c000230001e0001f0... |
4 | 1751133665.47 | 2025-06-28 18:01:05.470 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 099 +45 052 02 0012 L0016HN15 005 | 00018000250002300022000270002c000230001e000200... |
... | ... | ... | ... | ... | ... | ... |
220 | 1751137121.41 | 2025-06-28 18:58:41.410 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 100 +47 052 02 0013 L0016HN15 007 | 0001d0002a00027000280002a0002f0002600021000220... |
221 | 1751137137.39 | 2025-06-28 18:58:57.390 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 100 +47 052 02 0013 L0016HN15 005 | 000130002b0002600025000290002f0002600021000210... |
222 | 1751137153.39 | 2025-06-28 18:59:13.390 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 100 +47 052 02 0016 L0016HN15 003 | 000130002a0002600026000290002f0002600021000230... |
223 | 1751137169.39 | 2025-06-28 18:59:29.390 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 100 +47 052 02 0016 L0016HN15 006 | 000180002b00028000270002a0002f0002700022000230... |
224 | 1751137185.39 | 2025-06-28 18:59:45.390 | CL020211 | 0W ///// ///// ///// 000080008000 | 00100 10 0770 098 +47 052 02 0020 L0016HN15 008 | 000210002b00027000260002a0002f0002700022000230... |
225 rows × 6 columns
In [16]:
# Extract the profile string from the first row (row 0)
profile_str = df.loc[0, 'profile']
# Split the string into 4-character chunks (each chunk = one int16 hex value)
chunks = [profile_str[i:i+4] for i in range(0, len(profile_str), 4)]
# Convert hex strings to signed 16-bit integers
profile_values = np.array([
int(c, 16) if int(c, 16) < 0x8000 else int(c, 16) - 0x10000
for c in chunks
], dtype=np.int16)
# Convert to DataFrame
profile_df = pd.DataFrame({
'bin': np.arange(len(profile_values)),
'signal': profile_values
})
display(profile_df)
bin | signal | |
---|---|---|
0 | 0 | 1 |
1 | 1 | 8192 |
2 | 2 | 9472 |
3 | 3 | 544 |
4 | 4 | 34 |
... | ... | ... |
958 | 958 | 32063 |
959 | 959 | -1545 |
960 | 960 | 55 |
961 | 961 | -24575 |
962 | 962 | 127 |
963 rows × 2 columns
copy files to horel¶
In [12]:
# if set_to_zero_for_script == 0:
# receive_directory = directory_hdrive_websitefiles
# permission_mode = 0o775 # rwxrwxr-x
# for ext in ['.html', '.csv']:
# source_file_path = os.path.join(directory_outputs, f'{filename}{ext}')
# destination_file_path = os.path.join(receive_directory, f'{filename}{ext}')
# try:
# # Ensure the receive directory exists
# os.makedirs(receive_directory, exist_ok=True)
# # Copy the file (overwrite if exists)
# shutil.copy2(source_file_path, destination_file_path)
# # Set file permissions
# os.chmod(destination_file_path, permission_mode)
# # Get file info
# file_size = os.path.getsize(destination_file_path)
# file_datetime = datetime.fromtimestamp(os.path.getmtime(destination_file_path))
# print(f"Copied {destination_file_path}")
# print(f"Size: {file_size} bytes")
# print(f"Modified: {file_datetime}")
# except FileNotFoundError:
# print(f"File not found: {source_file_path}")
# except PermissionError:
# print("Permission denied.")
# except shutil.Error as e:
# print(f"Copy error: {str(e)}")