{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AirNow PM2.5 observations: load, subset and clean\n",
    "\n",
    "This notebook opens hourly PM2.5 data from the AirNow network, subsets it to a small\n",
    "latitude/longitude box and a short time window, and then filters and formats the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All libraries used by the notebook are imported here.\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input/output locations.\n",
    "# NOTE: `filepath` is an absolute path on the original author's system; point it at your\n",
    "# own copy of the AirNow hourly file before running.\n",
    "output_file = 'aq_obs_88101_08_2020.pkl'  # defined for reference; nothing in this notebook writes it\n",
    "filepath = '/uufs/chpc.utah.edu/common/home/u0703457/lin-group7/dvm/projects/UDAQ_2020-22/obs/AirNow/'\n",
    "filename = filepath + 'hourly_88101_PM25_2020.csv'\n",
    "\n",
    "# Spatial box and time window used for the subset.\n",
    "LON_RANGE = (-112.5, -111.5)  # longitude, lower bound inclusive, upper bound exclusive\n",
    "LAT_RANGE = (40, 41)          # latitude, lower bound inclusive, upper bound exclusive\n",
    "TIME_START = '2020-07-22'\n",
    "TIME_END = '2020-07-23'       # inclusive, so the 2020-07-23 00:00 record is kept"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_pm25_stats(frame):\n",
    "    \"\"\"Print the max, min and mean of the ``pm25`` column of ``frame``.\n",
    "\n",
    "    The pandas reductions used here skip NaN values.\n",
    "    \"\"\"\n",
    "    pm25 = frame['pm25']\n",
    "    print('The max PM2.5 measurement is :' + str(pm25.max()))\n",
    "    print('The minumum PM2.5 measurement is :' + str(pm25.min()))\n",
    "    print('The mean PM2.5 measurement is :' + str(pm25.mean()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read only the columns we need, then build one timestamp from the GMT date and time\n",
    "# columns. Combining columns through a nested `parse_dates` list in read_csv is\n",
    "# deprecated in recent pandas, so the combination is done explicitly here.\n",
    "aq_dat = pd.read_csv(\n",
    "    filename,\n",
    "    sep=',',\n",
    "    usecols=['State Code', 'County Code', 'Site Num', 'Latitude', 'Longitude',\n",
    "             'Date GMT', 'Time GMT', 'Sample Measurement'],\n",
    ")\n",
    "# pop() removes each source column from the frame while handing back its values.\n",
    "obs_time = pd.to_datetime(aq_dat.pop('Date GMT') + ' ' + aq_dat.pop('Time GMT'))\n",
    "aq_dat = aq_dat.rename(columns={'Latitude': 'lat', 'Longitude': 'lon', 'Sample Measurement': 'pm25'})\n",
    "aq_dat.insert(0, 'Time', obs_time)  # keep the timestamp as the first column\n",
    "print('Size of data frame = ' + str(aq_dat.shape))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Subset in space and time\n",
    "\n",
    "Keep only the records inside the latitude/longitude box and the time window set in the\n",
    "configuration cell; everything else is discarded."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "in_lon = (aq_dat['lon'] >= LON_RANGE[0]) & (aq_dat['lon'] < LON_RANGE[1])\n",
    "in_lat = (aq_dat['lat'] >= LAT_RANGE[0]) & (aq_dat['lat'] < LAT_RANGE[1])\n",
    "in_time = aq_dat['Time'].between(TIME_START, TIME_END)\n",
    "\n",
    "# .copy() makes aq_sub independent of aq_dat, so later assignments are unambiguous\n",
    "# and do not raise SettingWithCopyWarning.\n",
    "aq_sub = aq_dat[in_lon & in_lat & in_time].copy()\n",
    "print('Size of data frame = ' + str(aq_sub.shape))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summary statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Max, min and mean of the subset before any cleaning.\n",
    "print_pm25_stats(aq_sub)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Handle negative concentrations\n",
    "\n",
    "The data can contain negative PM2.5 values. There are two ways to deal with them:\n",
    "\n",
    "1. mask the negative readings as NaN and keep the rows, or\n",
    "2. drop the rows with negative readings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Option 1: keep every row but blank out the negative readings.\n",
    "# Only the pm25 column is set to NaN; the time and location of each row are preserved.\n",
    "aq_masked = aq_sub.copy()\n",
    "aq_masked.loc[aq_masked['pm25'] < 0, 'pm25'] = np.nan\n",
    "print_pm25_stats(aq_masked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Option 2: drop the rows with negative readings.\n",
    "# `>= 0` keeps zero readings; rows whose pm25 is NaN are dropped as well.\n",
    "aq_sub = aq_sub[aq_sub['pm25'] >= 0]\n",
    "print_pm25_stats(aq_sub)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Distribution of the cleaned data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "ax.hist(aq_sub['pm25'], bins=20)\n",
    "ax.set(title='PM2.5 in the selected box and time window',\n",
    "       xlabel='PM2.5 sample measurement',\n",
    "       ylabel='Number of records')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the cleaned subset (pandas truncates the display of long frames).\n",
    "aq_sub"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}