{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Illustrate Using Pandas and JSON I/O to Access Data from Hurricane Ian" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# adding a \"widget\" to be able to zoom into figures\n", "# the % in the next line marks an IPython \"magic\" command (a notebook instruction, not regular Python)\n", "%matplotlib widget\n", "\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates\n", "#access a function from the urllib module\n", "from urllib.request import urlretrieve\n", "#also let's use the os module to see the file size\n", "import os\n", "\n", "#plotting on a map requires cartopy\n", "# See Chapter 13\n", "import cartopy\n", "import cartopy.crs as ccrs\n", "import cartopy.feature as cfeature" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#%pip list" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# needed to handle dates. See Chapter 15\n", "from datetime import datetime,timezone\n", "#adding a new module Pandas. 
See Chapter 16\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ "json is a file format that uses human-readable text to store and transmit data objects consisting of attribute-value pairs \n", "\n", "Think of it like having the ability to transmit many python values, lists, and dictionaries where each value is defined in terms of an attribute" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#get the json module \n", "import json" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will be accessing public data from a commercial site for which I need to disclose that I am involved with the company, Synoptic Data:\n", "\n", "- I am on the Board of Directors\n", "\n", "- I am a shareholder\n", "\n", "- I have a grant from that company to help with their customer support and research and development" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://api.synopticdata.com/v2/stations/precip?state=fl&start=202209271200&end=202210021200&pmode=totals&interval=day&token=bace3f05279d4de1bb2f03011843709e\n", "Saved ian_fl_ppt.json 733.522 KB\n" ] } ], "source": [ "#define time range in UTC\n", "# 5 day period\n", "start_time = '202209271200'\n", "end_time = '202210021200'\n", "#originally I was doing this by day, but that adds complications\n", "#url = \"https://api.synopticdata.com/v2/stations/precip?state=fl&start=\"+start_time+\"&end=\"+end_time+\"&pmode=intervals&interval=day&token=bace3f05279d4de1bb2f03011843709e\"\n", "#instead get the totals between the start and end time\n", "url = \"https://api.synopticdata.com/v2/stations/precip?state=fl&start=\"+start_time+\"&end=\"+end_time+\"&pmode=totals&interval=day&token=bace3f05279d4de1bb2f03011843709e\"\n", "print(url)\n", "\n", "# define the file to write the data into\n", "filename = 
\"ian_fl_ppt.json\"\n", "#let's try if we can get the file from the web\n", "try:\n", " #get the file over the web\n", " urlretrieve(url, filename)\n", " print(\"Saved\", filename, os.path.getsize(filename)/1000., 'KB')\n", "except:\n", " print(\"something wrong grabbing the file\")\n", " print(\"but the program continues, so may be in error\")\n", " \n", "#what's the file size?" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "# STOP\n", "\n", "Click on the file to the left with that name that has a \"dictionary\" type icon {:}\n", "\n", "Click on one right pointing arrow so it points down\n", "\n", "What do you see?\n", "\n", "Click on other ones\n", "\n", "Lots of info here! Check it out!\n", "\n", "Look carefully. How many indents to get to the total for the first station?" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#read the data by opening the file and reading all of it\n", "in_file = open('ian_fl_ppt.json').read()\n", "data = json.loads(in_file)\n", "#print(data)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STATUSMNET_IDELEVATIONNAMESTIDELEV_DEMLONGITUDESTATERESTRICTEDLATITUDETIMEZONEIDPERIOD_OF_RECORD.startPERIOD_OF_RECORD.endUNITS.positionUNITS.elevationOBSERVATIONS.precipitation
0ACTIVE227SUMATRASURF126.2-84.986110FLFalse30.020560America/New_York34362004-12-27T00:00:00Z2022-10-20T16:48:00Zmft[{'count': 120, 'first_report': '2022-09-27T11...
1ACTIVE28OASISOASF116.4-81.033FLFalse25.860389America/New_York34702001-06-26T00:00:00Z2022-10-20T16:37:00Zmft[{'count': 120, 'first_report': '2022-09-27T11...
2ACTIVE120Melbourne International AirportKMLB23-80.63560FLFalse28.09973America/New_York41362002-08-12T00:00:00Z2022-10-20T17:05:00Zmft[{'count': 210, 'first_report': '2022-09-27T11...
3ACTIVE144Perry-Foley AirportKFPY65.6-83.58154FLFalse30.07081America/New_York41932002-08-14T00:00:00Z2022-10-20T16:55:00Zmft[{'count': 359, 'first_report': '2022-09-27T11...
4ACTIVE120Apalachicola, ApalachicolaKAAF13.1-85.02472FLFalse29.72694America/New_York42392002-08-14T00:00:00Z2022-10-20T17:05:00Zmft[{'count': 120, 'first_report': '2022-09-27T11...
......................................................
1343ACTIVE24926Pine Crest School2000WNone-80.12401FLFalse26.20460America/New_York1780082022-09-13T16:10:00Z2022-10-20T16:50:00Zmft[{'count': 660, 'first_report': '2022-09-27T12...
1344ACTIVE249161FSWN Emergency Response Unit2003WNone-84.21333FLFalse30.53596America/New_York1780092022-09-13T16:25:00Z2022-10-20T13:50:00Zmft[{'count': 18, 'first_report': '2022-09-27T12:...
1345ACTIVE656N3FTU CLEARWATER BEACHAV996None-82.82617FLFalse27.97200America/New_York1780362022-09-15T22:09:00Z2022-10-20T17:09:00Zmft[{'count': 1435, 'first_report': '2022-09-27T1...
1346ACTIVE6572GW2374 Big Bear LakeG2374None-82.40000FLFalse28.20000America/New_York1782452022-09-28T03:20:00Z2022-10-20T17:08:00Zmft[{'count': 1150, 'first_report': '2022-09-28T0...
1347ACTIVE65121GW2392 WindermereG2392118.1-81.60117FLFalse28.49633America/New_York1782942022-09-30T20:09:00Z2022-10-20T17:10:00Zmft[{'count': 483, 'first_report': '2022-09-30T19...
\n", "

1348 rows × 17 columns

\n", "
" ], "text/plain": [ " STATUS MNET_ID ELEVATION NAME STID \\\n", "0 ACTIVE 2 27 SUMATRA SURF1 \n", "1 ACTIVE 2 8 OASIS OASF1 \n", "2 ACTIVE 1 20 Melbourne International Airport KMLB \n", "3 ACTIVE 1 44 Perry-Foley Airport KFPY \n", "4 ACTIVE 1 20 Apalachicola, Apalachicola KAAF \n", "... ... ... ... ... ... \n", "1343 ACTIVE 249 26 Pine Crest School 2000W \n", "1344 ACTIVE 249 161 FSWN Emergency Response Unit 2003W \n", "1345 ACTIVE 65 6 N3FTU CLEARWATER BEACH AV996 \n", "1346 ACTIVE 65 72 GW2374 Big Bear Lake G2374 \n", "1347 ACTIVE 65 121 GW2392 Windermere G2392 \n", "\n", " ELEV_DEM LONGITUDE STATE RESTRICTED LATITUDE TIMEZONE \\\n", "0 26.2 -84.986110 FL False 30.020560 America/New_York \n", "1 16.4 -81.033 FL False 25.860389 America/New_York \n", "2 23 -80.63560 FL False 28.09973 America/New_York \n", "3 65.6 -83.58154 FL False 30.07081 America/New_York \n", "4 13.1 -85.02472 FL False 29.72694 America/New_York \n", "... ... ... ... ... ... ... \n", "1343 None -80.12401 FL False 26.20460 America/New_York \n", "1344 None -84.21333 FL False 30.53596 America/New_York \n", "1345 None -82.82617 FL False 27.97200 America/New_York \n", "1346 None -82.40000 FL False 28.20000 America/New_York \n", "1347 118.1 -81.60117 FL False 28.49633 America/New_York \n", "\n", " ID PERIOD_OF_RECORD.start PERIOD_OF_RECORD.end UNITS.position \\\n", "0 3436 2004-12-27T00:00:00Z 2022-10-20T16:48:00Z m \n", "1 3470 2001-06-26T00:00:00Z 2022-10-20T16:37:00Z m \n", "2 4136 2002-08-12T00:00:00Z 2022-10-20T17:05:00Z m \n", "3 4193 2002-08-14T00:00:00Z 2022-10-20T16:55:00Z m \n", "4 4239 2002-08-14T00:00:00Z 2022-10-20T17:05:00Z m \n", "... ... ... ... ... 
\n", "1343 178008 2022-09-13T16:10:00Z 2022-10-20T16:50:00Z m \n", "1344 178009 2022-09-13T16:25:00Z 2022-10-20T13:50:00Z m \n", "1345 178036 2022-09-15T22:09:00Z 2022-10-20T17:09:00Z m \n", "1346 178245 2022-09-28T03:20:00Z 2022-10-20T17:08:00Z m \n", "1347 178294 2022-09-30T20:09:00Z 2022-10-20T17:10:00Z m \n", "\n", " UNITS.elevation OBSERVATIONS.precipitation \n", "0 ft [{'count': 120, 'first_report': '2022-09-27T11... \n", "1 ft [{'count': 120, 'first_report': '2022-09-27T11... \n", "2 ft [{'count': 210, 'first_report': '2022-09-27T11... \n", "3 ft [{'count': 359, 'first_report': '2022-09-27T11... \n", "4 ft [{'count': 120, 'first_report': '2022-09-27T11... \n", "... ... ... \n", "1343 ft [{'count': 660, 'first_report': '2022-09-27T12... \n", "1344 ft [{'count': 18, 'first_report': '2022-09-27T12:... \n", "1345 ft [{'count': 1435, 'first_report': '2022-09-27T1... \n", "1346 ft [{'count': 1150, 'first_report': '2022-09-28T0... \n", "1347 ft [{'count': 483, 'first_report': '2022-09-30T19... \n", "\n", "[1348 rows x 17 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#now lets try to make sense of all the info available by putting it into a pandas dataframe\n", "# the json_normalize function in Pandas flattens the json structure to make it easier to handle\n", "df_json = pd.json_normalize(data,record_path=['STATION'])\n", "#STOP! \n", "#look at all the columns. 
Note the rows are the stations\n", "df_json" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 STATUS\n", "1 MNET_ID\n", "2 ELEVATION\n", "3 NAME\n", "4 STID\n", "5 ELEV_DEM\n", "6 LONGITUDE\n", "7 STATE\n", "8 RESTRICTED\n", "9 LATITUDE\n", "10 TIMEZONE\n", "11 ID\n", "12 PERIOD_OF_RECORD.start\n", "13 PERIOD_OF_RECORD.end\n", "14 UNITS.position\n", "15 UNITS.elevation\n", "16 OBSERVATIONS.precipitation\n" ] } ], "source": [ "#print out all the columns\n", "for col in range(len(df_json.columns)):\n", " print(col,df_json.columns[col]) " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "precip\n" ] } ], "source": [ "#the column of main interest is #16 OBSERVATIONS.precipitation\n", "#ugly name let's change it\n", "df_json.rename(columns = {'OBSERVATIONS.precipitation':'precip'}, inplace = True)\n", "print(df_json.columns[16]) " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "80.776\n", "[{'count': 164, 'first_report': '2022-09-27T11:53:00Z', 'total': 0.8, 'last_report': '2022-10-02T11:53:00Z', 'report_type': 'precip_accum_one_hour'}] \n", "[{'count': 207, 'first_report': '2022-09-27T11:58:00Z', 'total': 68.751, 'last_report': '2022-10-02T11:53:00Z', 'report_type': 'precip_accum_one_hour'}] \n" ] } ], "source": [ "#create a pandas data frame wth ID, lat, lon, and total precipitation\n", "df_ppt = df_json[['STID','LATITUDE','LONGITUDE','precip']]\n", "df_ppt = df_ppt.set_index(['STID'])\n", "print(type(df_ppt['precip']))\n", "#how do we get to the 2nd stations total precip?\n", "print(df_ppt['precip'][1][0]['total'])\n", "#there are some nan's! 
a way to sort that out is to compare the type of object as done below\n", "\n", "#some debugging steps while figuring this out\n", "test=df_ppt['precip'][15]\n", "print(test,type(test))\n", "test1 = df_ppt['precip'][14]\n", "print(test1,type(test1))\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1348\n" ] } ], "source": [ "#how many stations\n", "\n", "no_stid = len(df_ppt.LATITUDE)\n", "#create numpy array with that size for precip values\n", "p = np.ones(no_stid) \n", "print(no_stid)\n", "\n", "#let's again simplify the dictionaries, switch from mm to cm to get across the sense of iteration and handle nan's\n", "# nan's show up when there is not a list\n", "for no in range(0,no_stid):\n", " #is it a list. that is good. no list? then set as nan\n", " if isinstance(df_ppt['precip'][no],list):\n", " # change to cm\n", " pp = df_ppt['precip'][no][0]['total']/10.\n", " df_ppt['precip'][no][0]['total'] = pp\n", " # and for convenience assign the precip to a numpy variable\n", " p[no] = pp\n", " #print(no,df_ppt.index[no],df_ppt.Latitude[no],df_ppt.Longitude[no],p[no])\n", " else:\n", " p[no] = np.nan\n", " print('missing ppt',df_ppt.index[no])\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LATITUDELONGITUDETOTALS
STID
SURF130.020560-84.986110.0000
OASF125.860389-81.033008.0776
KMLB28.099730-80.6356010.8334
KFPY30.070810-83.581540.0000
KAAF29.726940-85.024720.0000
............
2000W26.204600-80.1240120.5994
2003W30.535960-84.213330.0000
AV99627.972000-82.826170.6604
G237428.200000-82.400005.6896
G239228.496330-81.601170.0000
\n", "

1348 rows × 3 columns

\n", "
" ], "text/plain": [ " LATITUDE LONGITUDE TOTALS\n", "STID \n", "SURF1 30.020560 -84.98611 0.0000\n", "OASF1 25.860389 -81.03300 8.0776\n", "KMLB 28.099730 -80.63560 10.8334\n", "KFPY 30.070810 -83.58154 0.0000\n", "KAAF 29.726940 -85.02472 0.0000\n", "... ... ... ...\n", "2000W 26.204600 -80.12401 20.5994\n", "2003W 30.535960 -84.21333 0.0000\n", "AV996 27.972000 -82.82617 0.6604\n", "G2374 28.200000 -82.40000 5.6896\n", "G2392 28.496330 -81.60117 0.0000\n", "\n", "[1348 rows x 3 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#now let's clean this up \n", "#put the values into the dataframe\n", "df_ppt['TOTALS'] = p.tolist()\n", "#remove the precip column that is really ugly to deal with\n", "df_ppt.drop(columns=['precip'],inplace=True)\n", "#they are a string unless they are defined as floats\n", "df_ppt.LONGITUDE = df_ppt.LONGITUDE.astype('float64')\n", "df_ppt.LATITUDE = df_ppt.LATITUDE.astype('float64')\n", "df_ppt.TOTALS = df_ppt.TOTALS.astype('float64')\n", "df_ppt" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LATITUDELONGITUDETOTALS
STID
0369W26.46131-81.77957985.7770
0834W29.12042-80.95339895.9338
SSSBU26.09212-80.10982304.4160
1866W29.26585-81.2275890.0434
1334W29.57435-81.1876682.9570
............
SSLYQ28.35534-81.4424624.8666
E939229.18833-81.0731724.7908
0323W25.71442-80.2823524.5360
G144426.49833-82.0886724.4856
1355W26.23780-80.2376924.3332
\n", "

100 rows × 3 columns

\n", "
" ], "text/plain": [ " LATITUDE LONGITUDE TOTALS\n", "STID \n", "0369W 26.46131 -81.77957 985.7770\n", "0834W 29.12042 -80.95339 895.9338\n", "SSSBU 26.09212 -80.10982 304.4160\n", "1866W 29.26585 -81.22758 90.0434\n", "1334W 29.57435 -81.18766 82.9570\n", "... ... ... ...\n", "SSLYQ 28.35534 -81.44246 24.8666\n", "E9392 29.18833 -81.07317 24.7908\n", "0323W 25.71442 -80.28235 24.5360\n", "G1444 26.49833 -82.08867 24.4856\n", "1355W 26.23780 -80.23769 24.3332\n", "\n", "[100 rows x 3 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#sort the rows by the TOTALS values\n", "ppt_sorted=df_ppt.sort_values(by=['TOTALS'],ascending=False)\n", "ppt_sorted[0:100]\n", "#are those super large realistic?\n", "# 30 inches is a lot of water ~75 cm" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LATITUDELONGITUDETOTALS
STID
0369W26.46131-81.77957985.7770
0834W29.12042-80.95339895.9338
1334W29.57435-81.1876682.9570
1342W28.62773-81.4015876.1740
SSSBU26.09212-80.10982304.4160
1866W29.26585-81.2275890.0434
\n", "
" ], "text/plain": [ " LATITUDE LONGITUDE TOTALS\n", "STID \n", "0369W 26.46131 -81.77957 985.7770\n", "0834W 29.12042 -80.95339 895.9338\n", "1334W 29.57435 -81.18766 82.9570\n", "1342W 28.62773 -81.40158 76.1740\n", "SSSBU 26.09212 -80.10982 304.4160\n", "1866W 29.26585 -81.22758 90.0434" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#let's put those in their own dataframe\n", "# and remove them from the main dataframe for now and figure out what's haywire later\n", "df_ppt_big = df_ppt[df_ppt['TOTALS'] > 75.]\n", "df_ppt_big" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LATITUDELONGITUDETOTALS
STID
0335W27.22227-81.86353-0.4064
0368W26.42380-81.42700-1.7264
0370W27.00014-82.07607-6.6548
0465W25.74262-80.34718-1.0160
0511W27.77185-82.63815-44.6786
0525W28.10286-81.62503-1.8800
\n", "
" ], "text/plain": [ " LATITUDE LONGITUDE TOTALS\n", "STID \n", "0335W 27.22227 -81.86353 -0.4064\n", "0368W 26.42380 -81.42700 -1.7264\n", "0370W 27.00014 -82.07607 -6.6548\n", "0465W 25.74262 -80.34718 -1.0160\n", "0511W 27.77185 -82.63815 -44.6786\n", "0525W 28.10286 -81.62503 -1.8800" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#negative precipitation totals?\n", "#that means calculating the totals over multiple days can be messed up\n", "#let's remove those for now and figure out what's haywire later\n", "df_ppt_neg = df_ppt[df_ppt['TOTALS'] < 0.]\n", "df_ppt_neg" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# let's remove those rows for too big and negative values\n", "# first find which indices are associated with those conditions\n", "# then drop and use the inplace=True to keep in the same dataframe\n", "df_ppt.drop((df_ppt.loc[df_ppt['TOTALS']<0.].index), inplace=True)\n", "df_ppt.drop((df_ppt.loc[df_ppt['TOTALS']>75.].index), inplace=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " LATITUDE LONGITUDE TOTALS\n", "count 1336.000000 1336.000000 1336.000000\n", "mean 28.368475 -82.292363 7.840365\n", "std 1.694633 1.878818 10.357789\n", "min 9.004970 -87.500500 0.000000\n", "5% 25.787155 -86.563060 0.000000\n", "10% 26.147575 -85.447180 0.000000\n", "25% 27.195320 -82.779832 0.000000\n", "33% 27.671501 -82.559222 0.217170\n", "50% 28.195995 -81.896085 4.762500\n", "66% 29.324017 -81.315432 8.966200\n", "75% 30.035372 -80.899933 10.947500\n", "90% 30.486155 -80.265525 20.104100\n", "95% 30.654455 -80.160800 28.303275\n", "max 30.974020 -79.589440 73.685600\n" ] } ], "source": [ "# get some of the basic stats to check\n", "basic_vals = df_ppt.describe(percentiles=[.05,.10,.25,.33,.50,.66,.75,.90,.95])\n", "print(basic_vals)" ] }, { "cell_type": "code", 
"execution_count": 18, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a8af05cf2cbe42dc8d3a76663cadaa15", "version_major": 2, "version_minor": 0 }, "image/png": "", "text/html": [ "\n", "
\n", "
\n", " Figure\n", "
\n", " \n", "
\n", " " ], "text/plain": [ "Canvas(header_visible=False, toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Bac…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# plot the cumulative histogram for precipitation in Florida\n", "fig1,ax = plt.subplots(1,1,figsize=(10,5))\n", "# Hide the Figure name at the top of the figure\n", "fig1.canvas.header_visible = False\n", "# Always showthe toolbar\n", "fig1.canvas.toolbar_visible = True\n", "#using the numpy array p defined earlier\n", "n_bins = len(p)\n", "n, bins, patches = ax.hist(p, n_bins, density='True', histtype='step',\n", " cumulative=True, label='Empirical')\n", "ax.set(xlabel=\"Precipitation (cm)\",ylabel='Cumulative Empirical Probability')\n", "ax.set(xlim=(0,42.))\n", "ax.set_xticks(np.arange(0,42.,step=2))\n", "ax.set_yticks(np.arange(0, 1.1, step=0.10))\n", "ax.grid(linestyle='--', color='grey', linewidth=.2)\n", "ax.set(title=\"Florida Precipitation (cm)\")\n", "plt.savefig('fl_ppt.png')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " LATITUDE LONGITUDE TOTALS\n", "STID \n", "KDAB 29.173540 -81.071860 34.9952\n", "KMCO 28.418260 -81.324130 33.5320\n", "KSFB 28.783330 -81.250000 40.8988\n", "KTTS 28.616670 -80.700000 28.5537\n", "MRFF1 28.640833 -80.730833 31.2170\n", "... ... ... ...\n", "1980W 26.462070 -80.078610 26.2890\n", "PNAFL 28.080900 -81.411400 26.1366\n", "1991W 28.181830 -82.159180 26.8478\n", "1993W 26.703140 -80.035940 23.5204\n", "2000W 26.204600 -80.124010 20.5994\n", "\n", "[138 rows x 3 columns]\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "395a11e3e8694144abe604715229e3ab", "version_major": 2, "version_minor": 0 }, "image/png": "", "text/html": [ "\n", "
\n", "
\n", " Figure\n", "
\n", " \n", "
\n", " " ], "text/plain": [ "Canvas(header_visible=False, toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Bac…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "fig2 = plt.figure(figsize=(5,10))\n", "# Hide the Figure name at the top of the figure\n", "fig2.canvas.header_visible = False\n", "# Always showthe toolbar\n", "fig2.canvas.toolbar_visible = True\n", "#define that the most basic projection: cylindricl equidistant: equal in lat and lon\n", "ax = plt.axes(projection=ccrs.PlateCarree())\n", "#what part of the globe?\n", "ax.set_extent([-85., -78., 23.5, 32.], ccrs.PlateCarree())\n", "#dd state outlines\n", "states = cartopy.feature.NaturalEarthFeature(\n", " category='cultural', scale='50m', facecolor='none',\n", " name='admin_1_states_provinces')\n", "ax.add_feature(states,zorder=2,edgecolor='darkslategrey',linewidth=0.8,alpha = 0.5)\n", "\n", "# get locations as numpy arrays for plotting\n", "x = df_ppt.LONGITUDE.to_numpy()\n", "y = df_ppt.LATITUDE.to_numpy()\n", "\n", "#plot all locations with a black dot\n", "plt.scatter(x,y,zorder=2,s=5,c=\"grey\")\n", "\n", "#plot places with more than 20 cm as a larger green dot\n", "#which locations have more than 20 cm?\n", "p_gt_20 = df_ppt[df_ppt['TOTALS'] >= 20.]\n", "print(p_gt_20)\n", "x_g20 = p_gt_20.LONGITUDE.to_numpy()\n", "y_g20 = p_gt_20.LATITUDE.to_numpy()\n", "plt.scatter(x_g20,y_g20,zorder=10,s=20,c=\"green\")\n", "\n", "#wrap up\n", "ax.set_title('Florida Rainfall totals (cm) %s : %s UTC' % (start_time, end_time), fontsize=12)\n", "plt.savefig('florida_rainfall_totals.png')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " LATITUDE LONGITUDE TOTALS\n", "STID \n", "SURF1 30.020560 -84.986110 0.0\n", "KFPY 30.070810 -83.581540 0.0\n", "KAAF 29.726940 -85.024720 0.0\n", "KCEW 30.772220 -86.520000 0.0\n", "KCTY 29.633326 -83.105458 0.0\n", "... ... ... 
...\n", "1950W 29.636110 -83.126900 0.0\n", "G2219 27.758670 -82.725500 0.0\n", "SSEUH 28.759110 -81.523410 0.0\n", "2003W 30.535960 -84.213330 0.0\n", "G2392 28.496330 -81.601170 0.0\n", "\n", "[392 rows x 3 columns]\n", "0 0369W 985.777 986 -81.77957 26.46131\n", "1 0834W 895.9338 896 -80.95339 29.12042\n", "2 1334W 82.95700000000001 83 -81.18766 29.57435\n", "3 1342W 76.174 76 -81.40158 28.62773\n", "4 SSSBU 304.416 304 -80.10982 26.09212\n", "5 1866W 90.04339999999999 90 -81.22758 29.26585\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8dd66f11c9874e568b0d89ec6b018b38", "version_major": 2, "version_minor": 0 }, "image/png": "", "text/html": [ "\n", "
\n", "
\n", " Figure\n", "
\n", " \n", "
\n", " " ], "text/plain": [ "Canvas(header_visible=False, toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Bac…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#what about possible outliers, where are they?\n", "# > 75., < 0., and == 0.\n", "fig3 = plt.figure(figsize=(5,10))\n", "# Hide the Figure name at the top of the figure\n", "fig3.canvas.header_visible = False\n", "# Always showthe toolbar\n", "fig3.canvas.toolbar_visible = True\n", "#define that the most basic projection: cylindricl equidistant: equal in lat and lon\n", "ax = plt.axes(projection=ccrs.PlateCarree())\n", "#what part of the globe?\n", "ax.set_extent([-85., -78., 23.5, 32.], ccrs.PlateCarree())\n", "# state outlines\n", "states = cartopy.feature.NaturalEarthFeature(\n", " category='cultural', scale='50m', facecolor='none',\n", " name='admin_1_states_provinces')\n", "ax.add_feature(states,zorder=2,edgecolor='darkslategrey',linewidth=0.8,alpha = 0.5)\n", "\n", "# get locations as numpy arrays for plotting\n", "x = df_ppt.LONGITUDE.to_numpy()\n", "y = df_ppt.LATITUDE.to_numpy()\n", "\n", "#plot all locations with a grey dot\n", "plt.scatter(x,y,zorder=2,s=5,c=\"grey\")\n", "\n", "#plot places eq 0 cm as a red dot\n", "p_eq0 = df_ppt[df_ppt['TOTALS'] == 0.]\n", "print(p_eq0)\n", "x_eq0 = p_eq0.LONGITUDE.to_numpy()\n", "y_eq0 = p_eq0.LATITUDE.to_numpy()\n", "plt.scatter(x_eq0,y_eq0,zorder=10,s=20,c=\"red\")\n", "\n", "#plot places < 0 cm as a yellow dot\n", "x_neg = df_ppt_neg.LONGITUDE.to_numpy()\n", "y_neg = df_ppt_neg.LATITUDE.to_numpy()\n", "plt.scatter(x_neg,y_neg,zorder=10,s=20,c=\"yellow\")\n", "\n", "#plot places > 75 cm as a blue dot\n", "x_big = df_ppt_big.LONGITUDE.to_numpy()\n", "y_big = df_ppt_big.LATITUDE.to_numpy()\n", "plt.scatter(x_big,y_big,zorder=10,s=20,c=\"blue\")\n", "\n", "#where are the big values and what are their values?\n", "#have to loop over all cases\n", "for i, tot in enumerate(df_ppt_big.TOTALS.to_numpy()):\n", " # 
get the station id\n", " stid = df_ppt_big.index[i]\n", " # save the total as a string\n", " str_tot = \"%.d\" % np.round(tot)\n", " print(i,stid,tot,str_tot,x_big[i],y_big[i])\n", " #label the big totals\n", " ax.annotate(str_tot, (x_big[i], y_big[i]),size='large')\n", "\n", "#wrap up\n", "ax.set_title('Spurious Florida Rainfall totals (cm) %s : %s UTC' % (start_time, end_time), fontsize=12)\n", "plt.savefig('florida_rainfall_totals_spurious.png')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# October 4 In-Class Exercise\n", "\n", "In the cell immediately above:\n", "- modify one line to label the locations of the big totals by the STID rather than the total\n", "- copy some lines to label the locations of the negative totals\n", "- modify the figure title so it includes your unid (no name)\n", "- copy the resulting figure to the class Teams page" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# more to follow!\n", "\n", "- are all the really large values realistic?\n", "- what happens when the power goes out?\n", "- how do we curate the data to get a better estimate of total rainfall?\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 4 }