{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# illustrate using Pandas and json IO" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates\n", "#access a function from the urllib module\n", "from urllib.request import urlretrieve\n", "#also import the os module so we can check the size of the downloaded file\n", "import os\n" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "# needed to handle dates. See Chapter 15\n", "from datetime import datetime,timezone\n", "#adding a new module Pandas. See Chapter 16\n", "import pandas as pd\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will be accessing public data from a commercial site for which I need to disclose that I am involved with the company, Synoptic Data:\n", "- I am on the Board of Directors\n", "- I am a shareholder\n", "- I have a grant from that company to help with their customer support and research and development\n" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "# we are using an api (application programming interface) service\n", "# see https://developers.synopticdata.com/mesonet/\n", "# get all temperature observations at the Salt Lake City airport during 2022\n", "# station_id = KSLC\n", "# the variable is defined as \"air_temp\"\n", "# local times from Midnight New Year's Eve to Midnight last night\n", "# output = csv\n", "# you are using a \"token\" to obtain access. 
This token may expire at some future date" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Saved slc_temp.csv 2876.563 KB\n" ] } ], "source": [ "url = \"https://api.synopticdata.com/v2/stations/timeseries?&token=bace3f05279d4de1bb2f03011843709e&start=202201010700&end=202209220600&obtimezone=local&output=csv&stid=kslc&vars=air_temp\"\n", "# define the file to write the data into\n", "filename = \"slc_temp.csv\"\n", "#let's try to get the file from the web\n", "try:\n", " #get the file over the web\n", " urlretrieve(url, filename)\n", " print(\"Saved\", filename, os.path.getsize(filename)/1000., 'KB')\n", "except:\n", " print(\"something wrong grabbing the file\")\n", " print(\"but the program continues, so may be in error\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# STOP!\n", "Launch a terminal window and look at the file uaing more \n", "\n", "How many header lines are there?\n", "\n", "Are there any footer lines that need to be removed? How do you check for those?\n", "\n", "How many columns in each row of data?" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | TMPC | \n", "
---|---|
Date | \n", "\n", " |
2022-01-01 07:00:00+00:00 | \n", "-9.4 | \n", "
2022-01-01 07:05:00+00:00 | \n", "-9.4 | \n", "
2022-01-01 07:10:00+00:00 | \n", "-8.9 | \n", "
2022-01-01 07:15:00+00:00 | \n", "-7.2 | \n", "
2022-01-01 07:20:00+00:00 | \n", "-8.3 | \n", "
... | \n", "... | \n", "
2022-09-22 05:45:00+00:00 | \n", "20.0 | \n", "
2022-09-22 05:50:00+00:00 | \n", "20.0 | \n", "
2022-09-22 05:54:00+00:00 | \n", "20.0 | \n", "
2022-09-22 05:55:00+00:00 | \n", "20.0 | \n", "
2022-09-22 06:00:00+00:00 | \n", "20.6 | \n", "
82747 rows × 1 columns
\n", "\n", " | STATUS | \n", "MNET_ID | \n", "ELEVATION | \n", "NAME | \n", "STID | \n", "ELEV_DEM | \n", "LONGITUDE | \n", "STATE | \n", "RESTRICTED | \n", "QC_FLAGGED | \n", "LATITUDE | \n", "TIMEZONE | \n", "ID | \n", "PERIOD_OF_RECORD.start | \n", "PERIOD_OF_RECORD.end | \n", "SENSOR_VARIABLES.air_temp.air_temp_set_1.position | \n", "UNITS.position | \n", "UNITS.elevation | \n", "OBSERVATIONS.date_time | \n", "OBSERVATIONS.air_temp_set_1 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "ACTIVE | \n", "1 | \n", "4226 | \n", "Salt Lake City, Salt Lake City International A... | \n", "KSLC | \n", "4235.6 | \n", "-111.96503 | \n", "UT | \n", "False | \n", "True | \n", "40.77069 | \n", "America/Denver | \n", "53 | \n", "1997-01-01T00:00:00Z | \n", "2022-09-22T19:35:00Z | \n", "2.0 | \n", "m | \n", "ft | \n", "[2022-01-01T00:00:00-0700, 2022-01-01T00:05:00... | \n", "[-9.4, -9.4, -8.9, -7.2, -8.3, -9.4, -10.0, -1... | \n", "
1 | \n", "ACTIVE | \n", "153 | \n", "4996 | \n", "U of U Mountain Met Lab | \n", "MTMET | \n", "4993.4 | \n", "-111.828211 | \n", "UT | \n", "False | \n", "False | \n", "40.766573 | \n", "America/Denver | \n", "33898 | \n", "2012-04-26T00:00:00Z | \n", "2022-09-22T19:40:00Z | \n", "2.5 | \n", "m | \n", "ft | \n", "[2022-01-01T00:00:00-0700, 2022-01-01T00:01:00... | \n", "[-10.35, -10.411, -10.439, -10.539, -10.6, -10... | \n", "