# comment : import the pandas library and the matplotlib.pyplot plotting module
import pandas as pd
import matplotlib.pyplot as plt
# comment # do shift + enter

# comment : Tell the reader where the workbook is expected, then load it into a DataFrame
print('air_quality_all_data.xlsx file is located at directory  /Users/invbat/projects/air_quality_all_data.xlsx')
print('Remember the directory path of your filename for example air_quality_all_data.xlsx. Otherwise you will get error message')
workbook_path = '/Users/invbat/projects/air_quality_all_data.xlsx'
air_quality = pd.read_excel(workbook_path)
# comment : Preview the first rows of the loaded table
air_quality.head()
# comment # do shift + enter

air_quality_all_data.xlsx file is located at directory  /Users/invbat/projects/air_quality_all_data.xlsx
Remember the directory path of your filename for example air_quality_all_data.xlsx. Otherwise you will get error message

# comment : List the distinct cities that appear in the city column of the data
air_quality["city"].unique()
# comment # do shift + enter

array(['Antwerpen', 'London', 'Paris'], dtype=object)

# comment : Work with the dates in the column date.utc as datetime objects instead of plain text
print('By applying the to_datetime function, pandas interprets the strings and convert these to datetime (i.e. datetime64[ns, UTC]) objects. ')
parsed_dates = pd.to_datetime(air_quality["date.utc"])
# comment : Keep the original date.utc column and store the parsed values in a new datetime column
air_quality["datetime"] = parsed_dates
air_quality["datetime"].head()
# comment # do shift + enter

By applying the to_datetime function, pandas interprets the strings and convert these to datetime (i.e. datetime64[ns, UTC]) objects.

0   2019-06-18 06:00:00+00:00
1   2019-06-17 08:00:00+00:00
2   2019-06-17 07:00:00+00:00
3   2019-06-17 06:00:00+00:00
4   2019-06-17 05:00:00+00:00
Name: datetime, dtype: datetime64[ns, UTC]

# comment : Find the earliest and latest timestamp of the time series data set
print('The start date is the first row')
print('The end date is the second row')
print('')
timestamps = air_quality["datetime"]
# comment : The result is a (earliest, latest) pair
timestamps.min(), timestamps.max()
# comment # Do shift + enter

The start date is the first row
The end date is the second row

(Timestamp('2019-04-09 01:00:00+0000', tz='UTC'),
 Timestamp('2019-06-21 00:00:00+0000', tz='UTC'))

# comment : Length of the monitoring period = latest timestamp minus earliest timestamp
timestamps = air_quality["datetime"]
timestamps.max() - timestamps.min()
# comment # Do shift + enter

Timedelta('72 days 23:00:00')

# comment : Add a month column to the air_quality table, taken from the datetime column
month_numbers = air_quality["datetime"].dt.month
air_quality["month"] = month_numbers
# comment : Preview the table with the new column
air_quality.head()
# comment # Do shift + enter

# comment : What is the average NO2 concentration for each day of the week for each of the measurement locations?
# pandas numbers weekdays with Monday as 0 and Sunday as 6 (Series.dt.weekday),
# so the message states that mapping rather than a Sunday-first one.
print('Note: pandas dt.weekday uses 0 for Monday and 6 for Sunday')
# NOTE(review): no filter on the parameter column is applied here, and the pivot
# table further down shows both no2 and pm25 values, so this mean covers every
# row -- confirm whether an NO2-only average was intended.
air_quality.groupby([air_quality["datetime"].dt.weekday, "location"])["value"].mean()
# comment # Do shift + enter

Note: pandas dt.weekday uses 0 for Monday and 6 for Sunday

datetime  location          
0         BETR801               25.065657
          FR04014               29.495417
          London Westminster    21.173077
1         BETR801               32.423077
          FR04014               34.402381
          London Westminster    26.102510
2         BETR801               18.812500
          FR04014               30.130579
          London Westminster    22.427039
3         BETR801               18.892857
          FR04014               28.749378
          London Westminster    21.354906
4         BETR801               18.180000
          FR04014               32.980851
          London Westminster    20.756930
5         BETR801               24.500000
          FR04014               24.955752
          London Westminster    19.367580
6         BETR801               27.297101
          FR04014               24.467917
          London Westminster    18.980349
Name: value, dtype: float64

# comment : Plot the typical NO2 pattern during the day of our time series of all stations together.
# In other words, what is the average NO2 value for each hour of the day?
# The table holds rows for more than one pollutant (parameter column), so keep
# only the no2 rows; otherwise the axes labelled NO2 would show a blend.
no2_only = air_quality[air_quality["parameter"] == "no2"]
# Compute the hourly mean once and reuse it for all four figures.
hourly_no2 = no2_only.groupby(no2_only["datetime"].dt.hour)["value"].mean()

# figure 1 : vertical bar plot
fig1, axs = plt.subplots(figsize=(12, 4))
hourly_no2.plot(kind='bar', rot=0, ax=axs)
axs.set_ylabel("$NO_2 (µg/m^3)$")
axs.set_xlabel("Hour of the day")

# figure 2 : box plot of the hourly means
fig2, axs = plt.subplots(figsize=(12, 4))
hourly_no2.plot(kind='box', rot=0, ax=axs)
axs.set_ylabel("Average NO$_2$ concentration")
axs.set_xlabel("Distribution of Pollutant For Each Day -  figure 2 - Box Plot")

# figure 3 : line plot
fig3, axs = plt.subplots(figsize=(12, 4))
hourly_no2.plot(kind='line', rot=0, ax=axs)
axs.set_ylabel("Average NO$_2$ concentration")
axs.set_xlabel("Pollutant Every Hour For Each Day -  figure 3 - Line Plot")

# figure 4 : horizontal bar plot (hours on the y axis, mean on the x axis)
fig4, axs = plt.subplots(figsize=(12, 6))
hourly_no2.plot(kind='barh', rot=0, ax=axs)
axs.set_ylabel("Pollutant Every Hour For Each Day")
# The trailing semicolon keeps a notebook from echoing the returned Text object.
axs.set_xlabel("Average NO$_2$ concentration -  figure 4 - Horizontal Bar Plot");


# comment # Do shift + enter

# comment : Plot the typical NO2 average for each air quality station
# The table holds rows for more than one pollutant (parameter column), so keep
# only the no2 rows; otherwise the axis labelled NO2 would show a blend.
no2_only = air_quality[air_quality["parameter"] == "no2"]
fig1, axs = plt.subplots(figsize=(12, 4))
no2_only.groupby("location")["value"].mean().plot(kind='bar', rot=0, ax=axs)
axs.set_ylabel("$NO_2 (µg/m^3)$")
# The trailing semicolon keeps a notebook from echoing the returned Text object.
axs.set_xlabel("Group By Station");

# comment : How many rows and columns of data am I analysing?
# shape is a (number of rows, number of columns) tuple.
air_quality.shape
# comment # Do shift + enter

(5272, 9)

# comment : What are the column names in my data, and what type does each column hold?
# info() prints a summary: one line per column with its non-null count and dtype,
# followed by the memory usage.
air_quality.info()
# comment # Do shift + enter

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5272 entries, 0 to 5271
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype              
---  ------     --------------  -----              
 0   city       5272 non-null   object             
 1   country    5272 non-null   object             
 2   date.utc   5272 non-null   object             
 3   location   5272 non-null   object             
 4   parameter  5272 non-null   object             
 5   value      5272 non-null   float64            
 6   unit       5272 non-null   object             
 7   datetime   5272 non-null   datetime64[ns, UTC]
 8   month      5272 non-null   int64              
dtypes: datetime64[ns, UTC](1), float64(1), int64(1), object(6)
memory usage: 370.8+ KB

# comment : Mean value of each pollutant (one column per parameter) for every air quality station location
air_quality.pivot_table(
    index="location",
    columns="parameter",
    values="value",
    aggfunc="mean",
    margins=False,
)
# comment # Do shift + enter

# comment : Same table of means per location and pollutant, plus an "All" row and column holding the overall means
air_quality.pivot_table(
    index="location",
    columns="parameter",
    values="value",
    aggfunc="mean",
    margins=True,
)
# comment # Do shift + enter

parameter	no2	pm25	All
location
BETR801	26.950920	23.169492	24.982353
FR04014	29.374284	NaN	29.374284
London Westminster	29.740050	13.443568	21.491708
All	29.430316	14.386849	24.222743

Learn Once and Never Forget Now Possible !

How to use Python matplotlib.pyplot

INVBAT.COM -A.I.
The Personal Memory Assistant Company

BECAUSE MOST OF US FORGET

import pandas as pd , import matplotlib.pyplot as plt
Part 5

Open More Calculator Database

Learn Once and Never Forget
Now Possible !

Copyright 2023 IN-V-BAT-AI
The Personal Memory Assistant Company

INVenting Brain Assistant Tools using Artificial Intelligence
(IN-V-BAT-AI)

	city	country	date.utc	location	parameter	value	unit
0	Antwerpen	BE	2019-06-18 06:00:00+00:00	BETR801	pm25	18.0	µg/m³
1	Antwerpen	BE	2019-06-17 08:00:00+00:00	BETR801	pm25	6.5	µg/m³
2	Antwerpen	BE	2019-06-17 07:00:00+00:00	BETR801	pm25	18.5	µg/m³
3	Antwerpen	BE	2019-06-17 06:00:00+00:00	BETR801	pm25	16.0	µg/m³
4	Antwerpen	BE	2019-06-17 05:00:00+00:00	BETR801	pm25	7.5	µg/m³

Learn Once and Never Forget Now Possible !

How to use Python matplotlib.pyplot

INVBAT.COM -A.I. The Personal Memory Assistant Company

BECAUSE MOST OF US FORGET

import pandas as pd , import matplotlib.pyplot as plt Part 5

Open More Calculator Database

Learn Once and Never Forget Now Possible !

Copyright 2023 IN-V-BAT-AI The Personal Memory Assistant Company INVenting Brain Assistant Tools using Artificial Intelligence (IN-V-BAT-AI)

INVBAT.COM -A.I.
The Personal Memory Assistant Company

import pandas as pd , import matplotlib.pyplot as plt
Part 5

Learn Once and Never Forget
Now Possible !

Copyright 2023 IN-V-BAT-AI
The Personal Memory Assistant Company

INVenting Brain Assistant Tools using Artificial Intelligence
(IN-V-BAT-AI)