# comment : import the pandas library
import pandas as pd
import matplotlib.pyplot as plt
# comment # do shift + enter
# comment : load the air quality measurements from the Excel workbook
# The path is absolute and machine-specific; it is kept in one place so the
# printed hint and the actual read can never disagree.
excel_path = '/Users/invbat/projects/air_quality_all_data.xlsx'
print(f'air_quality_all_data.xlsx file is located at directory {excel_path}')
print('Remember the directory path of your filename for example air_quality_all_data.xlsx. Otherwise you will get error message')
air_quality = pd.read_excel(excel_path)
# Preview the first rows of the table.
air_quality.head()
# comment # do shift + enter
# comment : I want to know which cities have an air quality station in my data
# unique() returns the distinct values of the "city" column; the number of
# cities is the length of that array.
air_quality["city"].unique()
# comment # do shift + enter
# comment : I want to work with the dates as datetime objects instead of plain text
# The values of the "date.utc" column are parsed and stored in a new
# "datetime" column; the original column is left untouched.
print('By applying the to_datetime function, pandas interprets the strings and convert these to datetime (i.e. datetime64[ns, UTC]) objects. ')
utc_text = air_quality["date.utc"]
air_quality["datetime"] = pd.to_datetime(utc_text)
# Preview the converted column.
air_quality["datetime"].head()
# comment # do shift + enter
# comment : What are the start and end dates of the time series we are working with?
# One print call with a newline separator emits the same three lines
# (two captions followed by an empty line).
print(
    'The start date is the first row',
    'The end date is the second row',
    '',
    sep='\n',
)
# Earliest and latest timestamp, evaluated as a (start, end) pair.
(
    air_quality["datetime"].min(),
    air_quality["datetime"].max(),
)
# comment # Do shift + enter
# comment : How much time does our air quality monitoring data span?
series_start = air_quality["datetime"].min()
series_end = air_quality["datetime"].max()
# Latest minus earliest timestamp gives the covered duration.
series_end - series_start
# comment # Do shift + enter
# comment : Add a "month" column to the air_quality table
# The month number is taken from each value of the "datetime" column.
month_of_measurement = air_quality["datetime"].dt.month
air_quality["month"] = month_of_measurement
# Preview the table with the new column.
air_quality.head()
# comment # Do shift + enter
# comment : What is the average NO2 concentration for each day of the week for each of the measurement locations?
# pandas numbers weekdays starting on Monday: dt.weekday is 0 for Monday and
# 6 for Sunday, so the printed legend has to state that convention (it
# previously claimed 0 = Sunday and 6 = Saturday).
print('In pandas, datetime 0 represents Monday and datetime 6 represents Sunday')
# NOTE(review): the mean is taken over every row of "value"; if the table holds
# more than one pollutant (a "parameter" column is pivoted on elsewhere in this
# file), filter to NO2 before grouping — TODO confirm.
air_quality.groupby([air_quality["datetime"].dt.weekday, "location"])["value"].mean()
# comment # Do shift + enter
# comment : Plot the typical NO2 pattern during the day of our time series of all stations together.
# In other words, what is the average value for each hour of the day?
# The hourly averages are computed once and then drawn four different ways.
hourly_mean = air_quality.groupby(air_quality["datetime"].dt.hour)["value"].mean()

# Figure 1: vertical bar chart, one bar per hour of the day.
fig1, axs = plt.subplots(figsize=(12, 4))
hourly_mean.plot(kind='bar', rot=0, ax=axs)
axs.set_ylabel("$NO_2 (µg/m^3)$")
axs.set_xlabel("Hour of the day")

# Figure 2: box plot of the hourly averages.
fig2, axs = plt.subplots(figsize=(12, 4))
hourly_mean.plot(kind='box', rot=0, ax=axs)
axs.set_ylabel("Average NO$_2$ concentration")
# This label used to say "figure 1" although it belongs to the second figure.
axs.set_xlabel("Distribution of Pollutant For Each Day - figure 2 - Box Plot")

# Figure 3: line chart of the hourly averages.
fig3, axs = plt.subplots(figsize=(12, 4))
hourly_mean.plot(kind='line', rot=0, ax=axs)
axs.set_ylabel("Average NO$_2$ concentration")
axs.set_xlabel("Pollutant Every Hour For Each Day - figure 3 - Line Plot")

# Figure 4: horizontal bar chart, so the hour is on the y axis.
fig4, axs = plt.subplots(figsize=(12, 6))
hourly_mean.plot(kind='barh', rot=0, ax=axs)
axs.set_ylabel("Pollutant Every Hour For Each Day")
# The trailing semicolon keeps a notebook cell from echoing the returned label object.
axs.set_xlabel("Average NO$_2$ concentration - figure 4 - Horizontal Bar Plot");
# comment # Do shift + enter
# comment : Plot the typical NO2 average for each air quality station
# Note: fig1 and axs are rebound here, so they no longer refer to the figure
# created for the hourly bar chart earlier in the file.
fig1, axs = plt.subplots(figsize=(12, 4))
station_mean = air_quality.groupby("location")["value"].mean()
station_mean.plot.bar(rot=0, ax=axs)
axs.set_ylabel("$NO_2 (µg/m^3)$")
# The trailing semicolon keeps a notebook cell from echoing the returned label object.
axs.set_xlabel("Group By Station");
# comment : How many rows of data am I analysing?
# shape is a (number of rows, number of columns) pair, so the row count is
# its first element.
air_quality.shape
# comment # Do shift + enter
# comment : Which column field names does my data have?
# info() prints a summary of the DataFrame, including its columns.
air_quality.info()
# comment # Do shift + enter
# comment : Get the mean of each pollutant (no2, pm25) for each air quality station location
# Rows are the locations, columns are the distinct "parameter" values, and
# each cell is the mean of "value"; no totals row/column is added.
pd.pivot_table(
    air_quality,
    values="value",
    index="location",
    columns="parameter",
    aggfunc="mean",
    margins=False,
)
# comment # Do shift + enter
# comment : Get the mean of each pollutant (no2, pm25) per location, plus the overall means
# Same table as without margins, but margins=True adds an extra row and
# column holding the aggregate across all locations / parameters.
pd.pivot_table(
    air_quality,
    values="value",
    index="location",
    columns="parameter",
    aggfunc="mean",
    margins=True,
)
# comment # Do shift + enter
PREVIOUS LESSON 7 NEXT LESSON 9
Why do you need a personal augmented intelligence (AI) chatbot? Because it is useful for knowledge storage, information retrieval, and fast computation with fewer errors.
IN-V-BAT-AI uses explainable Artificial Intelligence (AI) to automate repetitive solved problems and routine calculations, so we can focus our brain power on solving harder new problems and then automate those too once they are solved.
INVBAT.COM - A.I. is a disruptive innovation in computing and web search technology.
For example, a scientific calculator helps us speed up calculations, but we still need to remember the formula accurately and the correct sequence of data entry.
Here comes the disruptive innovation from INVBAT.COM-A.I.: today the problem of remembering the formula and the correct sequence of data entry is solved
by combining formula and calculation and making them available on demand using a smartphone, tablet, notebook, Chromebook, laptop, desktop, school smartboard, or
company big-screen TV in a conference room with an internet connection.