import pandas as pd
import numpy as np
import scipy as sp
import warnings
import mpl_toolkits.mplot3d.axes3d as p3
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import fftpack
from scipy import integrate
# comment # Do shift + enter
# Handling missing data (null, blank, NaN) with pandas.
# Start from a table of 10 rows (labels 0..9) and 3 columns ('A', 'B', 'C')
# filled with random samples from np.random.randn.
dff = pd.DataFrame(
    data=np.random.randn(10, 3),
    columns=list('ABC'),
)
# Punch holes into the table.  `.loc` slices by label and includes BOTH end
# points, so on the default 0..9 row index `3:4` means rows 3 and 4 (the
# positional spelling would be `.iloc[3:5, 0]`, whose stop is exclusive).
dff.loc[3:4, 'A'] = np.nan  # column A: rows 3 and 4
dff.loc[4:5, 'B'] = np.nan  # column B: rows 4 and 5
dff.loc[5:7, 'C'] = np.nan  # column C: rows 5, 6 and 7
# Show the table with the gaps in place (notebook cell output).
dff
# comment # Do shift + enter
# Line chart of columns A, B and C; the table still contains its NaN cells
# at this point.
dff.plot.line()
# comment # Do shift + enter
# A common way to patch missing values (NaN / null): replace each gap with
# the mean of the column it sits in.  `dff.mean(axis=0)` yields one mean per
# column, and `fillna` matches those values to the columns by label.
dff2 = dff.fillna(value=dff.mean(axis=0))
dff2
# comment # Do shift + enter
# Chart A, B and C again, this time from the mean-filled copy.
dff2.plot.line()
# comment # Do shift + enter
# Mean-fill only columns B and C: the fill values are restricted to the
# labels 'B' through 'C', so no replacement is supplied for column A.
# The result is not stored; it is only displayed as the cell output.
dff.fillna(value=dff.mean().loc['B':'C'])
# comment # Do shift + enter
# Another spelling of the per-column mean fill: mask out the NaN cells and
# take the replacement from `dff.mean()`, aligned along the columns.
dff3 = dff.mask(dff.isna(), other=dff.mean(), axis='columns')
dff3
# comment # Do shift + enter
# Chart A, B and C from this second mean-filled copy.
dff3.plot.line()
# comment # Do shift + enter
# Show the original table again; it still has its gaps because the fills
# above were stored in new objects (or only displayed) rather than written
# back into `dff`.
dff
# comment # Do shift + enter
# pandas also provides DataFrame.interpolate() for filling missing values.
# Called without arguments it uses the 'linear' method, which is spelled out
# explicitly here.
dff4 = dff.interpolate(method='linear')
dff4
# comment # Do shift + enter
# Chart A, B and C after the gaps were filled with interpolate().
dff4.plot.line()
# comment # Do shift + enter
# interpolate() can also use SciPy-backed interpolation schemes, selected
# through its `method=` argument.  Start once more from the table that still
# has the missing cells.
dff
# comment # Do shift + enter
# Fill the gaps column by column (axis=0) with method='barycentric'.
dff5 = dff.interpolate(method='barycentric', axis=0)
dff5
# comment # Do shift + enter
# Chart A, B and C after the barycentric fill.
dff5.plot.line()
# comment # Do shift + enter
# NOTE (author's observation): the barycentric chart does not look like the
# earlier ones, so barycentric appears to be a poor match for this data.
# Which interpolation method is appropriate depends on the kind of data.
# Next candidate: method='akima', suggested when the goal is to fill missing
# values for smooth plotting.
dff6 = dff.interpolate(method='akima', axis=0)
dff6
# comment # Do shift + enter
# Chart A, B and C after the akima fill.
dff6.plot.line()
# comment # Do shift + enter
# NOTE (author's observation): the akima result looks like an appropriate
# fill, similar to the earlier ones.  Which interpolation method is
# appropriate depends on the kind of data.
# Next candidate: method='pchip', suggested when the values approximate a
# cumulative distribution function.
dff7 = dff.interpolate(method='pchip', axis=0)
dff7
# comment # Do shift + enter
# Chart A, B and C after the pchip fill.
dff7.plot.line()
# comment # Do shift + enter
# NOTE (author's observation): the pchip result also looks like an
# appropriate fill, similar to the previous one.
# To compare the different ways of filling missing data, build a fresh
# test series of 37 values: the squares of 1, 1.25, ..., 10 plus random
# noise from np.random.randn.  The seed is fixed so the numbers are the same
# on every run.
np.random.seed(2)
ser = pd.Series(np.random.randn(37) + np.arange(1, 10.1, .25) ** 2)
# Blank out the rows labelled 4, 13-18, 20 and 29.  The series has the
# default 0..36 index, so these labels are also the row positions.
missing = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29])
ser.loc[missing] = np.nan
ser
# comment # Do shift + enter
# Chart the series `ser` (a pandas Series, not a DataFrame) with
# figsize=(12, 4) to look at the missing stretches.
ser.plot.line(figsize=(12, 4))
# comment # Do shift + enter
# Compare several interpolation methods on the same gappy series.
# Method names listed in the original note (the exact set may differ between
# pandas versions): 'linear', 'time', 'index', 'values', 'nearest', 'zero',
# 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial', 'krogh',
# 'piecewise_polynomial', 'pchip', 'akima', 'spline', 'from_derivatives'.
# 'spline' and 'polynomial' additionally need an order, e.g.
# .interpolate(method='spline', order=2).
# Rebuild the test series from scratch (same seed, same gaps) so this cell
# does not depend on the earlier one having been run.
np.random.seed(2)
ser = pd.Series(np.random.randn(37) + np.arange(1, 10.1, .25) ** 2)
missing = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29])
ser.loc[missing] = np.nan
methods = [
    'linear',
    'quadratic',
    'cubic',
    'akima',
    'pchip',
    'barycentric',
]
# One column per method, each holding the series filled by that method.
dff8 = pd.concat(
    [ser.interpolate(method=name) for name in methods],
    axis=1,
    keys=methods,
)
# Overlay the results of all interpolation methods on one chart
# (figsize=(12, 4)).
dff8.plot.line(figsize=(12, 4))
# comment # Do shift + enter
# NOTE (author's observation from the chart): barycentric is not an
# appropriate way to fill this series, while linear, quadratic, cubic, akima
# and pchip all look appropriate.
# Pair plot of the method columns against each other.  The trailing
# semicolon is kept from the original; presumably it is there to suppress
# the notebook's echo of the returned object.
sns.pairplot(data=dff8);
# comment # Do shift + enter
# Scatter plot of the table that still has gaps (`dff`): x is the column at
# position 0 (A), y is the column at position 1 (B).
dff.plot(kind='scatter', x=0, y=1)
# comment # Do shift + enter
# The same scatter plot for the pchip-filled table (`dff7`): column A
# (position 0) on x, column B (position 1) on y.
dff7.plot(kind='scatter', x=0, y=1)
# comment # Do shift + enter
PREVIOUS LESSON 11 NEXT LESSON 13
Why do you need a personal augmented intelligence (AI) chatbot? Because it is useful for knowledge storage, information retrieval, and fast computation with fewer errors.
IN-V-BAT-AI uses explainable Artificial Intelligence (AI) to automate repetitive, already-solved problems and routine calculations, so that we can focus our brain power on solving harder new problems and then automate those in turn once they are solved.

INVBAT.COM - A.I. is a disruptive innovation in computing and web search technology.
For example, a scientific calculator helps us speed up calculations, but we still need to remember the formula accurately, as well as the correct sequence of data entry.
Here comes the disruptive innovation from INVBAT.COM - A.I.: today the problem of remembering the formula and the correct sequence of data entry is solved
by combining formula and calculation and making them available on demand on a smartphone, tablet, notebook, Chromebook, laptop, desktop, school smartboard, or
company big-screen TV in a conference room with an internet connection.