import pandas as pd
import numpy as np
import scipy as sp
import warnings
import mpl_toolkits.mplot3d.axes3d as p3
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import fftpack
from scipy import integrate

# comment # Do shift + enter

# comment : Handling missing data (null, blank, NaN)
# comment : Build a table of random numbers with 10 rows (labels 0..9) and 3 columns named 'A', 'B' and 'C'
dff = pd.DataFrame(np.random.standard_normal((10, 3)), columns=['A', 'B', 'C'])
# comment : Now blank out a different run of rows in each column.
# comment : .loc slices by row LABEL and includes both end points, so 3:4 means rows 3 and 4
# comment : (the positional equivalent would be .iloc[3:5, 0], where the end position is excluded)
# comment : Column A : rows 3 and 4 become NaN
dff.loc[3:4, 'A'] = np.nan
# comment : Column B : rows 4 and 5 become NaN
dff.loc[4:5, 'B'] = np.nan
# comment : Column C : rows 5, 6 and 7 become NaN
dff.loc[5:7, 'C'] = np.nan
# comment : Show the randomly generated table with the missing values in place
dff

# comment # Do shift + enter

# comment : Draw the A, B and C columns of dff as a line plot (one line per column)
dff.plot(kind='line')

# comment # Do shift + enter

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ebc0bb48>

# comment : A common practice for missing data (NaN or null) is to replace each missing value
# comment : with the mean of the column it belongs to.
# comment : dff.mean(axis=0) gives one mean per column; fillna matches those means to the columns by name
dff2 = dff.fillna(value=dff.mean(axis=0))
dff2
# comment # Do shift + enter

# comment : Line plot of A, B and C after the missing values were replaced by the column means
dff2.plot(kind='line')

# comment # Do shift + enter

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec468ec8>

# comment : This time fill only columns B and C with their column means and leave column A untouched.
# comment : Only the means of B and C are passed to fillna, so column A has no fill value and keeps its NaN
dff.fillna(value=dff[['B', 'C']].mean())

# comment # Do shift + enter

# comment : Another way to fill the missing values with the mean of their column:
# comment : keep each value where it is not missing, otherwise take the column mean
dff3 = dff.where(dff.notna(), other=dff.mean(), axis='columns')
dff3
# comment # Do shift + enter

# comment : Line plot of A, B and C with the missing data replaced by the column means
dff3.plot(kind='line')

# comment # Do shift + enter

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec5304c8>

# comment : Show the A, B, C data set again, still with its missing data
dff

# comment # Do shift + enter

# comment : pandas also offers the .interpolate() function for missing values (NaN).
# comment : Its default method is 'linear'; it is written out here and applied down each column (axis=0)
dff4 = dff.interpolate(method='linear', axis=0)
dff4
# comment # Do shift + enter

# comment : Line plot of A, B and C with the missing values filled by .interpolate()
dff4.plot(kind='line')

# comment # Do shift + enter

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec5c2f08>

# comment : .interpolate() also accepts a method= argument that selects an interpolation routine from the scipy module.
# comment : Start again from the original A, B, C data set with missing data
dff

# comment # Do shift + enter

# comment : Fill the missing data with dff.interpolate(method='barycentric'), applied down each column
dff5 = dff.interpolate(method='barycentric', axis=0)
dff5
# comment # Do shift + enter

# comment : Plot the A, B, C data set after filling the gaps with the barycentric method
dff5.plot(kind='line')
# comment # Do shift + enter

# comment : Why does this plot not look like the previous plots? The barycentric method needs further study.
# comment : The appropriate interpolation method depends on the type of data you are working with.
# comment : It appears the barycentric method is not an appropriate interpolation method for this data

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec623048>

# comment : Fill the missing data with dff.interpolate(method='akima'), applied down each column
# comment : Use the akima method when the goal is to fill missing values for smooth plotting
dff6 = dff.interpolate(method='akima', axis=0)
dff6
# comment # Do shift + enter

# comment : Plot the A, B, C data set after filling the gaps with the akima method
dff6.plot(kind='line')
# comment # Do shift + enter

# comment : The appropriate interpolation method depends on the type of data you are working with.
# comment : It appears the akima method is an appropriate interpolation method, similar to the earlier fills

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec6be308>

# comment : Fill the missing data with dff.interpolate(method='pchip'), applied down each column
# comment : Use the pchip method if you have values approximating a cumulative distribution function
dff7 = dff.interpolate(method='pchip', axis=0)
dff7
# comment # Do shift + enter

# comment : Plot the A, B, C data set after filling the gaps with the pchip method
dff7.plot(kind='line')
# comment # Do shift + enter

# comment : The appropriate interpolation method depends on the type of data you are working with.
# comment : It appears the pchip method is an appropriate interpolation method, similar to the previous one

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec7228c8>

# comment : Comparison of different methods of filling missing data
# comment : Build a single column of 37 values: the squares of 1, 1.25, ..., 10 plus standard-normal noise
# comment : The seed is fixed so the same numbers are generated on every run
np.random.seed(2)
ser = pd.Series(np.arange(1, 10.1, 0.25) ** 2 + np.random.standard_normal(37))
# comment : Replace rows 4, 13, 14, 15, 16, 17, 18, 20 and 29 with missing values (NaN)
# comment : The Series has the default 0..36 row labels, so these labels are also the row positions
missing = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29])
ser.loc[missing] = np.nan
ser
# comment # Do shift + enter

0       0.583242
1       1.506233
2       0.113804
3       4.702771
4            NaN
5       4.220753
6       6.752881
7       6.317212
8       7.942048
9       9.653492
10     12.801454
11     16.354708
12     16.041539
13           NaN
14           NaN
15           NaN
16           NaN
17           NaN
18           NaN
19     33.071525
20           NaN
21     38.906066
22     42.506570
23     44.573721
24     48.661178
25     52.326316
26     55.612345
27     58.874888
28     62.578783
29           NaN
30     71.980943
31     78.793867
32     78.565232
33     85.675227
34     90.620445
35     96.422134
36    100.501857
dtype: float64

# comment : Line plot of the Series named ser (12 x 4 inch figure) to see where the data is missing
ser.plot(kind='line', figsize=(12, 4))
# comment # Do shift + enter

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec7a1808>

# comment : Compare several interpolation methods on the same data set with missing values
# comment : method must be one of ['linear', 'time', 'index', 'values', 'nearest', 'zero', 'slinear',
# comment : 'quadratic', 'cubic', 'barycentric', 'polynomial', 'krogh', 'piecewise_polynomial', 'pchip',
# comment : 'akima', 'spline', 'from_derivatives']
# comment : spline and polynomial also need an order, e.g. .interpolate(method='spline', order=2)
# comment : Rebuild the same seeded series and the same gaps so this cell can run on its own
np.random.seed(2)
ser = pd.Series(np.arange(1, 10.1, 0.25) ** 2 + np.random.standard_normal(37))
missing = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29])
ser.loc[missing] = np.nan
methods = ['linear', 'quadratic', 'cubic', 'akima', 'pchip', 'barycentric']
# comment : One filled copy of ser per method, placed side by side as columns named after the method
dff8 = pd.concat([ser.interpolate(method=name) for name in methods], axis=1, keys=methods)
dff8.plot(kind='line', figsize=(12, 4))
# comment # Do shift + enter
# comment : Looking at the resulting plot, the barycentric method is not appropriate for replacing this missing data.
# comment : linear, quadratic, cubic, akima and pchip all seem to be appropriate methods to fill the missing data.

<matplotlib.axes._subplots.AxesSubplot at 0x1a2ec7ba948>

# comment : Pair plot of dff8: every interpolation method (column) plotted against every other one
# comment : The trailing semicolon keeps the notebook from printing the returned object
sns.pairplot(data=dff8);
# comment # Do shift + enter

# comment : Scatter plot from dff (still containing NaN): x = first column (A, position 0), y = second column (B, position 1)
dff.plot.scatter(x=0, y=1)
# comment # Do shift + enter

<matplotlib.axes._subplots.AxesSubplot at 0x1a2eeb97548>

# comment : Scatter plot from dff7 (gaps filled with pchip): x = first column (A, position 0), y = second column (B, position 1)
dff7.plot.scatter(x=0, y=1)
# comment # Do shift + enter

<matplotlib.axes._subplots.AxesSubplot at 0x1a2eecd5088>

	A	B	C
0	-0.107053	-0.141714	-0.015151
1	-0.878768	-0.039139	-1.179437
2	-0.693589	-0.332923	-1.334796
3	NaN	-0.632339	1.177966
4	NaN	NaN	-0.238105
5	0.998745	NaN	NaN
6	0.756130	-0.907819	NaN
7	-0.886335	0.275779	NaN
8	0.759389	1.060048	-0.280023
9	0.257978	-0.131283	-1.051919

	A	B	C
0	-0.107053	-0.141714	-0.015151
1	-0.878768	-0.039139	-1.179437
2	-0.693589	-0.332923	-1.334796
3	0.025812	-0.632339	1.177966
4	0.025812	-0.106174	-0.238105
5	0.998745	-0.106174	-0.417352
6	0.756130	-0.907819	-0.417352
7	-0.886335	0.275779	-0.417352
8	0.759389	1.060048	-0.280023
9	0.257978	-0.131283	-1.051919

	A	B	C
0	-0.107053	-0.141714	-0.015151
1	-0.878768	-0.039139	-1.179437
2	-0.693589	-0.332923	-1.334796
3	NaN	-0.632339	1.177966
4	NaN	-0.106174	-0.238105
5	0.998745	-0.106174	-0.417352
6	0.756130	-0.907819	-0.417352
7	-0.886335	0.275779	-0.417352
8	0.759389	1.060048	-0.280023
9	0.257978	-0.131283	-1.051919

	A	B	C
0	-0.107053	-0.141714	-0.015151
1	-0.878768	-0.039139	-1.179437
2	-0.693589	-0.332923	-1.334796
3	0.025812	-0.632339	1.177966
4	0.025812	-0.106174	-0.238105
5	0.998745	-0.106174	-0.417352
6	0.756130	-0.907819	-0.417352
7	-0.886335	0.275779	-0.417352
8	0.759389	1.060048	-0.280023
9	0.257978	-0.131283	-1.051919

	A	B	C
0	-0.107053	-0.141714	-0.015151
1	-0.878768	-0.039139	-1.179437
2	-0.693589	-0.332923	-1.334796
3	NaN	-0.632339	1.177966
4	NaN	NaN	-0.238105
5	0.998745	NaN	NaN
6	0.756130	-0.907819	NaN
7	-0.886335	0.275779	NaN
8	0.759389	1.060048	-0.280023
9	0.257978	-0.131283	-1.051919

Learn Once and Never Forget Now Possible !

How to use the Python pandas and seaborn libraries

INVBAT.COM -A.I.
The Personal Memory Assistant Company

BECAUSE MOST OF US FORGET

import pandas as pd , numpy as np, scipy as sp, seaborn as sns
and matplotlib.pyplot as plt

Open More Calculator Database

Learn Once and Never Forget
Now Possible !

Copyright 2023 IN-V-BAT-AI
The Personal Memory Assistant Company

INVenting Brain Assistant Tools using Artificial Intelligence
(IN-V-BAT-AI)

Learn Once and Never Forget Now Possible !

How to use the Python pandas and seaborn libraries

INVBAT.COM -A.I. The Personal Memory Assistant Company

BECAUSE MOST OF US FORGET

import pandas as pd , numpy as np, scipy as sp, seaborn as sns and matplotlib.pyplot as plt

Open More Calculator Database

Learn Once and Never Forget Now Possible !

Copyright 2023 IN-V-BAT-AI The Personal Memory Assistant Company INVenting Brain Assistant Tools using Artificial Intelligence (IN-V-BAT-AI)

INVBAT.COM -A.I.
The Personal Memory Assistant Company

import pandas as pd , numpy as np, scipy as sp, seaborn as sns
and matplotlib.pyplot as plt

Learn Once and Never Forget
Now Possible !

Copyright 2023 IN-V-BAT-AI
The Personal Memory Assistant Company

INVenting Brain Assistant Tools using Artificial Intelligence
(IN-V-BAT-AI)