Python Pandas and Seaborn
How to use Python Pandas and Seaborn Libraries


Import pandas as pd, numpy as np, scipy as sp, seaborn as sns,
matplotlib.pyplot as plt, and statsmodels.api as sm

In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import warnings
import mpl_toolkits.mplot3d.axes3d as p3
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.api as sm

import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from scipy import fftpack
from scipy import integrate

# comment # Do shift + enter
In [2]:
# comment : Hierarchical multi-indexing is a powerful data analysis and manipulation tool.
# comment : Show me how it works.

arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
tuples = list(zip(*arrays))
tuples
# comment # Do shift + enter
Out[2]:
[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]
In [3]:
# comment : create a MultiIndex from the list of tuples, naming the levels 'first' and 'second'
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
index
# comment # Do shift + enter
Out[3]:
MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])
In [4]:
# comment : both the 'first' and 'second' levels become row indexes
# comment : create 8 random values and assign them to the MultiIndex rows
s = pd.Series(np.random.randn(8), index=index)
s
# comment # Do shift + enter
Out[4]:
first  second
bar    one      -0.782467
       two       1.328989
baz    one       0.239303
       two       0.907330
foo    one       0.080944
       two      -0.416424
qux    one      -1.760565
       two      -0.304891
dtype: float64
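One benefit of the hierarchical index is partial selection: you can select by the outer label, by the full key, or by a single level. A minimal sketch, assuming the Series s created in the cell above:

# comment : all rows under the outer label 'bar'
s['bar']
# comment : a single value, selected by the full (first, second) key
s.loc[('baz', 'two')]
# comment : cross-section on the 'second' level returns every 'one' row
s.xs('one', level='second')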
In [5]:
# comment : create a list of arrays and use it directly as the Series index
arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']),
          np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'])]
arrays
s = pd.Series(np.random.randn(8), index=arrays)
s
# comment # Do shift + enter
Out[5]:
bar  one    0.173284
     two   -0.125278
baz  one   -2.074158
     two    0.095763
foo  one   -2.625309
     two   -2.044817
qux  one    0.749229
     two    0.339710
dtype: float64
In [6]:
# comment : create a DataFrame table with 8 rows and 4 columns using the random number generator,
# comment : with the arrays list as the index. Then display the newly created table.
# comment : The code below satisfies the above requirements.
df = pd.DataFrame(np.random.randn(8, 4), index=arrays)
df
# comment # Do shift + enter
Out[6]:
                0         1         2         3
bar one  0.138294 -2.534564  0.710399 -0.063686
    two  0.423538 -0.574675 -0.573271  0.336992
baz one -0.098690 -1.129737 -1.057710 -0.949792
    two  0.774282  0.533855 -0.692040  0.113485
foo one -1.550148 -1.163986  0.601037 -0.185074
    two -0.925162 -0.145719  1.655199 -0.203688
qux one -1.998161 -2.777484  1.459837 -0.349347
    two  2.131043 -0.599111  0.147536 -1.172360
In [7]:
# comment : check the level names of the df index
df.index.names
# comment # Do shift + enter
Out[7]:
FrozenList([None, None])
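The index built directly from the arrays has no level names, which is why the result is FrozenList([None, None]). A minimal sketch of assigning names after the fact, assuming df from the cell above:

# comment : assign level names to the existing MultiIndex
df.index = df.index.set_names(['first', 'second'])
df.index.names
# comment : expected result: FrozenList(['first', 'second'])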
In [8]:
# comment : Create a new DataFrame named bar with 3 rows and 8 columns, using the MultiIndex for the column labels
bar = pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index)
bar
# comment # Do shift + enter
Out[8]:
first        bar                 baz                 foo                 qux
second       one       two       one       two       one       two       one       two
A      -1.661040  0.495785 -1.131690 -2.766052  1.256251  0.154200  0.128014  1.676229
B       0.941079  0.930319  0.531501 -0.611327  1.705435  0.550764  0.961846  2.004243
C       0.612964  0.076291  0.391283  1.302301 -1.588977  0.590265  0.314479  0.881738
In [9]:
# comment : Plot the bar chart of the DataFrame named bar
bar.plot.bar(figsize=(12,4))
# comment # Do shift + enter
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x200822b4948>
In [10]:
# comment : create a 6 x 6 DataFrame using the first six MultiIndex entries for both rows and columns
bar2 = pd.DataFrame(np.random.randn(6, 6), index=index[:6], columns=index[:6])
bar2
# comment # Do shift + enter
Out[10]:
first                bar                 baz                 foo
second               one       two       one       two       one       two
first second
bar   one       0.387596 -0.207385  0.446616 -1.700743 -0.695830  2.010629
      two      -0.570906  1.276288  1.493564 -0.238385 -0.489002  0.318271
baz   one       0.196606  2.598687  0.385334 -1.504406  1.677697  0.592196
      two      -0.790297 -0.698975  0.122206  0.566740 -0.158884 -1.905858
foo   one       0.697351  0.708705 -0.284903 -0.540190 -0.666178 -0.300883
      two       0.653719  1.081239 -0.762367  0.417088  0.094575  0.138866
In [11]:
# comment : Plot the bar chart of dataframe name = bar2
bar2.plot.bar(figsize=(12,4))
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x20082aac348>
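Seaborn is imported in cell In [1] but not used in the cells above. As one illustration, the same bar2 table can also be visualized as a heatmap. A minimal sketch, assuming bar2 from the cell above:

# comment : heatmap of the 6 x 6 random DataFrame, annotating each cell with its value
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(bar2, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax)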
In [12]:
# comment : linear regression model using the ordinary least squares (OLS) method
# comment : Random number generator seed(9876789) results in R-squared = 0.999
# comment : Random number generator seed(25439587) results in R-squared = 0.954
# comment : Random number generator seed(0) results in R-squared = 0.928

# comment : this first block builds a simple quadratic example; its variables are
# comment : overwritten by the second block below, which defines the model actually fitted
np.random.seed(0)
nsample = 100
x = np.linspace(0, 10, 100)
X = np.column_stack((x, x**2))
beta = np.array([1, 0.1, 10])
e = np.random.normal(size=nsample)
X = sm.add_constant(X)
y = np.dot(X, beta) + e


nsample = 50
sig = 0.5
x = np.linspace(0, 20, nsample)
X = np.column_stack((x, np.sin(x), (x-5)**2, np.ones(nsample)))
beta = [0.5, 0.5, -0.02, 5.]
y_true = np.dot(X, beta)
y = y_true + sig * np.random.normal(size=nsample)


model = sm.OLS(y, X)
results = model.fit()
res = sm.OLS(y, X).fit()
print(results.summary())
print('')
print('Parameters: ', results.params)
print('R-squared result (R^2): ', results.rsquared)


# comment # Do shift + enter
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.928
Model:                            OLS   Adj. R-squared:                  0.923
Method:                 Least Squares   F-statistic:                     196.3
Date:                Sat, 13 Jun 2020   Prob (F-statistic):           3.21e-26
Time:                        18:10:27   Log-Likelihood:                -35.697
No. Observations:                  50   AIC:                             79.39
Df Residuals:                      46   BIC:                             87.04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.4879      0.027     18.016      0.000       0.433       0.542
x2             0.3124      0.106      2.934      0.005       0.098       0.527
x3            -0.0203      0.002     -8.524      0.000      -0.025      -0.015
const          5.2728      0.176     30.030      0.000       4.919       5.626
==============================================================================
Omnibus:                        4.715   Durbin-Watson:                   2.294
Prob(Omnibus):                  0.095   Jarque-Bera (JB):                1.971
Skew:                          -0.029   Prob(JB):                        0.373
Kurtosis:                       2.029   Cond. No.                         221.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Parameters:  [ 0.48785572  0.31236524 -0.02026744  5.27275408]
R-squared result (R^2):  0.9275450485572443
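As a sanity check, the reported R-squared can be recomputed by hand from the residuals, R^2 = 1 - SS_res / SS_tot. A minimal sketch, assuming y and results from the cell above:

# comment : recompute R-squared from the residual and total sums of squares
ss_res = np.sum(results.resid ** 2)
ss_tot = np.sum((y - y.mean()) ** 2)
print(1 - ss_res / ss_tot)
# comment : this should match results.rsquared (about 0.928 with seed 0)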
In [13]:
# comment : show me the plot

print('Parameters: ', res.params)
print('Standard errors: ', res.bse)
print('Predicted values: ', res.predict())


prstd, iv_l, iv_u = wls_prediction_std(res)
fig, ax = plt.subplots(figsize=(12,6))

ax.plot(x, y, 'o', label="data")
ax.plot(x, y_true, 'b-', label="True")
ax.plot(x, res.fittedvalues, 'r--.', label="OLS")
ax.plot(x, iv_u, 'r--')
ax.plot(x, iv_l, 'r--')
ax.legend(loc='best');

# comment # Do shift + enter
Parameters:  [ 0.48785572  0.31236524 -0.02026744  5.27275408]
Standard errors:  [0.02707946 0.10645252 0.0023776  0.1755842 ]
Predicted values:  [ 4.76606809  5.16852593  5.54386033  5.87504767  6.15120805  6.36939285
  6.53506919  6.66122167  6.76621904  6.8707963   6.99464795  7.15319227
  7.35503816  7.60057065  7.88188773  8.18409855  8.48777     8.77211949
  9.01842947  9.21312268  9.34999285  9.43122437  9.4670334   9.4739894
  9.47229279  9.4824561   9.52193382  9.60225463  9.72712734  9.89183151
 10.08399298 10.28561696 10.47604449 10.63534665 10.74759884 10.80349713
 10.80188688 10.74995245 10.66203682 10.55728506 10.4564989  10.37871901
 10.33809672 10.34156895 10.38771852 10.467007   10.56334183 10.65672026
 10.7265158  10.75486704]
In [14]:
# comment : learn how to load a sample data set from the statsmodels datasets module
# comment : show how to use Least Absolute Deviation (LAD) quantile regression
data = sm.datasets.engel.load_pandas().data
data.head()
# comment # Do shift + enter
Out[14]:
       income     foodexp
0  420.157651  255.839425
1  541.411707  310.958667
2  901.157457  485.680014
3  639.080229  402.997356
4  750.875606  495.560775
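Before fitting the quantile regression, the income/food-expenditure relationship can be eyeballed with seaborn. A minimal sketch, assuming data from the cell above:

# comment : scatter plot of the Engel data with an ordinary least squares trend line
fig, ax = plt.subplots(figsize=(12, 6))
sns.regplot(x='income', y='foodexp', data=data, scatter_kws={'alpha': 0.3}, ax=ax)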
In [15]:
# comment : "res" is the abbreviation for regression analysis
mod = smf.quantreg('foodexp ~ income', data)
res = mod.fit(q=.5)
print(res.summary())
# comment # Do shift + enter
                         QuantReg Regression Results                          
==============================================================================
Dep. Variable:                foodexp   Pseudo R-squared:               0.6206
Model:                       QuantReg   Bandwidth:                       64.51
Method:                 Least Squares   Sparsity:                        209.3
Date:                Sat, 13 Jun 2020   No. Observations:                  235
Time:                        18:10:27   Df Residuals:                      233
                                        Df Model:                            1
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     81.4823     14.634      5.568      0.000      52.649     110.315
income         0.5602      0.013     42.516      0.000       0.534       0.586
==============================================================================

The condition number is large, 2.38e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
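The fitted median regression can also produce point predictions at new income values. A minimal sketch, assuming res from the cell above (the income levels below are made-up illustration values):

# comment : predicted median food expenditure at three hypothetical income levels
new_income = pd.DataFrame({'income': [500, 1000, 2000]})
print(res.predict(new_income))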
In [16]:
# comment : fit the quantile regression at quantiles from 0.05 to 0.95 and compare with OLS
quantiles = np.arange(.05, .96, .1)
def fit_model(q):
    res = mod.fit(q=q)
    return [q, res.params['Intercept'], res.params['income']] + \
            res.conf_int().loc['income'].tolist()

models = [fit_model(x) for x in quantiles]
models = pd.DataFrame(models, columns=['q', 'a', 'b', 'lb', 'ub'])

ols = smf.ols('foodexp ~ income', data).fit()
ols_ci = ols.conf_int().loc['income'].tolist()
ols = dict(a = ols.params['Intercept'],
           b = ols.params['income'],
           lb = ols_ci[0],
           ub = ols_ci[1])

print(models)
print(ols)

# comment # Do shift + enter
      q           a         b        lb        ub
0  0.05  124.880097  0.343361  0.268632  0.418090
1  0.15  111.693660  0.423708  0.382780  0.464636
2  0.25   95.483539  0.474103  0.439900  0.508306
3  0.35  105.841294  0.488901  0.457759  0.520043
4  0.45   81.083647  0.552428  0.525021  0.579835
5  0.55   89.661370  0.565601  0.540955  0.590247
6  0.65   74.033435  0.604576  0.582169  0.626982
7  0.75   62.396584  0.644014  0.622411  0.665617
8  0.85   52.272216  0.677603  0.657383  0.697823
9  0.95   64.103964  0.709069  0.687831  0.730306
{'a': 147.4753885237058, 'b': 0.4851784236769232, 'lb': 0.45687381301842295, 'ub': 0.5134830343354234}
In [17]:
# comment : plot each quantile regression line, the OLS line, and the scatter of the data

x = np.arange(data.income.min(), data.income.max(), 50)
get_y = lambda a, b: a + b * x

fig3, ax = plt.subplots(figsize=(12, 6))

for i in range(models.shape[0]):
    y = get_y(models.a[i], models.b[i])
    ax.plot(x, y, linestyle='dotted', color='grey')

y = get_y(ols['a'], ols['b'])

ax.plot(x, y, color='red', label='OLS')
ax.scatter(data.income, data.foodexp, alpha=.2)
ax.set_xlim((240, 3000))
ax.set_ylim((240, 2000))
legend = ax.legend()
ax.set_xlabel('Income', fontsize=16)
ax.set_ylabel('Food expenditure', fontsize=16);
# comment # Do shift + enter
In [18]:
# comment : plot the estimated income coefficient across quantiles, with confidence bands, against the OLS estimate
fig4, ax = plt.subplots(figsize=(12, 6))

n = models.shape[0]
p1 = plt.plot(models.q, models.b, color='black', label='Quantile Reg.')
p2 = plt.plot(models.q, models.ub, linestyle='dotted', color='black')
p3 = plt.plot(models.q, models.lb, linestyle='dotted', color='black')
p4 = plt.plot(models.q, [ols['b']] * n, color='red', label='OLS')
p5 = plt.plot(models.q, [ols['lb']] * n, linestyle='dotted', color='red')
p6 = plt.plot(models.q, [ols['ub']] * n, linestyle='dotted', color='red')
plt.ylabel(r'$\beta_{income}$')
plt.xlabel('Quantiles of the conditional food expenditure distribution')
plt.legend()
plt.show()

# comment # Do shift + enter
In [19]:
# comment : The RecursiveLS class allows computation of recursive residuals and computes CUSUM and
# CUSUM of squares statistics. Plotting these statistics along with reference lines denoting statistically
# significant deviations from the null hypothesis of stable parameters allows an easy visual indication of
# parameter stability.

print(sm.datasets.copper.DESCRLONG)

dta = sm.datasets.copper.load_pandas().data
dta.index = pd.date_range('1951-01-01', '1975-01-01', freq='AS')
endog = dta['WORLDCONSUMPTION']
# To the regressors in the dataset, we add a column of ones for an intercept
exog = sm.add_constant(dta[['COPPERPRICE', 'INCOMEINDEX', 'ALUMPRICE', 'INVENTORYINDEX']])

# comment # Do shift + enter
This data describes the world copper market from 1951 through 1975.  In an
example, in Gill, the outcome variable (of a 2 stage estimation) is the world
consumption of copper for the 25 years.  The explanatory variables are the
world consumption of copper in 1000 metric tons, the constant dollar adjusted
price of copper, the price of a substitute, aluminum, an index of real per
capita income base 1970, an annual measure of manufacturer inventory change,
and a time trend.

In [20]:
# comment : summary table only presents the regression parameters estimated on the entire sample; 
# except for small effects from initialization of the recursions, these estimates are equivalent to OLS estimates.

mod = sm.RecursiveLS(endog, exog)
res = mod.fit()

print(res.summary())
# comment # Do shift + enter
                           Statespace Model Results                           
==============================================================================
Dep. Variable:       WORLDCONSUMPTION   No. Observations:                   25
Model:                    RecursiveLS   Log Likelihood                -154.720
Date:                Sat, 13 Jun 2020   R-squared:                       0.965
Time:                        18:10:28   AIC                            319.441
Sample:                    01-01-1951   BIC                            325.535
                         - 01-01-1975   HQIC                           321.131
Covariance Type:            nonrobust   Scale                       117717.127
==================================================================================
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const          -6562.3719   2378.939     -2.759      0.006   -1.12e+04   -1899.737
COPPERPRICE      -13.8132     15.041     -0.918      0.358     -43.292      15.666
INCOMEINDEX      1.21e+04    763.401     15.853      0.000    1.06e+04    1.36e+04
ALUMPRICE         70.4146     32.678      2.155      0.031       6.367     134.462
INVENTORYINDEX   311.7330   2130.084      0.146      0.884   -3863.155    4486.621
===================================================================================
Ljung-Box (Q):                       15.65   Jarque-Bera (JB):                 1.70
Prob(Q):                              0.68   Prob(JB):                         0.43
Heteroskedasticity (H):               3.38   Skew:                            -0.67
Prob(H) (two-sided):                  0.13   Kurtosis:                         2.53
===================================================================================

Warnings:
[1] Parameters and covariance matrix estimates are RLS estimates conditional on the entire sample.
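The comment above In [20] states that, apart from small initialization effects, these estimates match OLS on the full sample. A minimal sketch that checks this claim, assuming endog and exog from cell In [19] and res from cell In [20]:

# comment : compare full-sample OLS parameters with the RecursiveLS parameters
ols_res = sm.OLS(endog, exog).fit()
print(ols_res.params)
print(res.params)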
In [21]:
# comment : Plot the recursive coefficient estimates

print(res.recursive_coefficients.filtered[0])
res.plot_recursive_coefficient(range(mod.k_exog), alpha=None, figsize=(10,6));
# comment # Do shift + enter
[ 2.88890087e+00  4.94795049e+00  1.55841803e+03  1.95843327e+03
 -5.14749565e+04 -4.16895010e+03 -2.25261351e+03 -4.46559101e+02
 -5.28839794e+03 -6.94231935e+03 -7.84608902e+03 -6.64315120e+03
 -6.27411015e+03 -7.27201695e+03 -6.31902648e+03 -5.82223929e+03
 -6.25630902e+03 -6.73740446e+03 -6.47742841e+03 -5.99590746e+03
 -6.45080677e+03 -6.02292166e+03 -5.25835152e+03 -5.32089136e+03
 -6.56237193e+03]
In [22]:
# comment :  In the plot below, the CUSUM statistic does not move outside of the 5% significance bands, 
# so we fail to reject the null hypothesis of stable parameters at the 5% level.

print(res.cusum)
fig = res.plot_cusum(figsize=(12,4));
# comment # Do shift + enter
[ 0.69971508  0.65841243  1.24629673  2.05476031  2.39888918  3.17861979
  2.67244671  2.01783214  2.46131746  2.05268637  0.95054335 -1.04505547
 -2.55465287 -2.29908152 -1.45289493 -1.95353994 -1.35046621  0.15789828
  0.63286529 -1.48184587]
In [23]:
# comment : In the plot below, the CUSUM of squares statistic does not move outside of the 5% significance bands, 
# so we fail to reject the null hypothesis of stable parameters at the 5% level.

res.plot_cusum_squares(figsize=(12,4));
# comment # Do shift + enter