{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Discrete Choice Models Overview"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data\n",
    "\n",
    "Load data from Spector and Mazzeo (1980). Examples follow Greene's Econometric Analysis Ch. 21 (5th Edition)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "spector_data = sm.datasets.spector.load(as_pandas=False)\n",
    "spector_data.exog = sm.add_constant(spector_data.exog, prepend=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inspect the data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 2.66 20.    0.    1.  ]\n",
      " [ 2.89 22.    0.    1.  ]\n",
      " [ 3.28 24.    0.    1.  ]\n",
      " [ 2.92 12.    0.    1.  ]\n",
      " [ 4.   21.    0.    1.  ]]\n",
      "[0. 0. 0. 0. 1.]\n"
     ]
    }
   ],
   "source": [
    "print(spector_data.exog[:5,:])\n",
    "print(spector_data.endog[:5])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear Probability Model (OLS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameters:  [0.46385168 0.01049512 0.37855479]\n"
     ]
    }
   ],
   "source": [
    "lpm_mod = sm.OLS(spector_data.endog, spector_data.exog)\n",
    "lpm_res = lpm_mod.fit()\n",
    "print('Parameters: ', lpm_res.params[:-1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Logit Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameters:  [  2.82611259   0.09515766   2.37868766 -13.02134686]\n"
     ]
    }
   ],
   "source": [
    "logit_mod = sm.Logit(spector_data.endog, spector_data.exog)\n",
    "logit_res = logit_mod.fit(disp=0)\n",
    "print('Parameters: ', logit_res.params)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Marginal Effects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "        Logit Marginal Effects       \n",
      "=====================================\n",
      "Dep. Variable:                      y\n",
      "Method:                          dydx\n",
      "At:                           overall\n",
      "==============================================================================\n",
      "                dy/dx    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1             0.3626      0.109      3.313      0.001       0.148       0.577\n",
      "x2             0.0122      0.018      0.686      0.493      -0.023       0.047\n",
      "x3             0.3052      0.092      3.304      0.001       0.124       0.486\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "margeff = logit_res.get_margeff()\n",
    "print(margeff.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As in all the discrete data models presented below, we can print a nice summary of results:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                           Logit Regression Results                           \n",
      "==============================================================================\n",
      "Dep. Variable:                      y   No. Observations:                   32\n",
      "Model:                          Logit   Df Residuals:                       28\n",
      "Method:                           MLE   Df Model:                            3\n",
      "Date:                Mon, 24 Feb 2020   Pseudo R-squ.:                  0.3740\n",
      "Time:                        22:49:06   Log-Likelihood:                -12.890\n",
      "converged:                       True   LL-Null:                       -20.592\n",
      "Covariance Type:            nonrobust   LLR p-value:                  0.001502\n",
      "==============================================================================\n",
      "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1             2.8261      1.263      2.238      0.025       0.351       5.301\n",
      "x2             0.0952      0.142      0.672      0.501      -0.182       0.373\n",
      "x3             2.3787      1.065      2.234      0.025       0.292       4.465\n",
      "const        -13.0213      4.931     -2.641      0.008     -22.687      -3.356\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "print(logit_res.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Probit Model "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.400588\n",
      "         Iterations 6\n",
      "Parameters:  [ 1.62581004  0.05172895  1.42633234 -7.45231965]\n",
      "Marginal effects: \n",
      "       Probit Marginal Effects       \n",
      "=====================================\n",
      "Dep. Variable:                      y\n",
      "Method:                          dydx\n",
      "At:                           overall\n",
      "==============================================================================\n",
      "                dy/dx    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1             0.3608      0.113      3.182      0.001       0.139       0.583\n",
      "x2             0.0115      0.018      0.624      0.533      -0.025       0.048\n",
      "x3             0.3165      0.090      3.508      0.000       0.140       0.493\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "probit_mod = sm.Probit(spector_data.endog, spector_data.exog)\n",
    "probit_res = probit_mod.fit()\n",
    "probit_margeff = probit_res.get_margeff()\n",
    "print('Parameters: ', probit_res.params)\n",
    "print('Marginal effects: ')\n",
    "print(probit_margeff.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multinomial Logit"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load data from the American National Election Studies:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "anes_data = sm.datasets.anes96.load(as_pandas=False)\n",
    "anes_exog = anes_data.exog\n",
    "anes_exog = sm.add_constant(anes_exog, prepend=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inspect the data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-2.30258509  7.         36.          3.          1.        ]\n",
      " [ 5.24755025  3.         20.          4.          1.        ]\n",
      " [ 3.43720782  2.         24.          6.          1.        ]\n",
      " [ 4.4200447   3.         28.          6.          1.        ]\n",
      " [ 6.46162441  5.         68.          6.          1.        ]]\n",
      "[6. 1. 1. 1. 0.]\n"
     ]
    }
   ],
   "source": [
    "print(anes_data.exog[:5,:])\n",
    "print(anes_data.endog[:5])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit MNL model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 1.548647\n",
      "         Iterations 7\n",
      "[[-1.15359746e-02 -8.87506530e-02 -1.05966699e-01 -9.15567017e-02\n",
      "  -9.32846040e-02 -1.40880692e-01]\n",
      " [ 2.97714352e-01  3.91668642e-01  5.73450508e-01  1.27877179e+00\n",
      "   1.34696165e+00  2.07008014e+00]\n",
      " [-2.49449954e-02 -2.28978371e-02 -1.48512069e-02 -8.68134503e-03\n",
      "  -1.79040689e-02 -9.43264870e-03]\n",
      " [ 8.24914421e-02  1.81042758e-01 -7.15241904e-03  1.99827955e-01\n",
      "   2.16938850e-01  3.21925702e-01]\n",
      " [ 5.19655317e-03  4.78739761e-02  5.75751595e-02  8.44983753e-02\n",
      "   8.09584122e-02  1.08894083e-01]\n",
      " [-3.73401677e-01 -2.25091318e+00 -3.66558353e+00 -7.61384309e+00\n",
      "  -7.06047825e+00 -1.21057509e+01]]\n"
     ]
    }
   ],
   "source": [
    "mlogit_mod = sm.MNLogit(anes_data.endog, anes_exog)\n",
    "mlogit_res = mlogit_mod.fit()\n",
    "print(mlogit_res.params)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Poisson\n",
    "\n",
    "Load the Rand data. Note that this example is similar to Cameron and Trivedi's `Microeconometrics` Table 20.5, but it is slightly different because of minor changes in the data. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rand_data = sm.datasets.randhie.load(as_pandas=False)\n",
    "rand_exog = rand_data.exog.view(float).reshape(len(rand_data.exog), -1)\n",
    "rand_exog = sm.add_constant(rand_exog, prepend=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit Poisson model: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 3.091609\n",
      "         Iterations 6\n",
      "                          Poisson Regression Results                          \n",
      "==============================================================================\n",
      "Dep. Variable:                      y   No. Observations:                20190\n",
      "Model:                        Poisson   Df Residuals:                    20180\n",
      "Method:                           MLE   Df Model:                            9\n",
      "Date:                Mon, 24 Feb 2020   Pseudo R-squ.:                 0.06343\n",
      "Time:                        22:49:06   Log-Likelihood:                -62420.\n",
      "converged:                       True   LL-Null:                       -66647.\n",
      "Covariance Type:            nonrobust   LLR p-value:                     0.000\n",
      "==============================================================================\n",
      "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.0525      0.003    -18.216      0.000      -0.058      -0.047\n",
      "x2            -0.2471      0.011    -23.272      0.000      -0.268      -0.226\n",
      "x3             0.0353      0.002     19.302      0.000       0.032       0.039\n",
      "x4            -0.0346      0.002    -21.439      0.000      -0.038      -0.031\n",
      "x5             0.2717      0.012     22.200      0.000       0.248       0.296\n",
      "x6             0.0339      0.001     60.098      0.000       0.033       0.035\n",
      "x7            -0.0126      0.009     -1.366      0.172      -0.031       0.005\n",
      "x8             0.0541      0.015      3.531      0.000       0.024       0.084\n",
      "x9             0.2061      0.026      7.843      0.000       0.155       0.258\n",
      "const          0.7004      0.011     62.741      0.000       0.678       0.722\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "poisson_mod = sm.Poisson(rand_data.endog, rand_exog)\n",
    "poisson_res = poisson_mod.fit(method=\"newton\")\n",
    "print(poisson_res.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Negative Binomial\n",
    "\n",
    "The negative binomial model gives slightly different results. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3/dist-packages/statsmodels/base/model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  warn(\"Maximum Likelihood optimization failed to converge. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                     NegativeBinomial Regression Results                      \n",
      "==============================================================================\n",
      "Dep. Variable:                      y   No. Observations:                20190\n",
      "Model:               NegativeBinomial   Df Residuals:                    20180\n",
      "Method:                           MLE   Df Model:                            9\n",
      "Date:                Mon, 24 Feb 2020   Pseudo R-squ.:                 0.01845\n",
      "Time:                        22:49:06   Log-Likelihood:                -43384.\n",
      "converged:                      False   LL-Null:                       -44199.\n",
      "Covariance Type:            nonrobust   LLR p-value:                     0.000\n",
      "==============================================================================\n",
      "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.0579      0.006     -9.515      0.000      -0.070      -0.046\n",
      "x2            -0.2678      0.023    -11.802      0.000      -0.312      -0.223\n",
      "x3             0.0412      0.004      9.938      0.000       0.033       0.049\n",
      "x4            -0.0381      0.003    -11.216      0.000      -0.045      -0.031\n",
      "x5             0.2691      0.030      8.985      0.000       0.210       0.328\n",
      "x6             0.0382      0.001     26.080      0.000       0.035       0.041\n",
      "x7            -0.0441      0.020     -2.201      0.028      -0.083      -0.005\n",
      "x8             0.0173      0.036      0.478      0.632      -0.054       0.088\n",
      "x9             0.1782      0.074      2.399      0.016       0.033       0.324\n",
      "const          0.6635      0.025     26.786      0.000       0.615       0.712\n",
      "alpha          1.2930      0.019     69.477      0.000       1.256       1.329\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "mod_nbin = sm.NegativeBinomial(rand_data.endog, rand_exog)\n",
    "res_nbin = mod_nbin.fit(disp=False)\n",
    "print(res_nbin.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Alternative solvers\n",
    "\n",
    "The default method for fitting discrete data MLE models is Newton-Raphson. You can use other solvers by using the ``method`` argument: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: Maximum number of iterations has been exceeded.\n",
      "         Current function value: 1.548650\n",
      "         Iterations: 100\n",
      "         Function evaluations: 106\n",
      "         Gradient evaluations: 106\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3/dist-packages/statsmodels/base/model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  warn(\"Maximum Likelihood optimization failed to converge. \"\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          MNLogit Regression Results                          \n",
      "==============================================================================\n",
      "Dep. Variable:                      y   No. Observations:                  944\n",
      "Model:                        MNLogit   Df Residuals:                      908\n",
      "Method:                           MLE   Df Model:                           30\n",
      "Date:                Mon, 24 Feb 2020   Pseudo R-squ.:                  0.1648\n",
      "Time:                        22:49:06   Log-Likelihood:                -1461.9\n",
      "converged:                      False   LL-Null:                       -1750.3\n",
      "Covariance Type:            nonrobust   LLR p-value:                1.827e-102\n",
      "==============================================================================\n",
      "       y=1       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.0116      0.034     -0.338      0.735      -0.079       0.056\n",
      "x2             0.2973      0.094      3.175      0.001       0.114       0.481\n",
      "x3            -0.0250      0.007     -3.825      0.000      -0.038      -0.012\n",
      "x4             0.0821      0.074      1.116      0.264      -0.062       0.226\n",
      "x5             0.0052      0.018      0.294      0.769      -0.029       0.040\n",
      "const         -0.3689      0.630     -0.586      0.558      -1.603       0.866\n",
      "------------------------------------------------------------------------------\n",
      "       y=2       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.0888      0.039     -2.269      0.023      -0.166      -0.012\n",
      "x2             0.3913      0.108      3.615      0.000       0.179       0.603\n",
      "x3            -0.0229      0.008     -2.897      0.004      -0.038      -0.007\n",
      "x4             0.1808      0.085      2.120      0.034       0.014       0.348\n",
      "x5             0.0478      0.022      2.145      0.032       0.004       0.091\n",
      "const         -2.2451      0.763     -2.942      0.003      -3.741      -0.749\n",
      "------------------------------------------------------------------------------\n",
      "       y=3       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.1062      0.057     -1.861      0.063      -0.218       0.006\n",
      "x2             0.5730      0.159      3.614      0.000       0.262       0.884\n",
      "x3            -0.0149      0.011     -1.313      0.189      -0.037       0.007\n",
      "x4            -0.0075      0.126     -0.060      0.952      -0.255       0.240\n",
      "x5             0.0575      0.034      1.711      0.087      -0.008       0.123\n",
      "const         -3.6592      1.156     -3.164      0.002      -5.926      -1.393\n",
      "------------------------------------------------------------------------------\n",
      "       y=4       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.0914      0.044     -2.085      0.037      -0.177      -0.005\n",
      "x2             1.2826      0.129      9.937      0.000       1.030       1.536\n",
      "x3            -0.0085      0.008     -1.008      0.314      -0.025       0.008\n",
      "x4             0.2012      0.094      2.136      0.033       0.017       0.386\n",
      "x5             0.0850      0.026      3.240      0.001       0.034       0.136\n",
      "const         -7.6589      0.960     -7.982      0.000      -9.540      -5.778\n",
      "------------------------------------------------------------------------------\n",
      "       y=5       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.0934      0.039     -2.374      0.018      -0.170      -0.016\n",
      "x2             1.3451      0.117     11.485      0.000       1.116       1.575\n",
      "x3            -0.0180      0.008     -2.362      0.018      -0.033      -0.003\n",
      "x4             0.2161      0.085      2.542      0.011       0.049       0.383\n",
      "x5             0.0808      0.023      3.517      0.000       0.036       0.126\n",
      "const         -7.0401      0.844     -8.344      0.000      -8.694      -5.387\n",
      "------------------------------------------------------------------------------\n",
      "       y=6       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "x1            -0.1409      0.042     -3.345      0.001      -0.224      -0.058\n",
      "x2             2.0686      0.143     14.433      0.000       1.788       2.349\n",
      "x3            -0.0095      0.008     -1.164      0.244      -0.025       0.006\n",
      "x4             0.3216      0.091      3.532      0.000       0.143       0.500\n",
      "x5             0.1087      0.025      4.299      0.000       0.059       0.158\n",
      "const        -12.0913      1.059    -11.415      0.000     -14.167     -10.015\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "mlogit_res = mlogit_mod.fit(method='bfgs', maxiter=100)\n",
    "print(mlogit_res.summary())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2rc2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
