

.. _sphx_glr_gallery_misc_rec_groupby_demo.py:


================
Rec Groupby Demo
================






.. rst-class:: sphx-glr-script-out

 Out::

    loading /build/matplotlib-rU0lhX/matplotlib-2.1.1/lib/matplotlib/mpl-data/sample_data/msft.csv
    summary by years
       years   rcnt   rmean   rmedian   rsigma
        2003     65   0.002     0.001    0.016
    summary by months
       months   rcnt    rmean   rmedian   rsigma
            6      8   -0.002    -0.002    0.012
            7     22    0.002     0.003    0.018
            8     21    0.000     0.000    0.010
            9     14    0.009     0.013    0.019
    summary by year and month
       years   months   rcnt    rmean   rmedian   rsigma
        2003        6      8   -0.002    -0.002    0.012
        2003        7     22    0.002     0.003    0.018
        2003        8     21    0.000     0.000    0.010
        2003        9     14    0.009     0.013    0.019
    summary by volume
       volcode   rcnt   rmean   rmedian   rsigma
             5     65   0.002     0.001    0.016




|


.. code-block:: python

    from __future__ import print_function
    import numpy as np
    import matplotlib.mlab as mlab
    import matplotlib.cbook as cbook

    # Locate the bundled 'msft.csv' sample data set.  asfileobj=False asks
    # for the file's path instead of an open file object; the path is what
    # gets printed below.
    datafile = cbook.get_sample_data('msft.csv', asfileobj=False)
    print('loading', datafile)
    # Parse the CSV into a record array and sort it in place.
    # NOTE(review): presumably this orders the rows chronologically by the
    # date field, which daily_return relies on -- confirm.
    r = mlab.csv2rec(datafile)
    r.sort()


    def daily_return(prices):
        """Return an array of daily returns computed from a price array.

        Element ``i`` (for ``i >= 1``) is
        ``(prices[i] - prices[i-1]) / prices[i-1]``; element 0 is 0 because
        there is no previous price to compare against.

        Parameters
        ----------
        prices : array-like
            One-dimensional sequence of prices in chronological order.

        Returns
        -------
        numpy.ndarray
            Array of the same shape as *prices* holding the fractional
            change from the previous entry.  Floating point input keeps
            its dtype; any other numeric input is promoted to float.
        """
        prices = np.asanyarray(prices)
        # Integer prices would otherwise yield an integer result array (and,
        # on Python 2, integer division), truncating every return to 0.
        if not np.issubdtype(prices.dtype, np.inexact):
            prices = prices.astype(float)
        g = np.zeros_like(prices)
        g[1:] = (prices[1:] - prices[:-1])/prices[:-1]
        return g


    def volume_code(volume):
        """Code the continuous volume data categorically.

        Each value is replaced by the insertion index that
        ``np.searchsorted`` reports for it in a fixed, ascending list of
        volume thresholds.
        """
        # NOTE(review): 10e6 and 1e7 are the same number, so the last two
        # thresholds coincide and code 4 can never be returned.  Left
        # unchanged so the codes match the recorded output of this demo.
        thresholds = [1e5, 1e6, 5e6, 10e6, 1e7]
        return np.searchsorted(thresholds, volume)

    # Each entry is (source field name, summary function, output field name).
    # rec_summarize calls each function on the named field of the record
    # array and stores the result under the output name in the record
    # array it returns.
    summaryfuncs = (
        ('date', lambda dates: [d.year for d in dates], 'years'),
        ('date', lambda dates: [d.month for d in dates], 'months'),
        ('date', lambda dates: [d.weekday() for d in dates], 'weekday'),
        ('adj_close', daily_return, 'dreturn'),
        ('volume', volume_code, 'volcode'),
    )

    rsum = mlab.rec_summarize(r, summaryfuncs)

    # stats is a tuple of (dtype_name, function, output_dtype_name) entries.
    # rec_groupby applies each function to the field named by dtype_name
    # within every group defined by the groupby keys, and stores the
    # result under output_dtype_name.  All four entries summarize the
    # 'dreturn' field: count, mean, median and standard deviation.
    stats = (
        ('dreturn', len, 'rcnt'),
        ('dreturn', np.mean, 'rmean'),
        ('dreturn', np.median, 'rmedian'),
        ('dreturn', np.std, 'rsigma'),
        )

    # Group the summarized records by a single key, such as years or
    # months, and print each result as a text table.
    print('summary by years')
    ry = mlab.rec_groupby(rsum, ('years',), stats)
    print(mlab.rec2txt(ry))

    print('summary by months')
    rm = mlab.rec_groupby(rsum, ('months',), stats)
    print(mlab.rec2txt(rm))

    # Grouping also works over several keys at once, such as years and
    # months together.
    print('summary by year and month')
    rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
    print(mlab.rec2txt(rym))

    # Group by the categorical volume code.
    print('summary by volume')
    rv = mlab.rec_groupby(rsum, ('volcode',), stats)
    print(mlab.rec2txt(rv))

**Total running time of the script:** ( 0 minutes  0.034 seconds)



.. only:: html

 .. container:: sphx-glr-footer


  .. container:: sphx-glr-download

     :download:`Download Python source code: rec_groupby_demo.py <rec_groupby_demo.py>`



  .. container:: sphx-glr-download

     :download:`Download Jupyter notebook: rec_groupby_demo.ipynb <rec_groupby_demo.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.readthedocs.io>`_
