{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Overview of the mapclassify API\n",
    "\n",
    "There are a number of ways to access the functionality in `mapclassify`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We first load the example dataset that we have seen earlier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from libpysal import examples\n",
    "import geopandas as gpd\n",
    "from mapclassify import classify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AREA</th>\n",
       "      <th>PERIMETER</th>\n",
       "      <th>COLUMBUS_</th>\n",
       "      <th>COLUMBUS_I</th>\n",
       "      <th>POLYID</th>\n",
       "      <th>NEIG</th>\n",
       "      <th>HOVAL</th>\n",
       "      <th>INC</th>\n",
       "      <th>CRIME</th>\n",
       "      <th>OPEN</th>\n",
       "      <th>...</th>\n",
       "      <th>DISCBD</th>\n",
       "      <th>X</th>\n",
       "      <th>Y</th>\n",
       "      <th>NSA</th>\n",
       "      <th>NSB</th>\n",
       "      <th>EW</th>\n",
       "      <th>CP</th>\n",
       "      <th>THOUS</th>\n",
       "      <th>NEIGNO</th>\n",
       "      <th>geometry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.309441</td>\n",
       "      <td>2.440629</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>80.467003</td>\n",
       "      <td>19.531</td>\n",
       "      <td>15.725980</td>\n",
       "      <td>2.850747</td>\n",
       "      <td>...</td>\n",
       "      <td>5.03</td>\n",
       "      <td>38.799999</td>\n",
       "      <td>44.070000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>1005.0</td>\n",
       "      <td>POLYGON ((8.62413 14.23698, 8.55970 14.74245, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.259329</td>\n",
       "      <td>2.236939</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>44.567001</td>\n",
       "      <td>21.232</td>\n",
       "      <td>18.801754</td>\n",
       "      <td>5.296720</td>\n",
       "      <td>...</td>\n",
       "      <td>4.27</td>\n",
       "      <td>35.619999</td>\n",
       "      <td>42.380001</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>1001.0</td>\n",
       "      <td>POLYGON ((8.25279 14.23694, 8.28276 14.22994, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.192468</td>\n",
       "      <td>2.187547</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>26.350000</td>\n",
       "      <td>15.956</td>\n",
       "      <td>30.626781</td>\n",
       "      <td>4.534649</td>\n",
       "      <td>...</td>\n",
       "      <td>3.89</td>\n",
       "      <td>39.820000</td>\n",
       "      <td>41.180000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>1006.0</td>\n",
       "      <td>POLYGON ((8.65331 14.00809, 8.81814 14.00205, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.083841</td>\n",
       "      <td>1.427635</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>33.200001</td>\n",
       "      <td>4.477</td>\n",
       "      <td>32.387760</td>\n",
       "      <td>0.394427</td>\n",
       "      <td>...</td>\n",
       "      <td>3.70</td>\n",
       "      <td>36.500000</td>\n",
       "      <td>40.520000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>1002.0</td>\n",
       "      <td>POLYGON ((8.45950 13.82035, 8.47341 13.83227, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.488888</td>\n",
       "      <td>2.997133</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>23.225000</td>\n",
       "      <td>11.252</td>\n",
       "      <td>50.731510</td>\n",
       "      <td>0.405664</td>\n",
       "      <td>...</td>\n",
       "      <td>2.83</td>\n",
       "      <td>40.009998</td>\n",
       "      <td>38.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>1007.0</td>\n",
       "      <td>POLYGON ((8.68527 13.63952, 8.67758 13.72221, ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       AREA  PERIMETER  COLUMBUS_  COLUMBUS_I  POLYID  NEIG      HOVAL  \\\n",
       "0  0.309441   2.440629          2           5       1     5  80.467003   \n",
       "1  0.259329   2.236939          3           1       2     1  44.567001   \n",
       "2  0.192468   2.187547          4           6       3     6  26.350000   \n",
       "3  0.083841   1.427635          5           2       4     2  33.200001   \n",
       "4  0.488888   2.997133          6           7       5     7  23.225000   \n",
       "\n",
       "      INC      CRIME      OPEN  ...  DISCBD          X          Y  NSA  NSB  \\\n",
       "0  19.531  15.725980  2.850747  ...    5.03  38.799999  44.070000  1.0  1.0   \n",
       "1  21.232  18.801754  5.296720  ...    4.27  35.619999  42.380001  1.0  1.0   \n",
       "2  15.956  30.626781  4.534649  ...    3.89  39.820000  41.180000  1.0  1.0   \n",
       "3   4.477  32.387760  0.394427  ...    3.70  36.500000  40.520000  1.0  1.0   \n",
       "4  11.252  50.731510  0.405664  ...    2.83  40.009998  38.000000  1.0  1.0   \n",
       "\n",
       "    EW   CP   THOUS  NEIGNO                                           geometry  \n",
       "0  1.0  0.0  1000.0  1005.0  POLYGON ((8.62413 14.23698, 8.55970 14.74245, ...  \n",
       "1  0.0  0.0  1000.0  1001.0  POLYGON ((8.25279 14.23694, 8.28276 14.22994, ...  \n",
       "2  1.0  0.0  1000.0  1006.0  POLYGON ((8.65331 14.00809, 8.81814 14.00205, ...  \n",
       "3  0.0  0.0  1000.0  1002.0  POLYGON ((8.45950 13.82035, 8.47341 13.83227, ...  \n",
       "4  1.0  0.0  1000.0  1007.0  POLYGON ((8.68527 13.63952, 8.67758 13.72221, ...  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pth = examples.get_path('columbus.shp')\n",
    "gdf = gpd.read_file(pth)\n",
    "y = gdf.HOVAL\n",
    "gdf.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Original API (< 2.4.0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BoxPlot               \n",
       "\n",
       "   Interval      Count\n",
       "----------------------\n",
       "( -inf, -0.70] |     0\n",
       "(-0.70, 25.70] |    13\n",
       "(25.70, 33.50] |    12\n",
       "(33.50, 43.30] |    12\n",
       "(43.30, 69.70] |     7\n",
       "(69.70, 96.40] |     5"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import mapclassify\n",
    "\n",
    "bp = mapclassify.BoxPlot(y)\n",
    "bp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extended API (>= 2.40)\n",
    "\n",
    "Note the original API is still available so this extension keeps backwards compatibility."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BoxPlot               \n",
       "\n",
       "   Interval      Count\n",
       "----------------------\n",
       "( -inf, -0.70] |     0\n",
       "(-0.70, 25.70] |    13\n",
       "(25.70, 33.50] |    12\n",
       "(33.50, 43.30] |    12\n",
       "(43.30, 69.70] |     7\n",
       "(69.70, 96.40] |     5"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bp = classify(y, 'box_plot')\n",
    "bp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "mapclassify.classifiers.BoxPlot"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(bp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Quantiles             \n",
       "\n",
       "   Interval      Count\n",
       "----------------------\n",
       "[17.90, 23.08] |    10\n",
       "(23.08, 30.48] |    10\n",
       "(30.48, 39.10] |     9\n",
       "(39.10, 45.83] |    10\n",
       "(45.83, 96.40] |    10"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q5 = classify(y, 'quantiles', k=5)\n",
    "q5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Robustness of the `scheme` argument"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BoxPlot               \n",
       "\n",
       "   Interval      Count\n",
       "----------------------\n",
       "( -inf, -0.70] |     0\n",
       "(-0.70, 25.70] |    13\n",
       "(25.70, 33.50] |    12\n",
       "(33.50, 43.30] |    12\n",
       "(43.30, 69.70] |     7\n",
       "(69.70, 96.40] |     5"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classify(y, 'boxPlot')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BoxPlot               \n",
       "\n",
       "   Interval      Count\n",
       "----------------------\n",
       "( -inf, -0.70] |     0\n",
       "(-0.70, 25.70] |    13\n",
       "(25.70, 33.50] |    12\n",
       "(33.50, 43.30] |    12\n",
       "(43.30, 69.70] |     7\n",
       "(69.70, 96.40] |     5"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classify(y, 'Boxplot')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BoxPlot               \n",
       "\n",
       "   Interval      Count\n",
       "----------------------\n",
       "( -inf, -0.70] |     0\n",
       "(-0.70, 25.70] |    13\n",
       "(25.70, 33.50] |    12\n",
       "(33.50, 43.30] |    12\n",
       "(43.30, 69.70] |     7\n",
       "(69.70, 96.40] |     5"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classify(y, 'Box_plot')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[0;31mSignature:\u001b[0m\n",
       "\u001b[0mclassify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mscheme\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mk\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mpct\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m90\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m99\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mpct_sampled\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mtruncate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mhinge\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1.5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mmultiples\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mmindiff\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0minitial\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m    \u001b[0mbins\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
       "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mDocstring:\u001b[0m\n",
       "Classify your data with `mapclassify.classify`\n",
       "Note: Input parameters are dependent on classifier used.\n",
       "\n",
       "Parameters\n",
       "----------\n",
       "y : array\n",
       "    (n,1), values to classify\n",
       "scheme : str\n",
       "    pysal.mapclassify classification scheme\n",
       "k : int, optional\n",
       "    The number of classes. Default=5.\n",
       "pct  : array, optional\n",
       "    Percentiles used for classification with `percentiles`.\n",
       "    Default=[1,10,50,90,99,100]\n",
       "pct_sampled : float, optional\n",
       "    The percentage of n that should form the sample\n",
       "    (JenksCaspallSampled, FisherJenksSampled)\n",
       "    If pct is specified such that n*pct > 1000, then pct = 1000./n\n",
       "truncate : boolean, optional\n",
       "    truncate pct_sampled in cases where pct * n > 1000., (Default True)\n",
       "hinge : float, optional\n",
       "    Multiplier for IQR when `BoxPlot` classifier used.\n",
       "    Default=1.5.\n",
       "multiples : array, optional\n",
       "    The multiples of the standard deviation to add/subtract from\n",
       "    the sample mean to define the bins using `std_mean`.\n",
       "    Default=[-2,-1,1,2].\n",
       "mindiff : float, optional\n",
       "    The minimum difference between class breaks\n",
       "    if using `maximum_breaks` classifier. Deafult =0.\n",
       "initial : int\n",
       "    Number of initial solutions to generate or number of runs\n",
       "    when using `natural_breaks` or `max_p_classifier`.\n",
       "    Default =100.\n",
       "    Note: setting initial to 0 will result in the quickest\n",
       "    calculation of bins.\n",
       "bins : array, optional\n",
       "    (k,1), upper bounds of classes (have to be monotically  \n",
       "    increasing) if using `user_defined` classifier.\n",
       "    Default =None, Example =[20, max(y)].\n",
       "\n",
       "Returns\n",
       "-------\n",
       "classifier : pysal.mapclassify.classifier instance\n",
       "        Object containing bin ids for each observation (.yb),\n",
       "        upper bounds of each class (.bins), number of classes (.k)\n",
       "        and number of observations falling in each class (.counts)\n",
       "\n",
       "Note: Supported classifiers include: quantiles, box_plot, euqal_interval,\n",
       "    fisher_jenks, headtail_breaks, jenks_caspall, jenks_caspall_forced,\n",
       "    max_p_classifier, maximum_breaks, natural_breaks, percentiles, std_mean,\n",
       "    user_defined\n",
       "\n",
       "\n",
       "Examples\n",
       "--------\n",
       "Imports\n",
       "\n",
       ">>> from libpysal import examples\n",
       ">>> import geopandas as gpd\n",
       ">>> from mapclassify import classify\n",
       "\n",
       "Load Example Data\n",
       "\n",
       ">>> link_to_data = examples.get_path('columbus.shp')\n",
       ">>> gdf = gpd.read_file(link_to_data)\n",
       ">>> x = gdf['HOVAL'].values\n",
       "\n",
       "Classify values by quantiles\n",
       "\n",
       ">>> quantiles = classify(x, 'quantiles')\n",
       "\n",
       "Classify values by box_plot and set hinge to 2\n",
       "\n",
       ">>> box_plot = classify(x, 'box_plot', hinge=2)\n",
       "\u001b[0;31mFile:\u001b[0m      ~/Dropbox/p/pysal/src/subpackages/mapclassify/mapclassify/_classify_API.py\n",
       "\u001b[0;31mType:\u001b[0m      function\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "classify?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
