{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "universal-property",
   "metadata": {},
   "source": [
    "## Data preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "published-player",
   "metadata": {},
   "outputs": [],
   "source": [
    "import movekit as mkit\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "welsh-restriction",
   "metadata": {},
   "source": [
    "#### Read data input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "worst-scott",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the input data (CSV file)\n",
    "path = \"./datasets/fish-5.csv\"\n",
    "\n",
    "# Alternatively, point to an Excel file instead:\n",
    "# path = \"./datasets/fish-5.xlsx\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "empirical-marijuana",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>312</td>\n",
       "      <td>405.29</td>\n",
       "      <td>417.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>511</td>\n",
       "      <td>369.99</td>\n",
       "      <td>428.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>607</td>\n",
       "      <td>390.33</td>\n",
       "      <td>405.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>811</td>\n",
       "      <td>445.15</td>\n",
       "      <td>411.94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>905</td>\n",
       "      <td>366.06</td>\n",
       "      <td>451.76</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   time  animal_id       x       y\n",
       "0     1        312  405.29  417.76\n",
       "1     1        511  369.99  428.78\n",
       "2     1        607  390.33  405.89\n",
       "3     1        811  445.15  411.94\n",
       "4     1        905  366.06  451.76"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the movement records from the file at `path` into a DataFrame\n",
    "data = mkit.read_data(path)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ba50f62-ef12-40cb-9588-a8d63d04572e",
   "metadata": {},
   "source": [
    "#### General preprocessing method "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "preceding-tamil",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total number of missing values =  0\n",
      "time         0\n",
      "animal_id    0\n",
      "x            0\n",
      "y            0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Run preprocessing with its default settings (prints a summary of missing values per column)\n",
    "preprocessed_data = mkit.preprocess(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "handled-implement",
   "metadata": {},
   "outputs": [],
   "source": [
    "# OPTIONAL: more parameters to control the preprocessing of data\n",
    "\n",
    "# preprocessed_data = mkit.preprocess(data, dropna=True, interpolation=False, limit=1, limit_direction=\"forward\", inplace=False, method=\"linear\", order=1, date_format=False)\n",
    "\n",
    "# Parameters\n",
    "#  data: DataFrame to perform preprocessing on\n",
    "#  dropna: Optional parameter to drop columns with missing values for 'time' and 'animal_id'\n",
    "#  interpolation: Optional parameter to perform linear interpolation\n",
    "#  limit: Maximum number of consecutive NANs to fill\n",
    "#  limit_direction: If limit is specified, consecutive NaNs will be filled in this direction.\n",
    "#  method: Interpolation technique to use. Default is \"linear\".\n",
    "#  order: To be used in case of polynomial and spline interpolation.\n",
    "#  date_format: Boolean to define whether time is some kind of date format instead of a number."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e44c83c0-7c1c-4d72-8580-c61a22775497",
   "metadata": {},
   "source": [
    "#### Examples of some sampling methods"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "047bdc8f-393d-44d2-829c-692270a0ab48",
   "metadata": {},
   "source": [
    "If one has a large data set it can be efficient to decrease the size of the data set by sampling (systematically or randomly) or filtering the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "64235d08-ec38-4043-954f-2cf2dc470913",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>312</td>\n",
       "      <td>405.29</td>\n",
       "      <td>417.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>511</td>\n",
       "      <td>369.99</td>\n",
       "      <td>428.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>607</td>\n",
       "      <td>390.33</td>\n",
       "      <td>405.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>811</td>\n",
       "      <td>445.15</td>\n",
       "      <td>411.94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>905</td>\n",
       "      <td>366.06</td>\n",
       "      <td>451.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>501</td>\n",
       "      <td>312</td>\n",
       "      <td>106.20</td>\n",
       "      <td>386.81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>501</td>\n",
       "      <td>511</td>\n",
       "      <td>111.52</td>\n",
       "      <td>422.73</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>501</td>\n",
       "      <td>607</td>\n",
       "      <td>61.26</td>\n",
       "      <td>365.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>501</td>\n",
       "      <td>811</td>\n",
       "      <td>71.48</td>\n",
       "      <td>332.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>501</td>\n",
       "      <td>905</td>\n",
       "      <td>71.26</td>\n",
       "      <td>365.12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   time  animal_id       x       y\n",
       "0     1        312  405.29  417.76\n",
       "1     1        511  369.99  428.78\n",
       "2     1        607  390.33  405.89\n",
       "3     1        811  445.15  411.94\n",
       "4     1        905  366.06  451.76\n",
       "5   501        312  106.20  386.81\n",
       "6   501        511  111.52  422.73\n",
       "7   501        607   61.26  365.88\n",
       "8   501        811   71.48  332.31\n",
       "9   501        905   71.26  365.12"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Systematic downsampling: with the argument 2, the result displayed below keeps two time steps (1 and 501)\n",
    "downsampled_data = mkit.resample_systematic(preprocessed_data, 2)\n",
    "\n",
    "# Filtering with the bounds 5 and 6 (presumably the time range to keep -- check the movekit docs)\n",
    "filtered_data = mkit.filter_dataframe(preprocessed_data, 5, 6)\n",
    "\n",
    "downsampled_data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59e87f43-0858-4ae1-9820-345ffcdbb54b",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### Methods to replace/convert specific values (duplicates, missings, selected values)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "69ece056-8a2d-4178-953b-ded0defb1166",
   "metadata": {},
   "source": [
    "One can replace the coordinate values for a specific mover at a specific time period. This can be a useful method for dealing with outliers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2e476ddd-d039-4baa-928d-ebf6cff1c27c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>811</td>\n",
       "      <td>100.00</td>\n",
       "      <td>90.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2</td>\n",
       "      <td>811</td>\n",
       "      <td>445.48</td>\n",
       "      <td>412.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>3</td>\n",
       "      <td>811</td>\n",
       "      <td>100.00</td>\n",
       "      <td>90.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>4</td>\n",
       "      <td>811</td>\n",
       "      <td>446.03</td>\n",
       "      <td>413.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>5</td>\n",
       "      <td>811</td>\n",
       "      <td>446.24</td>\n",
       "      <td>413.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4978</th>\n",
       "      <td>996</td>\n",
       "      <td>811</td>\n",
       "      <td>761.31</td>\n",
       "      <td>307.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4983</th>\n",
       "      <td>997</td>\n",
       "      <td>811</td>\n",
       "      <td>761.56</td>\n",
       "      <td>307.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4988</th>\n",
       "      <td>998</td>\n",
       "      <td>811</td>\n",
       "      <td>761.86</td>\n",
       "      <td>307.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4993</th>\n",
       "      <td>999</td>\n",
       "      <td>811</td>\n",
       "      <td>762.12</td>\n",
       "      <td>307.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4998</th>\n",
       "      <td>1000</td>\n",
       "      <td>811</td>\n",
       "      <td>762.44</td>\n",
       "      <td>307.61</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      time  animal_id       x       y\n",
       "3        1        811  100.00   90.00\n",
       "8        2        811  445.48  412.26\n",
       "13       3        811  100.00   90.00\n",
       "18       4        811  446.03  413.00\n",
       "23       5        811  446.24  413.42\n",
       "...    ...        ...     ...     ...\n",
       "4978   996        811  761.31  307.65\n",
       "4983   997        811  761.56  307.65\n",
       "4988   998        811  761.86  307.65\n",
       "4993   999        811  762.12  307.65\n",
       "4998  1000        811  762.44  307.61\n",
       "\n",
       "[1000 rows x 4 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Overwrite the coordinates of animal 811 at time steps 1 and 3 with x=100, y=90\n",
    "arr_index = np.array([1, 3])\n",
    "replaced_data_groups = mkit.replace_parts_animal_movement(preprocessed_data, 811, arr_index, 100, 90)\n",
    "\n",
    "# Show only the records of the modified animal\n",
    "is_animal_811 = replaced_data_groups['animal_id'] == 811\n",
    "replaced_data_groups[is_animal_811]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "511c7705-0950-4eb5-a753-cbf18202b366",
   "metadata": {
    "tags": []
   },
   "source": [
    "In many applications it is useful to normalize the coordinate data before the analysis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b82deaf5-670a-40d1-870e-856904f881cf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>312</td>\n",
       "      <td>0.496639</td>\n",
       "      <td>0.849376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>511</td>\n",
       "      <td>0.446887</td>\n",
       "      <td>0.873750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>607</td>\n",
       "      <td>0.475554</td>\n",
       "      <td>0.823122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>811</td>\n",
       "      <td>0.552817</td>\n",
       "      <td>0.836504</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>905</td>\n",
       "      <td>0.441348</td>\n",
       "      <td>0.924578</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   time  animal_id         x         y\n",
       "0     1        312  0.496639  0.849376\n",
       "1     1        511  0.446887  0.873750\n",
       "2     1        607  0.475554  0.823122\n",
       "3     1        811  0.552817  0.836504\n",
       "4     1        905  0.441348  0.924578"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normalize a copy so that the raw coordinates in `data` stay available to other cells\n",
    "normalized_data = mkit.normalize(data.copy())\n",
    "normalized_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7e91cd2-c3e5-4487-b37e-cd4df758fa96",
   "metadata": {},
   "source": [
    "There are two methods to get an overview of the missing values and the duplicate rows in the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "bebb162e-70ba-4e7b-8af2-47276e5d72b4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total number of missing values =  5\n",
      "x            5\n",
      "time         0\n",
      "animal_id    0\n",
      "y            0\n",
      "dtype: int64\n",
      "Duplicate rows based on the columns 'animal_id' and 'time' column are:\n",
      "Empty DataFrame\n",
      "Columns: [time, animal_id, x, y]\n",
      "Index: []\n"
     ]
    }
   ],
   "source": [
    "# For demonstration, set all x values at time period 3 to NaN.\n",
    "# Work on a copy so that the original `data` is not modified.\n",
    "missing_data = data.copy()\n",
    "missing_data.loc[missing_data['time'] == 3, 'x'] = np.nan\n",
    "\n",
    "mkit.print_missing(missing_data)\n",
    "mkit.print_duplicate(missing_data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb22941e-0848-4166-9d75-275db9618814",
   "metadata": {},
   "source": [
    "#### Making a pandas DataFrame compatible with `movekit`"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49db6994-0983-46ea-a2fc-91fc053fe1f4",
   "metadata": {},
   "source": [
    "If one has the data stored in a Pandas DataFrame one can easily make the DataFrame compatible with `movekit`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e7f15f7b-6122-4e14-ba2e-e1262e1c07d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>A</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>C</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>D</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   time animal_id  x  y\n",
       "0     0         A  0  5\n",
       "1     1         B  1  6\n",
       "2     2         C  2  7\n",
       "3     3         D  3  8"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Usage: mkit.from_dataframe(data, dictionary)\n",
    "#\n",
    "# Parameters:\n",
    "#  data: the existing DataFrame\n",
    "#  dictionary: key-value pairs of column names. Keys are the old column names, values the respective new ones. The values must include 'time', 'animal_id', 'x' and 'y'.\n",
    "\n",
    "wrong_df = pd.DataFrame({\n",
    "    'Time': [0, 1, 2, 3],\n",
    "    'IDs': ['A', 'B', 'C', 'D'],\n",
    "    'x-values': [0, 1, 2, 3],\n",
    "    'y-values': [5, 6, 7, 8],\n",
    "})\n",
    "correct_df = mkit.from_dataframe(\n",
    "    wrong_df,\n",
    "    {'Time': 'time', 'IDs': 'animal_id', 'x-values': 'x', 'y-values': 'y'},\n",
    ")\n",
    "correct_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "34ad97a4-7d9c-4135-a11b-31d5e8b599ab",
   "metadata": {},
   "source": [
    "#### Support for 3d datasets\n",
    "\n",
    "`movekit` also supports movement in three dimensions. All function calls remain the same for the user as the presence of a third dimension in the data is recognized by `movekit`.\n",
    "\n",
    "Below we show an example of a 3D dataset that can be given to `movekit`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "aa69040b-1f3f-49ad-bfb1-4978a3ec2370",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>312</td>\n",
       "      <td>0.496639</td>\n",
       "      <td>0.849376</td>\n",
       "      <td>-0.271515</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>511</td>\n",
       "      <td>0.446887</td>\n",
       "      <td>0.873750</td>\n",
       "      <td>0.209787</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>607</td>\n",
       "      <td>0.475554</td>\n",
       "      <td>0.823122</td>\n",
       "      <td>-0.518311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>811</td>\n",
       "      <td>0.552817</td>\n",
       "      <td>0.836504</td>\n",
       "      <td>-0.104081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>905</td>\n",
       "      <td>0.441348</td>\n",
       "      <td>0.924578</td>\n",
       "      <td>1.042317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4995</th>\n",
       "      <td>1000</td>\n",
       "      <td>312</td>\n",
       "      <td>0.941539</td>\n",
       "      <td>0.466381</td>\n",
       "      <td>-0.624974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4996</th>\n",
       "      <td>1000</td>\n",
       "      <td>511</td>\n",
       "      <td>0.859231</td>\n",
       "      <td>0.423671</td>\n",
       "      <td>-1.143292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4997</th>\n",
       "      <td>1000</td>\n",
       "      <td>607</td>\n",
       "      <td>0.944062</td>\n",
       "      <td>0.580819</td>\n",
       "      <td>-0.840995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4998</th>\n",
       "      <td>1000</td>\n",
       "      <td>811</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.605746</td>\n",
       "      <td>-1.494142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4999</th>\n",
       "      <td>1000</td>\n",
       "      <td>905</td>\n",
       "      <td>0.880865</td>\n",
       "      <td>0.334668</td>\n",
       "      <td>-0.910113</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5000 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      time  animal_id         x         y         z\n",
       "0        1        312  0.496639  0.849376 -0.271515\n",
       "1        1        511  0.446887  0.873750  0.209787\n",
       "2        1        607  0.475554  0.823122 -0.518311\n",
       "3        1        811  0.552817  0.836504 -0.104081\n",
       "4        1        905  0.441348  0.924578  1.042317\n",
       "...    ...        ...       ...       ...       ...\n",
       "4995  1000        312  0.941539  0.466381 -0.624974\n",
       "4996  1000        511  0.859231  0.423671 -1.143292\n",
       "4997  1000        607  0.944062  0.580819 -0.840995\n",
       "4998  1000        811  1.000000  0.605746 -1.494142\n",
       "4999  1000        905  0.880865  0.334668 -0.910113\n",
       "\n",
       "[5000 rows x 5 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a synthetic 3D dataset by appending a third dimension to the 2D dataset from above.\n",
    "# A seeded generator keeps the synthetic z values identical across re-runs of the notebook.\n",
    "z = np.random.default_rng(seed=0).normal(loc=0.0, scale=1.0, size=len(preprocessed_data))\n",
    "preprocessed_data['z'] = z\n",
    "preprocessed_data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9876b5e7-9b81-4d63-ac0e-bc66b99c88de",
   "metadata": {
    "tags": []
   },
   "source": [
    "#### Support for geographic coordinates\n",
    "\n",
    "`movekit` is able to project data from GPS coordinates in the latitude and longitude format to the cartesian coordinate system."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "91d2a975-b315-43a6-8d29-71822eab82fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>47.691358</td>\n",
       "      <td>9.176731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>52.472161</td>\n",
       "      <td>13.402034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>47.692101</td>\n",
       "      <td>9.055353</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   time  animal_id   latitude  longitude\n",
       "0     1          1  47.691358   9.176731\n",
       "1     1          2  52.472161  13.402034\n",
       "2     1          3  47.692101   9.055353"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Location of the example file with GPS coordinates\n",
    "path = \"./datasets/geo.csv\"\n",
    "\n",
    "# The file is semicolon-separated, so it is read directly with pandas\n",
    "geo_data = pd.read_csv(path, sep=';')\n",
    "geo_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "57c9419a-ee0d-4bdd-a27a-f5440822c063",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>513261.777038</td>\n",
       "      <td>5.282012e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>391460.276950</td>\n",
       "      <td>5.814756e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>504153.593963</td>\n",
       "      <td>5.282081e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   time  animal_id              x             y\n",
       "0     1          1  513261.777038  5.282012e+06\n",
       "1     1          2  391460.276950  5.814756e+06\n",
       "2     1          3  504153.593963  5.282081e+06"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Usage: mkit.convert_latlon(data, latitude='latitude', longitude='longitude', replace=True)\n",
    "#\n",
    "# Parameters:\n",
    "#  data: DataFrame with GPS coordinates\n",
    "#  latitude: str, name of the column where the latitude is stored\n",
    "#  longitude: str, name of the column where the longitude is stored\n",
    "#  replace: bool, flag whether the x/y columns should replace the lat/lon columns\n",
    "# Returns:\n",
    "#  DataFrame after the transformation, where latitude is projected into y and longitude is projected into x\n",
    "\n",
    "projected_data = mkit.convert_latlon(geo_data)\n",
    "projected_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18a469d4-8dd0-44e7-83c0-213600e48dff",
   "metadata": {},
   "source": [
    "#### Support for data stored as GeoJSON and JSON\n",
    "\n",
    "`movekit` is able to read data stored in a GeoJSON (.geojson) or JSON (.json) file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "bb2b023d-a9cc-4c3d-ac1a-aae751bcde8d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>animal_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>fish1</td>\n",
       "      <td>99.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>fish2</td>\n",
       "      <td>120.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>fish3</td>\n",
       "      <td>120.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>fish1</td>\n",
       "      <td>101.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>fish2</td>\n",
       "      <td>200.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>fish3</td>\n",
       "      <td>33.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>fish1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>3</td>\n",
       "      <td>fish2</td>\n",
       "      <td>125.0</td>\n",
       "      <td>43.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>3</td>\n",
       "      <td>fish3</td>\n",
       "      <td>45.0</td>\n",
       "      <td>87.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>4</td>\n",
       "      <td>fish1</td>\n",
       "      <td>-44.0</td>\n",
       "      <td>-11.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>4</td>\n",
       "      <td>fish2</td>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>4</td>\n",
       "      <td>fish3</td>\n",
       "      <td>11.0</td>\n",
       "      <td>-12.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    time animal_id      x     y\n",
       "0      1     fish1   99.0   0.0\n",
       "1      1     fish2  120.0   4.0\n",
       "2      1     fish3  120.0   6.0\n",
       "3      2     fish1  101.0   1.0\n",
       "4      2     fish2  200.0   5.0\n",
       "5      2     fish3   33.0   5.0\n",
       "6      3     fish1    8.0   8.0\n",
       "7      3     fish2  125.0  43.0\n",
       "8      3     fish3   45.0  87.0\n",
       "9      4     fish1  -44.0 -11.0\n",
       "10     4     fish2   12.0   5.0\n",
       "11     4     fish3   11.0 -12.0"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "json_data = mkit.read_geojson('./datasets/fish-4.geojson')\n",
    "json_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8e9af57-448a-4285-94e1-9dfc0032adae",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}