{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Group feature extraction" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import movekit as mkit\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Extracting all absolute features: 100%|██████████| 100.0/100 [00:01<00:00, 67.56it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timeanimal_idxydistanceaverage_speedaverage_accelerationdirectionstoppedturning
01312405.29417.760.00.210217-0.006079(0.0, 0.0)10.0
11511369.99428.780.00.0209440.000041(0.0, 0.0)10.0
21607390.33405.890.00.0702350.000344(0.0, 0.0)10.0
31811445.15411.940.00.3705000.007092(0.0, 0.0)10.0
41905366.06451.760.00.118000-0.003975(0.0, 0.0)10.0
\n", "
" ], "text/plain": [ " time animal_id x y distance average_speed \\\n", "0 1 312 405.29 417.76 0.0 0.210217 \n", "1 1 511 369.99 428.78 0.0 0.020944 \n", "2 1 607 390.33 405.89 0.0 0.070235 \n", "3 1 811 445.15 411.94 0.0 0.370500 \n", "4 1 905 366.06 451.76 0.0 0.118000 \n", "\n", " average_acceleration direction stopped turning \n", "0 -0.006079 (0.0, 0.0) 1 0.0 \n", "1 0.000041 (0.0, 0.0) 1 0.0 \n", "2 0.000344 (0.0, 0.0) 1 0.0 \n", "3 0.007092 (0.0, 0.0) 1 0.0 \n", "4 -0.003975 (0.0, 0.0) 1 0.0 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = \"./datasets/fish-5-cleaned.csv\"\n", "data = mkit.read_data(path)\n", "data = mkit.extract_features(data)\n", "data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Detecting outliers\n", "The function detects outliers using the KNN algorithm. The user can specify the features used for the detection, the number of nearest neighbors taken into account for the outlier classification, the metric used to calculate the distance, the method used to aggregate the different distances, and the share of outliers (contamination)." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timeanimal_idoutlierxydistanceaverage_speedaverage_accelerationdirectionstoppedturning
523121405.31417.370.3905120.192177-0.006451(0.02, -0.39)10.000000
828111445.48412.260.4596740.3879830.007893(0.33, 0.32)10.000000
2603521811171.65333.290.4726520.3411610.030547(0.05, 0.47)10.437319
2608522811171.56334.190.9044890.3472700.029050(-0.09, 0.9)10.978928
34866985111113.96283.464.3426373.8883470.326774(2.12, -3.79)00.999926
\n", "
" ], "text/plain": [ " time animal_id outlier x y distance average_speed \\\n", "5 2 312 1 405.31 417.37 0.390512 0.192177 \n", "8 2 811 1 445.48 412.26 0.459674 0.387983 \n", "2603 521 811 1 71.65 333.29 0.472652 0.341161 \n", "2608 522 811 1 71.56 334.19 0.904489 0.347270 \n", "3486 698 511 1 113.96 283.46 4.342637 3.888347 \n", "\n", " average_acceleration direction stopped turning \n", "5 -0.006451 (0.02, -0.39) 1 0.000000 \n", "8 0.007893 (0.33, 0.32) 1 0.000000 \n", "2603 0.030547 (0.05, 0.47) 1 0.437319 \n", "2608 0.029050 (-0.09, 0.9) 1 0.978928 \n", "3486 0.326774 (2.12, -3.79) 0 0.999926 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Detect outliers based on KNN.\n", "# mkit.outlier_detection(dataset, features=[\"distance\", \"average_speed\", \"average_acceleration\",\n", "# \"stopped\", \"turning\"], contamination=0.01, n_neighbors=5, method=\"mean\", metric=\"minkowski\")\n", "outs = mkit.outlier_detection(data)\n", "# printing all rows where outliers are present\n", "outs[outs.loc[:,\"outlier\"] == 1].head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timeanimal_idoutlierxydistanceaverage_speedaverage_accelerationdirectionstoppedturning
15273066071176.53416.773.0103993.195286-0.075984(-3.0, -0.25)00.999919
16073226071130.81410.811.7801692.088872-0.221172(-1.77, -0.19)00.999996
16123236071129.26410.631.5604171.824672-0.227409(-1.55, -0.18)00.999962
16173246071127.95410.581.3109541.561242-0.224810(-1.31, -0.05)00.997001
16223256071126.90410.581.0500001.296308-0.216147(-1.05, 0.0)00.999272
\n", "
" ], "text/plain": [ " time animal_id outlier x y distance average_speed \\\n", "1527 306 607 1 176.53 416.77 3.010399 3.195286 \n", "1607 322 607 1 130.81 410.81 1.780169 2.088872 \n", "1612 323 607 1 129.26 410.63 1.560417 1.824672 \n", "1617 324 607 1 127.95 410.58 1.310954 1.561242 \n", "1622 325 607 1 126.90 410.58 1.050000 1.296308 \n", "\n", " average_acceleration direction stopped turning \n", "1527 -0.075984 (-3.0, -0.25) 0 0.999919 \n", "1607 -0.221172 (-1.77, -0.19) 0 0.999996 \n", "1612 -0.227409 (-1.55, -0.18) 0 0.999962 \n", "1617 -0.224810 (-1.31, -0.05) 0 0.997001 \n", "1622 -0.216147 (-1.05, 0.0) 0 0.999272 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# same function, different parameters\n", "other_outs = mkit.outlier_detection(dataset = data, features = [\"average_speed\", \"average_acceleration\"], contamination = 0.05, n_neighbors = 8, method = \"median\", metric = \"euclidean\")\n", "\n", "# printing all rows where outliers are present\n", "other_outs[other_outs.loc[:,\"outlier\"] == 1].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Group-level Analysis\n", "\n", "Below we perform the analysis at group level. This consists of:\n", "- Group-level averages,\n", "- Centroid and medoid computation,\n", "- A dynamic time warping matrix,\n", "- A clustering over time based on absolute features,\n", "- The centroid direction,\n", "- The heading difference of each animal with respect to the current centroid,\n", "- The group polarization for each timestep." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Obtain group-level records for each point in time\n", "Each record consists of the total distance covered by the group, the mean speed, the mean acceleration, and the mean distance from the centroid for each timestamp. If the input does not contain centroid or feature data, these are calculated and a warning is shown."
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Calculating centroid distances: 100%|██████████| 1000/1000 [00:07<00:00, 132.25it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_distmean_speedmean_accelerationmean_distance_centroid
time
10.0000000.157979-0.00051529.4616
21.1749080.157641-0.00033129.5850
31.0251550.1556100.00181829.6914
40.9189600.1535790.00155129.7782
50.8304610.1533410.00160329.8518
\n", "
" ], "text/plain": [ " total_dist mean_speed mean_acceleration mean_distance_centroid\n", "time \n", "1 0.000000 0.157979 -0.000515 29.4616\n", "2 1.174908 0.157641 -0.000331 29.5850\n", "3 1.025155 0.155610 0.001818 29.6914\n", "4 0.918960 0.153579 0.001551 29.7782\n", "5 0.830461 0.153341 0.001603 29.8518" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "group_data = mkit.group_movement(data)\n", "group_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Obtain centroid, medoid and distance to centroid for each movement record" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Calculating centroid distances: 100%|██████████| 1000/1000 [00:05<00:00, 188.64it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timeanimal_idoutlierxydistanceaverage_speedaverage_accelerationdirectionstoppedturningx_centroidy_centroidmedoiddistance_to_centroid
013120405.29417.760.00.210217-0.006079(0.0, 0.0)10.0395.364423.22631211.331
115110369.99428.780.00.0209440.000041(0.0, 0.0)10.0395.364423.22631225.975
216070390.33405.890.00.0702350.000344(0.0, 0.0)10.0395.364423.22631218.052
318110445.15411.940.00.3705000.007092(0.0, 0.0)10.0395.364423.22631251.049
419050366.06451.760.00.118000-0.003975(0.0, 0.0)10.0395.364423.22631240.901
\n", "
" ], "text/plain": [ " time animal_id outlier x y distance average_speed \\\n", "0 1 312 0 405.29 417.76 0.0 0.210217 \n", "1 1 511 0 369.99 428.78 0.0 0.020944 \n", "2 1 607 0 390.33 405.89 0.0 0.070235 \n", "3 1 811 0 445.15 411.94 0.0 0.370500 \n", "4 1 905 0 366.06 451.76 0.0 0.118000 \n", "\n", " average_acceleration direction stopped turning x_centroid y_centroid \\\n", "0 -0.006079 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "1 0.000041 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "2 0.000344 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "3 0.007092 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "4 -0.003975 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "\n", " medoid distance_to_centroid \n", "0 312 11.331 \n", "1 312 25.975 \n", "2 312 18.052 \n", "3 312 51.049 \n", "4 312 40.901 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movement = mkit.centroid_medoid_computation(data, object_output = False)\n", "movement.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Get the heading difference between the centroid's and each animal's direction\n", "The heading difference is computed as the cosine similarity of the two direction vectors and thus ranges from -1 to 1. A value of 1 indicates that the animal and the centroid move in the same direction, while -1 indicates that they move in opposite directions." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Calculating centroid distances: 100%|██████████| 1000/1000 [00:02<00:00, 361.77it/s]\n", "Computing centroid direction: 100%|██████████| 100.0/100 [00:00<00:00, 758.23it/s]\n", "Calculating centroid distances: 100%|██████████| 1000/1000 [00:04<00:00, 222.59it/s]\n", "Calculating heading difference: 100%|██████████| 100.0/100 [00:01<00:00, 76.04it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timeanimal_idoutlierxydistanceaverage_speedaverage_accelerationdirectionstoppedturningx_centroidy_centroidmedoiddistance_to_centroidcentroid_directionheading_difference
013120405.29417.760.00.210217-0.006079(0.0, 0.0)10.0395.364423.22631211.331(0.0, 0.0)0.0
115110369.99428.780.00.0209440.000041(0.0, 0.0)10.0395.364423.22631225.975(0.0, 0.0)0.0
216070390.33405.890.00.0702350.000344(0.0, 0.0)10.0395.364423.22631218.052(0.0, 0.0)0.0
318110445.15411.940.00.3705000.007092(0.0, 0.0)10.0395.364423.22631251.049(0.0, 0.0)0.0
419050366.06451.760.00.118000-0.003975(0.0, 0.0)10.0395.364423.22631240.901(0.0, 0.0)0.0
\n", "
" ], "text/plain": [ " time animal_id outlier x y distance average_speed \\\n", "0 1 312 0 405.29 417.76 0.0 0.210217 \n", "1 1 511 0 369.99 428.78 0.0 0.020944 \n", "2 1 607 0 390.33 405.89 0.0 0.070235 \n", "3 1 811 0 445.15 411.94 0.0 0.370500 \n", "4 1 905 0 366.06 451.76 0.0 0.118000 \n", "\n", " average_acceleration direction stopped turning x_centroid y_centroid \\\n", "0 -0.006079 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "1 0.000041 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "2 0.000344 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "3 0.007092 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "4 -0.003975 (0.0, 0.0) 1 0.0 395.364 423.226 \n", "\n", " medoid distance_to_centroid centroid_direction heading_difference \n", "0 312 11.331 (0.0, 0.0) 0.0 \n", "1 312 25.975 (0.0, 0.0) 0.0 \n", "2 312 18.052 (0.0, 0.0) 0.0 \n", "3 312 51.049 (0.0, 0.0) 0.0 \n", "4 312 40.901 (0.0, 0.0) 0.0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "centroid_dir = mkit.compute_centroid_direction(data).sort_values(['time','animal_id'])\n", "heading_diff = mkit.get_heading_difference(data)\n", "heading_diff.head()\n" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "#### Obtain a matrix based on dynamic time warping\n", "The animal IDs label both the rows and the columns. Each entry reflects the similarity of the two animals' trajectories based on the DTW algorithm; lower values indicate more similar trajectories." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Calculating dynamic time warping: 100%|██████████| 5/5 [00:07<00:00, 1.51s/it]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
312511607811905
3120.00000030843.08540332859.60013942461.52455337916.447829
51130843.0854030.00000026931.01432347116.70811620967.960073
60732859.60013926931.0143230.00000039859.78792435711.718898
81142461.52455347116.70811639859.7879240.00000038379.806433
90537916.44782920967.96007335711.71889838379.8064330.000000
\n", "
" ], "text/plain": [ " 312 511 607 811 905\n", "312 0.000000 30843.085403 32859.600139 42461.524553 37916.447829\n", "511 30843.085403 0.000000 26931.014323 47116.708116 20967.960073\n", "607 32859.600139 26931.014323 0.000000 39859.787924 35711.718898\n", "811 42461.524553 47116.708116 39859.787924 0.000000 38379.806433\n", "905 37916.447829 20967.960073 35711.718898 38379.806433 0.000000" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Obtain the dynamic time warping values between all pairs of animal trajectories. The lower the value for two animals is, the more similar their trajectories are based on the DTW algorithm.\n", "# mkit.dtw_matrix(preprocessed_data, path=False, distance=euclidean)\n", "# preprocessed_data: DataFrame containing the movement data.\n", "# path: Boolean to specify whether the matrix of DTW paths is returned as well (the warping path for all the sequence pairs which are examined).\n", "# distance: Specify which distance measure to use. Default: \"euclidean\". Other example alternatives are pdist or minkowski (all distances defined by the fastdtw package are possible).\n", "\n", "mkit.dtw_matrix(data)" ] } ], "metadata": { "kernelspec": { "display_name": "mkit", "language": "python", "name": "mkit" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 4 }