From 5638fa11f1cb485c45218be80ac63565a86c0209 Mon Sep 17 00:00:00 2001
From: Manuel <mansanher@gmail.com>
Date: Sat, 24 Apr 2021 00:10:45 +0200
Subject: [PATCH 1/2] Adding pandarallel library

Pandarallel runs the pandas apply method in parallel, which makes data preprocessing faster and easier. This is very helpful in kernel-only competitions.

See:

https://github.com/nalepae/pandarallel
---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index 21c8e975..6bd15db6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -427,6 +427,7 @@ RUN pip install flashtext && \
     pip install jax==0.2.12 jaxlib==0.1.64 && \
     # ipympl adds interactive widget support for matplotlib
     pip install ipympl==0.7.0 && \
+    pip install pandarallel && \
     /tmp/clean-layer.sh
 
 # Download base easyocr models.

From 8b379085e99d46e6efcf50dcb8eda4197311bbbc Mon Sep 17 00:00:00 2001
From: Manuel <mansanher@gmail.com>
Date: Wed, 28 Apr 2021 09:34:56 +0200
Subject: [PATCH 2/2] Adding test to pandarallel

Adding a simple test
---
 tests/test_pandarralel.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 tests/test_pandarralel.py

diff --git a/tests/test_pandarralel.py b/tests/test_pandarralel.py
new file mode 100644
index 00000000..fe74b0e1
--- /dev/null
+++ b/tests/test_pandarralel.py
@@ -0,0 +1,23 @@
+import unittest
+
+import pandas as pd
+from pandarallel import pandarallel
+
+
+class TestPandarallel(unittest.TestCase):
+    """Smoke test for the pandarallel package installed in the image."""
+
+    @classmethod
+    def setUpClass(cls):
+        # Initialize once for the class instead of at import time, so that
+        # merely importing this module during discovery has no side effects.
+        pandarallel.initialize()
+
+    def test_pandarallel(self):
+        """parallel_apply must produce the same values as a plain apply."""
+        data = pd.read_csv("/input/tests/data/train.csv")
+        data['label_converted'] = data['label'].parallel_apply(lambda x: x + 1)
+        self.assertEqual(
+            data['label_converted'].tolist(),
+            data['label'].apply(lambda x: x + 1).tolist(),
+        )