From 850d11661320c15874dbca058dc0a885fd876644 Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Tue, 13 Oct 2020 17:34:49 +0200 Subject: [PATCH 01/10] bugfix --- README.md | 4 +- examples/plot_partial_wass_and_gromov.py | 23 ++++++--- ot/partial.py | 62 +++++++++++++++--------- 3 files changed, 55 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 9d113bddb..d7bffb0b5 100644 --- a/README.md +++ b/README.md @@ -262,6 +262,6 @@ You can also post bug reports and feature requests in Github issues. Make sure t [27] Redko I., Courty N., Flamary R., Tuia D. (2019). [Optimal Transport for Multi-source Domain Adaptation under Target Shift](http://proceedings.mlr.press/v89/redko19a.html), Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics (AISTATS) 22, 2019. -[28] Caffarelli, L. A., McCann, R. J. (2020). [Free boundaries in optimal transport and Monge-Ampere obstacle problems](http://www.math.toronto.edu/~mccann/papers/annals2010.pdf), Annals of mathematics, 673-730. +[28] Caffarelli, L. A., McCann, R. J. (2010). [Free boundaries in optimal transport and Monge-Ampere obstacle problems](http://www.math.toronto.edu/~mccann/papers/annals2010.pdf), Annals of mathematics, 673-730. -[29] Chapel, L., Alaya, M., Gasso, G. (2019). [Partial Gromov-Wasserstein with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), arXiv preprint arXiv:2002.08276. \ No newline at end of file +[29] Chapel, L., Alaya, M., Gasso, G. (2020). [Partial Optimal Transport with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), Neural Information Processing Systems (NeurIPS), 2020. \ No newline at end of file diff --git a/examples/plot_partial_wass_and_gromov.py b/examples/plot_partial_wass_and_gromov.py index a5af44113..a2b0c791b 100755 --- a/examples/plot_partial_wass_and_gromov.py +++ b/examples/plot_partial_wass_and_gromov.py @@ -4,13 +4,17 @@ Partial Wasserstein and Gromov-Wasserstein example ================================================== -This example is designed to show how to use the Partial (Gromov-)Wassertsein +This example is designed to show how to use the Partial (Gromov-)Wasserstein distance computation in POT. 
""" # Author: Laetitia Chapel # License: MIT License +# sphinx_gallery_thumbnail_number = 2 + +# necessary for 3d plot even if not used +from mpl_toolkits.mplot3d import Axes3D # noqa import scipy as sp import numpy as np import matplotlib.pylab as pl @@ -123,7 +127,8 @@ m = 1 res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True) res, log = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, p, q, 10, - m=m, log=True) + m=m, log=True, + verbose=True) print('Wasserstein distance (m = 1): ' + str(log0['partial_gw_dist'])) print('Entropic Wasserstein distance (m = 1): ' + str(log['partial_gw_dist'])) @@ -132,18 +137,20 @@ pl.title("mass to be transported m = 1") pl.subplot(1, 2, 1) pl.imshow(res0, cmap='jet') -pl.title('Wasserstein') +pl.title('Gromov-Wasserstein') pl.subplot(1, 2, 2) pl.imshow(res, cmap='jet') -pl.title('Entropic Wasserstein') +pl.title('Entropic Gromov-Wasserstein') pl.show() # transport 2/3 of the mass print('-----m = 2/3') m = 2 / 3 -res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True) +res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True, + verbose=True) res, log = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, p, q, 10, - m=m, log=True) + m=m, log=True, + verbose=True) print('Partial Wasserstein distance (m = 2/3): ' + str(log0['partial_gw_dist'])) @@ -154,8 +161,8 @@ pl.title("mass to be transported m = 2/3") pl.subplot(1, 2, 1) pl.imshow(res0, cmap='jet') -pl.title('Partial Wasserstein') +pl.title('Partial Gromov-Wasserstein') pl.subplot(1, 2, 2) pl.imshow(res, cmap='jet') -pl.title('Entropic partial Wasserstein') +pl.title('Entropic partial Gromov-Wasserstein') pl.show() diff --git a/ot/partial.py b/ot/partial.py index c03ec25e4..1cbba6e2c 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -230,9 +230,9 @@ def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs): .. [28] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in optimal transport and Monge-Ampere obstacle problems. Annals of mathematics, 673-730. - .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- - Wasserstein with Applications on Positive-Unlabeled Learning". - arXiv preprint arXiv:2002.08276. + .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal + Transport with Applications on Positive-Unlabeled Learning". + NeurIPS. See Also -------- @@ -254,7 +254,7 @@ def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs): b_extended = np.append(b, [(np.sum(a) - m) / nb_dummies] * nb_dummies) a_extended = np.append(a, [(np.sum(b) - m) / nb_dummies] * nb_dummies) M_extended = np.zeros((len(a_extended), len(b_extended))) - M_extended[-1, -1] = np.max(M) * 1e5 + M_extended[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5 M_extended[:len(a), :len(b)] = M gamma, log_emd = emd(a_extended, b_extended, M_extended, log=True, @@ -344,9 +344,9 @@ def partial_wasserstein2(a, b, M, m=None, nb_dummies=1, log=False, **kwargs): .. [28] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in optimal transport and Monge-Ampere obstacle problems. Annals of mathematics, 673-730. - .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- - Wasserstein with Applications on Positive-Unlabeled Learning". - arXiv preprint arXiv:2002.08276. + .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal + Transport with Applications on Positive-Unlabeled Learning". + NeurIPS. 
""" partial_gw, log_w = partial_wasserstein(a, b, M, m, nb_dummies, log=True, @@ -506,9 +506,9 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, References ---------- - .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- - Wasserstein with Applications on Positive-Unlabeled Learning". - arXiv preprint arXiv:2002.08276. + .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal + Transport with Applications on Positive-Unlabeled Learning". + NeurIPS. """ @@ -530,17 +530,15 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, cpt = 0 err = 1 - eps = 1e-20 + if log: log = {'err': []} while (err > tol and cpt < numItermax): - Gprev = G0 + Gprev = G0.copy() M = gwgrad_partial(C1, C2, G0) - M[M < eps] = np.quantile(M, thres) - M_emd = np.zeros(dim_G_extended) M_emd[:len(p), :len(q)] = M M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5 @@ -565,6 +563,21 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, print('{:5d}|{:8e}|{:8e}'.format(cpt, err, gwloss_partial(C1, C2, G0))) + deltaG = G0 - Gprev + grad = gwgrad_partial(C1, C2, deltaG) + a = gwloss_partial(C1, C2, deltaG) + b = 2 * np.sum(grad * Gprev) + + if a > 0: + gamma = min(1, np.divide(-b, 2.0 * a)) + else: + if (a+b) < 0: + gamma = 1 + else: + gamma = 0 + + G0 = Gprev + gamma * deltaG + cpt += 1 if log: @@ -665,9 +678,9 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, References ---------- - .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- - Wasserstein with Applications on Positive-Unlabeled Learning". - arXiv preprint arXiv:2002.08276. + .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal + Transport with Applications on Positive-Unlabeled Learning". + NeurIPS. """ @@ -892,7 +905,8 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, [0.13, 0.12, 0. , 0. ], [0. , 0. , 0.25, 0. ], [0. , 0. , 0. , 0.25]]) - >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b, 50, m=0.25), 2) + >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b, 50, m=0.25), + 2) array([[0.02, 0.03, 0. , 0.03], [0.03, 0.03, 0. , 0.03], [0. , 0. , 0.03, 0. ], @@ -910,9 +924,9 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, "Gromov-Wasserstein averaging of kernel and distance matrices." International Conference on Machine Learning (ICML). 2016. - .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- - Wasserstein with Applications on Positive-Unlabeled Learning". - arXiv preprint arXiv:2002.08276. + .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal + Transport with Applications on Positive-Unlabeled Learning". + NeurIPS. See Also -------- @@ -1044,9 +1058,9 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, "Gromov-Wasserstein averaging of kernel and distance matrices." International Conference on Machine Learning (ICML). 2016. - .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- - Wasserstein with Applications on Positive-Unlabeled Learning". - arXiv preprint arXiv:2002.08276. + .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal + Transport with Applications on Positive-Unlabeled Learning". + NeurIPS. 
""" partial_gw, log_gw = entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, From 2b433d1c69c7e22dfbc6493078d8e4ee278eba59 Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Wed, 14 Oct 2020 10:13:56 +0200 Subject: [PATCH 02/10] update refs partial OT --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d7bffb0b5..7d15cfef4 100644 --- a/README.md +++ b/README.md @@ -264,4 +264,4 @@ You can also post bug reports and feature requests in Github issues. Make sure t [28] Caffarelli, L. A., McCann, R. J. (2010). [Free boundaries in optimal transport and Monge-Ampere obstacle problems](http://www.math.toronto.edu/~mccann/papers/annals2010.pdf), Annals of mathematics, 673-730. -[29] Chapel, L., Alaya, M., Gasso, G. (2020). [Partial Optimal Transport with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), Neural Information Processing Systems (NeurIPS), 2020. \ No newline at end of file +[29] Chapel, L., Alaya, M., Gasso, G. (2020). [Partial Optimal Transport with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), Advances in Neural Information Processing Systems (NeurIPS), 2020. \ No newline at end of file From bf40e97cd11f2674afdbbcb7394d29bba6424a16 Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Wed, 14 Oct 2020 10:17:12 +0200 Subject: [PATCH 03/10] fixes small typos in plot_partial_wass_and_gromov --- examples/plot_partial_wass_and_gromov.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/plot_partial_wass_and_gromov.py b/examples/plot_partial_wass_and_gromov.py index a2b0c791b..ac4194ca0 100755 --- a/examples/plot_partial_wass_and_gromov.py +++ b/examples/plot_partial_wass_and_gromov.py @@ -123,7 +123,7 @@ C2 = sp.spatial.distance.cdist(xt, xt) # transport 100% of the mass -print('-----m = 1') +print('------m = 1') m = 1 res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True) res, log = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, p, q, 10, @@ -144,7 +144,7 @@ pl.show() # transport 2/3 of the mass -print('-----m = 2/3') +print('------m = 2/3') m = 2 / 3 res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True, verbose=True) From 8bfdd43063e9202a98d445b8c8e501e67715c9c3 Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Wed, 14 Oct 2020 10:19:11 +0200 Subject: [PATCH 04/10] fix small bugs in partial.py --- ot/partial.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ot/partial.py b/ot/partial.py index 1cbba6e2c..c255e538d 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -351,7 +351,6 @@ def partial_wasserstein2(a, b, M, m=None, nb_dummies=1, log=False, **kwargs): partial_gw, log_w = partial_wasserstein(a, b, M, m, nb_dummies, log=True, **kwargs) - log_w['T'] = partial_gw if log: From 5302f6d797696a871873c4160b6e208fd92fdf88 Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Wed, 14 Oct 2020 10:51:43 +0200 Subject: [PATCH 05/10] update README --- README.md | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..da8f20b4e --- /dev/null +++ b/README.md @@ -0,0 +1,270 @@ +# POT: Python Optimal Transport + +[![PyPI version](https://badge.fury.io/py/POT.svg)](https://badge.fury.io/py/POT) +[![Anaconda Cloud](https://anaconda.org/conda-forge/pot/badges/version.svg)](https://anaconda.org/conda-forge/pot) +[![Build 
Status](https://github.com/PythonOT/POT/workflows/build/badge.svg?branch=master&event=push)](https://github.com/PythonOT/POT/actions) +[![Codecov Status](https://codecov.io/gh/PythonOT/POT/branch/master/graph/badge.svg)](https://codecov.io/gh/PythonOT/POT) +[![Downloads](https://pepy.tech/badge/pot)](https://pepy.tech/project/pot) +[![Anaconda downloads](https://anaconda.org/conda-forge/pot/badges/downloads.svg)](https://anaconda.org/conda-forge/pot) +[![License](https://anaconda.org/conda-forge/pot/badges/license.svg)](https://github.com/PythonOT/POT/blob/master/LICENSE) + + +This open source Python library provide several solvers for optimization +problems related to Optimal Transport for signal, image processing and machine +learning. + +Website and documentation: [https://PythonOT.github.io/](https://PythonOT.github.io/) + +Source Code (MIT): [https://github.com/PythonOT/POT](https://github.com/PythonOT/POT) + +POT provides the following generic OT solvers (links to examples): + +* [OT Network Simplex solver](https://pythonot.github.io/auto_examples/plot_OT_1D.html) for the linear program/ Earth Movers Distance [1] . +* [Conditional gradient](https://pythonot.github.io/auto_examples/plot_optim_OTreg.html) [6] and [Generalized conditional gradient](https://pythonot.github.io/auto_examples/plot_optim_OTreg.html) for regularized OT [7]. +* Entropic regularization OT solver with [Sinkhorn Knopp Algorithm](https://pythonot.github.io/auto_examples/plot_OT_1D.html) [2] , stabilized version [9] [10], greedy Sinkhorn [22] and [Screening Sinkhorn [26] ](https://pythonot.github.io/auto_examples/plot_screenkhorn_1D.html) with optional GPU implementation (requires cupy). +* Bregman projections for [Wasserstein barycenter](https://pythonot.github.io/auto_examples/barycenters/plot_barycenter_lp_vs_entropic.html) [3], [convolutional barycenter](https://pythonot.github.io/auto_examples/barycenters/plot_convolutional_barycenter.html) [21] and unmixing [4]. +* Sinkhorn divergence [23] and entropic regularization OT from empirical data. +* [Smooth optimal transport solvers](https://pythonot.github.io/auto_examples/plot_OT_1D_smooth.html) (dual and semi-dual) for KL and squared L2 regularizations [17]. +* Non regularized [Wasserstein barycenters [16] ](https://pythonot.github.io/auto_examples/barycenters/plot_barycenter_lp_vs_entropic.html)) with LP solver (only small scale). +* [Gromov-Wasserstein distances](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) and [GW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_gromov_barycenter.html) (exact [13] and regularized [12]) + * [Fused-Gromov-Wasserstein distances solver](https://pythonot.github.io/auto_examples/gromov/plot_fgw.html#sphx-glr-auto-examples-plot-fgw-py) and [FGW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_barycenter_fgw.html) [24] +* [Stochastic solver](https://pythonot.github.io/auto_examples/plot_stochastic.html) for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19]) +* Non regularized [free support Wasserstein barycenters](https://pythonot.github.io/auto_examples/barycenters/plot_free_support_barycenter.html) [20]. +* [Unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_1D.html) with KL relaxation and [barycenter](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_barycenter_1D.html) [10, 25]. 
+* [Partial Wasserstein and Gromov-Wasserstein](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_partial_wass_and_gromov.html) (exact [29] and entropic [3] + formulations). + +POT provides the following Machine Learning related solvers: + +* [Optimal transport for domain + adaptation](https://pythonot.github.io/auto_examples/domain-adaptation/plot_otda_classes.html) + with [group lasso regularization](https://pythonot.github.io/auto_examples/domain-adaptation/plot_otda_classes.html), [Laplacian regularization](https://pythonot.github.io/auto_examples/domain-adaptation/plot_otda_laplacian.html) [5] [30] and [semi + supervised setting](https://pythonot.github.io/auto_examples/domain-adaptation/plot_otda_semi_supervised.html). +* [Linear OT mapping](https://pythonot.github.io/auto_examples/domain-adaptation/plot_otda_linear_mapping.html) [14] and [Joint OT mapping estimation](https://pythonot.github.io/auto_examples/domain-adaptation/plot_otda_mapping.html) [8]. +* [Wasserstein Discriminant Analysis](https://pythonot.github.io/auto_examples/others/plot_WDA.html) [11] (requires autograd + pymanopt). +* [JCPOT algorithm for multi-source domain adaptation with target shift](https://pythonot.github.io/auto_examples/domain-adaptation/plot_otda_jcpot.html) [27]. + +Some other examples are available in the [documentation](https://pythonot.github.io/auto_examples/index.html). + +#### Using and citing the toolbox + +If you use this toolbox in your research and find it useful, please cite POT +using the following reference: +``` +Rémi Flamary and Nicolas Courty, POT Python Optimal Transport library, +Website: https://pythonot.github.io/, 2017 +``` + +In Bibtex format: +``` +@misc{flamary2017pot, +title={POT Python Optimal Transport library}, +author={Flamary, R{'e}mi and Courty, Nicolas}, +url={https://pythonot.github.io/}, +year={2017} +} +``` + +## Installation + +The library has been tested on Linux, MacOSX and Windows. It requires a C++ compiler for building/installing the EMD solver and relies on the following Python modules: + +- Numpy (>=1.16) +- Scipy (>=1.0) +- Cython (>=0.23) +- Matplotlib (>=1.5) + +#### Pip installation + +Note that due to a limitation of pip, `cython` and `numpy` need to be installed +prior to installing POT. This can be done easily with +``` +pip install numpy cython +``` + +You can install the toolbox through PyPI with: +``` +pip install POT +``` +or get the very latest version by running: +``` +pip install -U https://github.com/PythonOT/POT/archive/master.zip # with --user for user install (no root) +``` + + + +#### Anaconda installation with conda-forge + +If you use the Anaconda python distribution, POT is available in [conda-forge](https://conda-forge.org). To install it and the required dependencies: +``` +conda install -c conda-forge pot +``` + +#### Post installation check +After a correct installation, you should be able to import the module without errors: +```python +import ot +``` +Note that for easier access the module is name ot instead of pot. + + +### Dependencies + +Some sub-modules require additional dependences which are discussed below + +* **ot.dr** (Wasserstein dimensionality reduction) depends on autograd and pymanopt that can be installed with: +``` +pip install pymanopt autograd +``` +* **ot.gpu** (GPU accelerated OT) depends on cupy that have to be installed following instructions on [this page](https://docs-cupy.chainer.org/en/stable/install.html). + + +obviously you need CUDA installed and a compatible GPU. 
+ +## Examples + +### Short examples + +* Import the toolbox +```python +import ot +``` +* Compute Wasserstein distances +```python +# a,b are 1D histograms (sum to 1 and positive) +# M is the ground cost matrix +Wd=ot.emd2(a,b,M) # exact linear program +Wd_reg=ot.sinkhorn2(a,b,M,reg) # entropic regularized OT +# if b is a matrix compute all distances to a and return a vector +``` +* Compute OT matrix +```python +# a,b are 1D histograms (sum to 1 and positive) +# M is the ground cost matrix +T=ot.emd(a,b,M) # exact linear program +T_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT +``` +* Compute Wasserstein barycenter +```python +# A is a n*d matrix containing d 1D histograms +# M is the ground cost matrix +ba=ot.barycenter(A,M,reg) # reg is regularization parameter +``` + +### Examples and Notebooks + +The examples folder contain several examples and use case for the library. The full documentation with examples and output is available on [https://PythonOT.github.io/](https://PythonOT.github.io/). + + +## Acknowledgements + +This toolbox has been created and is maintained by + +* [Rémi Flamary](http://remi.flamary.com/) +* [Nicolas Courty](http://people.irisa.fr/Nicolas.Courty/) + +The contributors to this library are + +* [Alexandre Gramfort](http://alexandre.gramfort.net/) (CI, documentation) +* [Laetitia Chapel](http://people.irisa.fr/Laetitia.Chapel/) (Partial OT) +* [Michael Perrot](http://perso.univ-st-etienne.fr/pem82055/) (Mapping estimation) +* [Léo Gautheron](https://github.com/aje) (GPU implementation) +* [Nathalie Gayraud](https://www.linkedin.com/in/nathalie-t-h-gayraud/?ppe=1) (DA classes) +* [Stanislas Chambon](https://slasnista.github.io/) (DA classes) +* [Antoine Rolet](https://arolet.github.io/) (EMD solver debug) +* Erwan Vautier (Gromov-Wasserstein) +* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic solvers) +* [Alain Rakotomamonjy](https://sites.google.com/site/alainrakotomamonjy/home) +* [Vayer Titouan](https://tvayer.github.io/) (Gromov-Wasserstein -, Fused-Gromov-Wasserstein) +* [Hicham Janati](https://hichamjanati.github.io/) (Unbalanced OT) +* [Romain Tavenard](https://rtavenar.github.io/) (1d Wasserstein) +* [Mokhtar Z. Alaya](http://mzalaya.github.io/) (Screenkhorn) +* [Ievgen Redko](https://ievred.github.io/) (Laplacian DA, JCPOT) + +This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages): + +* [Gabriel Peyré](http://gpeyre.github.io/) (Wasserstein Barycenters in Matlab) +* [Nicolas Bonneel](http://liris.cnrs.fr/~nbonneel/) ( C++ code for EMD) +* [Marco Cuturi](http://marcocuturi.net/) (Sinkhorn Knopp in Matlab/Cuda) + + +## Contributions and code of conduct + +Every contribution is welcome and should respect the [contribution guidelines](.github/CONTRIBUTING.md). Each member of the project is expected to follow the [code of conduct](.github/CODE_OF_CONDUCT.md). + +## Support + +You can ask questions and join the development discussion: + +* On the POT [slack channel](https://pot-toolbox.slack.com) +* On the POT [gitter channel](https://gitter.im/PythonOT/community) +* On the POT [mailing list](https://mail.python.org/mm3/mailman3/lists/pot.python.org/) + +You can also post bug reports and feature requests in Github issues. Make sure to read our [guidelines](.github/CONTRIBUTING.md) first. + +## References + +[1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W. (2011, December). 
[Displacement interpolation using Lagrangian mass transport](https://people.csail.mit.edu/sparis/publi/2011/sigasia/Bonneel_11_Displacement_Interpolation.pdf). In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p. 158). ACM. + +[2] Cuturi, M. (2013). [Sinkhorn distances: Lightspeed computation of optimal transport](https://arxiv.org/pdf/1306.0895.pdf). In Advances in Neural Information Processing Systems (pp. 2292-2300). + +[3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). [Iterative Bregman projections for regularized transportation problems](https://arxiv.org/pdf/1412.5154.pdf). SIAM Journal on Scientific Computing, 37(2), A1111-A1138. + +[4] S. Nakhostin, N. Courty, R. Flamary, D. Tuia, T. Corpetti, [Supervised planetary unmixing with optimal transport](https://hal.archives-ouvertes.fr/hal-01377236/document), Whorkshop on Hyperspectral Image and Signal Processing : Evolution in Remote Sensing (WHISPERS), 2016. + +[5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, [Optimal Transport for Domain Adaptation](https://arxiv.org/pdf/1507.00504.pdf), in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + +[6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). [Regularized discrete optimal transport](https://arxiv.org/pdf/1307.5551.pdf). SIAM Journal on Imaging Sciences, 7(3), 1853-1882. + +[7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). [Generalized conditional gradient: analysis of convergence and applications](https://arxiv.org/pdf/1510.06567.pdf). arXiv preprint arXiv:1510.06567. + +[8] M. Perrot, N. Courty, R. Flamary, A. Habrard (2016), [Mapping estimation for discrete optimal transport](http://remi.flamary.com/biblio/perrot2016mapping.pdf), Neural Information Processing Systems (NIPS). + +[9] Schmitzer, B. (2016). [Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems](https://arxiv.org/pdf/1610.06519.pdf). arXiv preprint arXiv:1610.06519. + +[10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). [Scaling algorithms for unbalanced transport problems](https://arxiv.org/pdf/1607.05816.pdf). arXiv preprint arXiv:1607.05816. + +[11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). [Wasserstein Discriminant Analysis](https://arxiv.org/pdf/1608.08063.pdf). arXiv preprint arXiv:1608.08063. + +[12] Gabriel Peyré, Marco Cuturi, and Justin Solomon (2016), [Gromov-Wasserstein averaging of kernel and distance matrices](http://proceedings.mlr.press/v48/peyre16.html) International Conference on Machine Learning (ICML). + +[13] Mémoli, Facundo (2011). [Gromov–Wasserstein distances and the metric approach to object matching](https://media.adelaide.edu.au/acvt/Publications/2011/2011-Gromov%E2%80%93Wasserstein%20Distances%20and%20the%20Metric%20Approach%20to%20Object%20Matching.pdf). Foundations of computational mathematics 11.4 : 417-487. + +[14] Knott, M. and Smith, C. S. (1984).[On the optimal mapping of distributions](https://link.springer.com/article/10.1007/BF00934745), Journal of Optimization Theory and Applications Vol 43. + +[15] Peyré, G., & Cuturi, M. (2018). [Computational Optimal Transport](https://arxiv.org/pdf/1803.00567.pdf) . + +[16] Agueh, M., & Carlier, G. (2011). [Barycenters in the Wasserstein space](https://hal.archives-ouvertes.fr/hal-00637399/document). SIAM Journal on Mathematical Analysis, 43(2), 904-924. + +[17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/abs/1710.06276). 
Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS). + +[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](https://arxiv.org/abs/1605.08527). Advances in Neural Information Processing Systems (2016). + +[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A.& Blondel, M. [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representation (2018) + +[20] Cuturi, M. and Doucet, A. (2014) [Fast Computation of Wasserstein Barycenters](http://proceedings.mlr.press/v32/cuturi14.html). International Conference in Machine Learning + +[21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015). [Convolutional wasserstein distances: Efficient optimal transportation on geometric domains](https://dl.acm.org/citation.cfm?id=2766963). ACM Transactions on Graphics (TOG), 34(4), 66. + +[22] J. Altschuler, J.Weed, P. Rigollet, (2017) [Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration](https://papers.nips.cc/paper/6792-near-linear-time-approximation-algorithms-for-optimal-transport-via-sinkhorn-iteration.pdf), Advances in Neural Information Processing Systems (NIPS) 31 + +[23] Aude, G., Peyré, G., Cuturi, M., [Learning Generative Models with Sinkhorn Divergences](https://arxiv.org/abs/1706.00292), Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018 + +[24] Vayer, T., Chapel, L., Flamary, R., Tavenard, R. and Courty, N. (2019). [Optimal Transport for structured data with application on graphs](http://proceedings.mlr.press/v97/titouan19a.html) Proceedings of the 36th International Conference on Machine Learning (ICML). + +[25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. (2015). [Learning with a Wasserstein Loss](http://cbcl.mit.edu/wasserstein/) Advances in Neural Information Processing Systems (NIPS). + +[26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019). [Screening Sinkhorn Algorithm for Regularized Optimal Transport](https://papers.nips.cc/paper/9386-screening-sinkhorn-algorithm-for-regularized-optimal-transport), Advances in Neural Information Processing Systems 33 (NeurIPS). + +[27] Redko I., Courty N., Flamary R., Tuia D. (2019). [Optimal Transport for Multi-source Domain Adaptation under Target Shift](http://proceedings.mlr.press/v89/redko19a.html), Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics (AISTATS) 22, 2019. + +[28] Caffarelli, L. A., McCann, R. J. (2010). [Free boundaries in optimal transport and Monge-Ampere obstacle problems](http://www.math.toronto.edu/~mccann/papers/annals2010.pdf), Annals of mathematics, 673-730. +<<<<<<< HEAD + +[29] Chapel, L., Alaya, M., Gasso, G. (2020). [Partial Optimal Transport with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), Advances in Neural Information Processing Systems (NeurIPS), 2020. +======= + +[29] Chapel, L., Alaya, M., Gasso, G. (2019). [Partial Gromov-Wasserstein with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), arXiv preprint arXiv:2002.08276. + +[30] Flamary R., Courty N., Tuia D., Rakotomamonjy A. (2014). 
[Optimal transport with Laplacian regularization: Applications to domain adaptation and shape matching](https://remi.flamary.com/biblio/flamary2014optlaplace.pdf), NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014. +>>>>>>> 55164e9272c9899ca413fca411d410ec28b736b0 From 5a1c9982c4002559b52f67a93e8d2fd9a64fcc1d Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Wed, 14 Oct 2020 11:05:20 +0200 Subject: [PATCH 06/10] pep8 bugfix --- ot/partial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ot/partial.py b/ot/partial.py index 590bf5a9d..855524917 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -570,7 +570,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, if a > 0: gamma = min(1, np.divide(-b, 2.0 * a)) else: - if (a+b) < 0: + if (a + b) < 0: gamma = 1 else: gamma = 0 From d136c8e621d50dfd9d1a9e60e0f5f807d8809174 Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Wed, 14 Oct 2020 11:32:48 +0200 Subject: [PATCH 07/10] modif doctest --- ot/partial.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ot/partial.py b/ot/partial.py index 855524917..a22bb1f68 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -500,8 +500,8 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, >>> np.round(partial_gromov_wasserstein(C1, C2, a, b, m=0.25),2) array([[0. , 0. , 0. , 0. ], [0. , 0. , 0. , 0. ], - [0. , 0. , 0. , 0. ], - [0. , 0. , 0. , 0.25]]) + [0. , 0. , 0.25, 0. ], + [0. , 0. , 0. , 0.]]) References ---------- @@ -899,13 +899,12 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, >>> y = np.array([3,2,98,199]).reshape((-1,1)) >>> C1 = sp.spatial.distance.cdist(x, x) >>> C2 = sp.spatial.distance.cdist(y, y) - >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b,50), 2) + >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b, 50), 2) array([[0.12, 0.13, 0. , 0. ], [0.13, 0.12, 0. , 0. ], [0. , 0. , 0.25, 0. ], [0. , 0. , 0. , 0.25]]) - >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b, 50, m=0.25), - 2) + >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b, 50,0.25), 2) array([[0.02, 0.03, 0. , 0.03], [0.03, 0.03, 0. , 0.03], [0. , 0. , 0.03, 0. ], From dcbd1b5bfce5142d20dc4b5a3d63eb903c08829f Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Wed, 14 Oct 2020 11:48:04 +0200 Subject: [PATCH 08/10] fix bugtests --- ot/partial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ot/partial.py b/ot/partial.py index a22bb1f68..a5b1165af 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -501,7 +501,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, array([[0. , 0. , 0. , 0. ], [0. , 0. , 0. , 0. ], [0. , 0. , 0.25, 0. ], - [0. , 0. , 0. , 0.]]) + [0. , 0. , 0. , 0. 
]]) References ---------- From eedf08d4267472a401b2a3c6aadb1246f2d15147 Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Thu, 15 Oct 2020 10:21:48 +0200 Subject: [PATCH 09/10] update on test_partial and test on the numerical precision on ot/partial --- ot/partial.py | 15 ++++++++------- test/test_partial.py | 6 ++++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/ot/partial.py b/ot/partial.py index a5b1165af..814d7797c 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -535,12 +535,12 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, while (err > tol and cpt < numItermax): - Gprev = G0.copy() + Gprev = np.copy(G0) M = gwgrad_partial(C1, C2, G0) M_emd = np.zeros(dim_G_extended) M_emd[:len(p), :len(q)] = M - M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5 + M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e2 M_emd = np.asarray(M_emd, dtype=np.float64) Gc, logemd = emd(p_extended, q_extended, M_emd, log=True, **kwargs) @@ -563,20 +563,21 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, gwloss_partial(C1, C2, G0))) deltaG = G0 - Gprev - grad = gwgrad_partial(C1, C2, deltaG) a = gwloss_partial(C1, C2, deltaG) - b = 2 * np.sum(grad * Gprev) - - if a > 0: + b = 2 * np.sum(M * deltaG) + if b > 0: # due to numerical precision + gamma = 0 + cpt = numItermax + elif a > 0: gamma = min(1, np.divide(-b, 2.0 * a)) else: if (a + b) < 0: gamma = 1 else: gamma = 0 + cpt = numItermax G0 = Gprev + gamma * deltaG - cpt += 1 if log: diff --git a/test/test_partial.py b/test/test_partial.py index 510e08147..121f345c9 100755 --- a/test/test_partial.py +++ b/test/test_partial.py @@ -51,10 +51,12 @@ def test_raise_errors(): ot.partial.partial_gromov_wasserstein(M, M, p, q, m=-1, log=True) with pytest.raises(ValueError): - ot.partial.entropic_partial_gromov_wasserstein(M, M, p, q, reg=1, m=2, log=True) + ot.partial.entropic_partial_gromov_wasserstein(M, M, p, q, reg=1, m=2, + log=True) with pytest.raises(ValueError): - ot.partial.entropic_partial_gromov_wasserstein(M, M, p, q, reg=1, m=-1, log=True) + ot.partial.entropic_partial_gromov_wasserstein(M, M, p, q, reg=1, m=-1, + log=True) def test_partial_wasserstein_lagrange(): From 26a843f09bc87befac343feb040c54aaac31976d Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Tue, 20 Oct 2020 10:01:43 +0200 Subject: [PATCH 10/10] resolve merge pb --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index da8f20b4e..95c96d969 100644 --- a/README.md +++ b/README.md @@ -259,12 +259,7 @@ You can also post bug reports and feature requests in Github issues. Make sure t [27] Redko I., Courty N., Flamary R., Tuia D. (2019). [Optimal Transport for Multi-source Domain Adaptation under Target Shift](http://proceedings.mlr.press/v89/redko19a.html), Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics (AISTATS) 22, 2019. [28] Caffarelli, L. A., McCann, R. J. (2010). [Free boundaries in optimal transport and Monge-Ampere obstacle problems](http://www.math.toronto.edu/~mccann/papers/annals2010.pdf), Annals of mathematics, 673-730. -<<<<<<< HEAD [29] Chapel, L., Alaya, M., Gasso, G. (2020). [Partial Optimal Transport with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), Advances in Neural Information Processing Systems (NeurIPS), 2020. -======= - -[29] Chapel, L., Alaya, M., Gasso, G. (2019). 
[Partial Gromov-Wasserstein with Applications on Positive-Unlabeled Learning](https://arxiv.org/abs/2002.08276), arXiv preprint arXiv:2002.08276. [30] Flamary R., Courty N., Tuia D., Rakotomamonjy A. (2014). [Optimal transport with Laplacian regularization: Applications to domain adaptation and shape matching](https://remi.flamary.com/biblio/flamary2014optlaplace.pdf), NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014. ->>>>>>> 55164e9272c9899ca413fca411d410ec28b736b0
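A minimal usage sketch of the solvers touched by this patch series, mirroring the calls made in `examples/plot_partial_wass_and_gromov.py` and the `ot/partial.py` doctests above. The toy point clouds, the sample sizes, and the mass fraction `m = 2/3` are illustrative assumptions rather than values from the example or the test suite; only the function names, the argument order, and the `partial_gw_dist` log key are taken from the patches themselves.

```python
# Illustrative sketch only: exercises ot.partial.partial_gromov_wasserstein
# (the exact Frank-Wolfe solver whose line search is added in PATCH 01 and
# adjusted for numerical precision in PATCH 09) and
# ot.partial.entropic_partial_gromov_wasserstein, following the calls in
# examples/plot_partial_wass_and_gromov.py. The data and m are arbitrary.
import numpy as np
from scipy.spatial.distance import cdist

import ot

rng = np.random.RandomState(42)
xs = rng.randn(20, 2)      # source samples (2D)
xt = rng.randn(30, 3)      # target samples (3D); GW only compares intra-domain distances

C1 = cdist(xs, xs)         # pairwise distance matrix within the source domain
C2 = cdist(xt, xt)         # pairwise distance matrix within the target domain

p = ot.unif(len(xs))       # uniform marginals
q = ot.unif(len(xt))

m = 2 / 3                  # fraction of the mass to transport

# exact partial Gromov-Wasserstein
T0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m,
                                                 log=True, verbose=True)

# entropic variant with regularization 10, as in the example script
T, log = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, p, q, 10,
                                                        m=m, log=True)

print('partial GW distance:         ', log0['partial_gw_dist'])
print('entropic partial GW distance:', log['partial_gw_dist'])
print('transported mass:', T0.sum(), T.sum())   # both should be close to m
```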