From 793cf60f91041cc4045b78d19c26f8dacf71b91f Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Sat, 1 Jun 2019 15:53:41 +0530 Subject: [PATCH 01/22] add Cohens Kappa Metric --- tensorflow_addons/metrics/cohens_kappa.py | 140 ++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tensorflow_addons/metrics/cohens_kappa.py diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py new file mode 100644 index 0000000000..b7334768f1 --- /dev/null +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -0,0 +1,140 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implements Cohen's Kappa""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import tensorflow as tf +import tensorflow.keras.backend as K +from tensorflow.math import confusion_matrix +from tensorflow.keras.metrics import Metric +from tensorflow_addons.utils import keras_utils + + +@keras_utils.register_keras_custom_object +class CohensKappa(Metric): + """Computes Kappa score between two raters. + + The score lies in the range [-1,1]. A score of -1 represents + complete disagreement between two raters whereas a score of 1 + represents complete agreement between the two raters. + A score of 0 means agreement by chance. + + Note: As of now, this implementation considers all labels + while calculating the Cohen's Kappa score. + + Usage: + ```python + actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) + preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) + + m = tf.keras.metrics.CohensKappa() + m.update_state(actuals, preds, "quadratic") + print('Final result: ', m.result().numpy()) # Result: 0.68932 + ``` + Usage with tf.keras API: + ```python + model = keras.models.Model(inputs, outputs) + model.add_metric(tf.keras.metrics.CohensKappa(name='kp_score')(outputs)) + model.compile('sgd', loss='mse') + ``` + + Args: + y1 : array, shape = [n_samples] + Labels assigned by the first annotator. + y2 : array, shape = [n_samples] + Labels assigned by the second annotator. The kappa statistic is + symmetric, so swapping ``y1`` and ``y2`` doesn't change the value. + sample_weight(optional) : None or str + A string denoting the type of weighting to be used. + Valid values for this parameter are [None, 'linear', 'quadratic']. + Default value is None. + + + Returns: + kappa_score : float + The kappa statistic, which is a number between -1 and 1. The maximum + value means complete agreement; zero or lower means chance agreement. + + + Raises: + ValueError: If the value passed for `sample_weight` is invalid + i.e. 
not any one of [None, 'linear', 'quadratic'] + + """ + def __init__(self, name='cohens_kappa', dtype=tf.float32,): + super(CohensKappa, self).__init__(name=name, dtype=dtype) + self.kappa_score = self.add_weight('kappa_score', + initializer=None) + + def update_state(self, y_true, y_pred, sample_weight=None): + y_true = tf.cast(y_true, dtype=tf.int32) + y_pred = tf.cast(y_pred, dtype=tf.int32) + + # 1. Get the confusion matrix + conf_mtx = confusion_matrix(labels=y_true, predictions=y_pred) + nb_ratings = tf.shape(conf_mtx)[0] + + # 2. Create a weight matrix + if sample_weight is None: + weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) + diagonal = tf.zeros([5], dtype=tf.int32) + weight_mtx = tf.linalg.set_diag(weight_mtx, diagonal=diagonal) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + + elif sample_weight == "linear": + weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) + weight_mtx += tf.range(nb_ratings, dtype=tf.int32) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + weight_mtx = tf.abs(weight_mtx - K.transpose(weight_mtx)) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + + elif sample_weight == "quadratic": + weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) + weight_mtx += tf.range(nb_ratings, dtype=tf.int32) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + weight_mtx = K.pow((weight_mtx - K.transpose(weight_mtx)), 2) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + + else: + raise ValueError("Unknown kappa weighting type.") + + # 3. Get counts + actual_ratings_hist = K.sum(conf_mtx, axis=1) + pred_ratings_hist = K.sum(conf_mtx, axis=0) + + # 4. Get the outer product + out_prod = pred_ratings_hist[..., None] * actual_ratings_hist[None, ...] + + # 5. Normalize the confusion matrxi and outer product + conf_mtx = conf_mtx / K.sum(conf_mtx) + out_prod = out_prod / K.sum(out_prod) + + conf_mtx = tf.cast(conf_mtx, dtype=tf.float32) + out_prod = tf.cast(out_prod, dtype=tf.float32) + + # 6. Calculate Kappa score + numerator = K.sum(conf_mtx * weight_mtx) + denominator = K.sum(out_prod * weight_mtx) + kp = 1-(numerator/denominator) + + return self.kappa_score.assign(kp) + + + def result(self): + return self.kappa_score \ No newline at end of file From b3c6e36bf14a10f8942e92a2b7da45715b3f7988 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Sat, 1 Jun 2019 15:54:00 +0530 Subject: [PATCH 02/22] add tests for Cohens Kappa Metric --- .../metrics/cohens_kappa_test.py | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tensorflow_addons/metrics/cohens_kappa_test.py diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py new file mode 100644 index 0000000000..0b6907c449 --- /dev/null +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -0,0 +1,91 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Cohen's Kappa Metric.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +from tensorflow_addons.metrics import CohensKappa +from tensorflow_addons.utils import test_utils + +@test_utils.run_all_in_graph_and_eager_modes +class CohensKappaTest(tf.test.TestCase): + def test_config(self): + kp_obj = CohensKappa(name='cohens_kappa') + self.assertEqual(kp_obj.name, 'cohens_kappa') + + def test_kappa(self): + kp_obj = CohensKappa() + + # random score + actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) + preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + + score1 = kp_obj.update_state(actuals, preds, sample_weight=None) + score2 = kp_obj.update_state(actuals, preds, sample_weight='linear') + score3 = kp_obj.update_state(actuals, preds, sample_weight='quadratic') + + score1 = self.evaluate(score1) + score2 = self.evaluate(score2) + score3 = self.evaluate(score3) + + self.assertAlmostEqual(score1, 0.61904, 4) + self.assertAlmostEqual(score2, 0.62790, 4) + self.assertAlmostEqual(score3, 0.68932, 4) + + + # perfect score + actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) + preds = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + + score1 = kp_obj.update_state(actuals, preds, sample_weight=None) + score2 = kp_obj.update_state(actuals, preds, sample_weight='linear') + score3 = kp_obj.update_state(actuals, preds, sample_weight='quadratic') + + score1 = self.evaluate(score1) + score2 = self.evaluate(score2) + score3 = self.evaluate(score3) + + self.assertAlmostEqual(score1, 1.0, 4) + self.assertAlmostEqual(score2, 1.0, 4) + self.assertAlmostEqual(score3, 1.0, 4) + + + # worse than random + actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) + preds = np.array([1, 2, 4, 1, 3, 3, 4, 4], dtype=np.int32) + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + + score1 = kp_obj.update_state(actuals, preds, sample_weight=None) + score2 = kp_obj.update_state(actuals, preds, sample_weight='linear') + score3 = kp_obj.update_state(actuals, preds, sample_weight='quadratic') + + score1 = self.evaluate(score1) + score2 = self.evaluate(score2) + score3 = self.evaluate(score3) + + self.assertAlmostEqual(score1, -0.33333, 4) + self.assertAlmostEqual(score2, -0.52380, 4) + self.assertAlmostEqual(score3, -0.72727, 4) + From 56394d1d9ef548ee5111135d29ed9a9f38a63b1d Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Sat, 1 Jun 2019 15:54:25 +0530 Subject: [PATCH 03/22] include Cohens Kappa and tests --- tensorflow_addons/metrics/BUILD | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow_addons/metrics/BUILD b/tensorflow_addons/metrics/BUILD index 93979903a1..b1208e465e 100644 --- a/tensorflow_addons/metrics/BUILD +++ b/tensorflow_addons/metrics/BUILD @@ -6,9 +6,23 @@ py_library( name = "metrics", srcs = [ "__init__.py", + "cohens_kappa.py", ], srcs_version = "PY2AND3", deps = [ "//tensorflow_addons/utils", ], ) + +py_test( + name = "cohens_kappa_test", + size = "small", + srcs = [ + "cohens_kappa_test.py", + ], + main = "cohens_kappa_test.py", + srcs_version = 
"PY2AND3", + deps = [ + ":metrics", + ], +) From d669b116c577e8357719458f027288541fd32c86 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Mon, 3 Jun 2019 19:14:51 +0530 Subject: [PATCH 04/22] code refactor and remove extra lines --- tensorflow_addons/metrics/cohens_kappa.py | 36 ++++++++++------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index b7334768f1..44113712f5 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -30,7 +30,7 @@ class CohensKappa(Metric): """Computes Kappa score between two raters. - The score lies in the range [-1,1]. A score of -1 represents + The score lies in the range [-1, 1]. A score of -1 represents complete disagreement between two raters whereas a score of 1 represents complete agreement between the two raters. A score of 0 means agreement by chance. @@ -65,19 +65,17 @@ class CohensKappa(Metric): Valid values for this parameter are [None, 'linear', 'quadratic']. Default value is None. - Returns: kappa_score : float The kappa statistic, which is a number between -1 and 1. The maximum value means complete agreement; zero or lower means chance agreement. - Raises: ValueError: If the value passed for `sample_weight` is invalid i.e. not any one of [None, 'linear', 'quadratic'] """ - def __init__(self, name='cohens_kappa', dtype=tf.float32,): + def __init__(self, name='cohens_kappa', dtype=tf.float32): super(CohensKappa, self).__init__(name=name, dtype=dtype) self.kappa_score = self.add_weight('kappa_score', initializer=None) @@ -86,34 +84,31 @@ def update_state(self, y_true, y_pred, sample_weight=None): y_true = tf.cast(y_true, dtype=tf.int32) y_pred = tf.cast(y_pred, dtype=tf.int32) + # check if weighting type is valid + if sample_weight not in (None, 'linear', 'quadratic'): + raise ValueError("Unknown kappa weighting type.") + # 1. Get the confusion matrix conf_mtx = confusion_matrix(labels=y_true, predictions=y_pred) nb_ratings = tf.shape(conf_mtx)[0] + weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) # 2. Create a weight matrix if sample_weight is None: - weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) diagonal = tf.zeros([5], dtype=tf.int32) weight_mtx = tf.linalg.set_diag(weight_mtx, diagonal=diagonal) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - elif sample_weight == "linear": - weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) - weight_mtx += tf.range(nb_ratings, dtype=tf.int32) - weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - weight_mtx = tf.abs(weight_mtx - K.transpose(weight_mtx)) - weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - - elif sample_weight == "quadratic": - weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) + else: weight_mtx += tf.range(nb_ratings, dtype=tf.int32) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - weight_mtx = K.pow((weight_mtx - K.transpose(weight_mtx)), 2) + + if sample_weight=='linear': + weight_mtx = tf.abs(weight_mtx - K.transpose(weight_mtx)) + else: + weight_mtx = K.pow((weight_mtx - K.transpose(weight_mtx)), 2) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - else: - raise ValueError("Unknown kappa weighting type.") - # 3. Get counts actual_ratings_hist = K.sum(conf_mtx, axis=1) pred_ratings_hist = K.sum(conf_mtx, axis=0) @@ -121,7 +116,7 @@ def update_state(self, y_true, y_pred, sample_weight=None): # 4. 
Get the outer product out_prod = pred_ratings_hist[..., None] * actual_ratings_hist[None, ...] - # 5. Normalize the confusion matrxi and outer product + # 5. Normalize the confusion matrix and outer product conf_mtx = conf_mtx / K.sum(conf_mtx) out_prod = out_prod / K.sum(out_prod) @@ -130,11 +125,10 @@ def update_state(self, y_true, y_pred, sample_weight=None): # 6. Calculate Kappa score numerator = K.sum(conf_mtx * weight_mtx) - denominator = K.sum(out_prod * weight_mtx) + denominator = K.sum(out_prod * weight_mtx) kp = 1-(numerator/denominator) return self.kappa_score.assign(kp) - def result(self): return self.kappa_score \ No newline at end of file From 142e61e785e6a81a2041ff0c21c8c81a1564a48a Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Mon, 3 Jun 2019 19:15:18 +0530 Subject: [PATCH 05/22] add separate tests for each case --- tensorflow_addons/metrics/cohens_kappa_test.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py index 0b6907c449..4d5801ecff 100644 --- a/tensorflow_addons/metrics/cohens_kappa_test.py +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -30,9 +30,7 @@ def test_config(self): kp_obj = CohensKappa(name='cohens_kappa') self.assertEqual(kp_obj.name, 'cohens_kappa') - def test_kappa(self): - kp_obj = CohensKappa() - + def test_kappa_random_score(self): # random score actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) @@ -51,7 +49,7 @@ def test_kappa(self): self.assertAlmostEqual(score2, 0.62790, 4) self.assertAlmostEqual(score3, 0.68932, 4) - + def test_kappa_perfect_score(self): # perfect score actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) @@ -70,7 +68,7 @@ def test_kappa(self): self.assertAlmostEqual(score2, 1.0, 4) self.assertAlmostEqual(score3, 1.0, 4) - + def test_kappa_worse_than_random(self) # worse than random actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) preds = np.array([1, 2, 4, 1, 3, 3, 4, 4], dtype=np.int32) @@ -88,4 +86,3 @@ def test_kappa(self): self.assertAlmostEqual(score1, -0.33333, 4) self.assertAlmostEqual(score2, -0.52380, 4) self.assertAlmostEqual(score3, -0.72727, 4) - From 29757da0bdecce85608507cfa65a9d00f7a3a1e9 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Mon, 3 Jun 2019 19:22:46 +0530 Subject: [PATCH 06/22] refactor code --- tensorflow_addons/metrics/cohens_kappa_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py index 4d5801ecff..d4a77848b9 100644 --- a/tensorflow_addons/metrics/cohens_kappa_test.py +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -31,6 +31,7 @@ def test_config(self): self.assertEqual(kp_obj.name, 'cohens_kappa') def test_kappa_random_score(self): + kp_obj = CohensKappa() # random score actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) @@ -50,6 +51,7 @@ def test_kappa_random_score(self): self.assertAlmostEqual(score3, 0.68932, 4) def test_kappa_perfect_score(self): + kp_obj = CohensKappa() # perfect score actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) @@ -68,8 +70,9 @@ def test_kappa_perfect_score(self): self.assertAlmostEqual(score2, 1.0, 4) 
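        # With identical ratings every count lands on the diagonal, where all
        # three weight matrices are zero, so the weighted disagreement is 0
        # and kappa is exactly 1.0 for each weighting variant.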
self.assertAlmostEqual(score3, 1.0, 4) - def test_kappa_worse_than_random(self) - # worse than random + def test_kappa_worse_than_random(self): + kp_obj = CohensKappa() + #worse than random actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) preds = np.array([1, 2, 4, 1, 3, 3, 4, 4], dtype=np.int32) actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) @@ -86,3 +89,6 @@ def test_kappa_worse_than_random(self) self.assertAlmostEqual(score1, -0.33333, 4) self.assertAlmostEqual(score2, -0.52380, 4) self.assertAlmostEqual(score3, -0.72727, 4) + +if __name__ == '__main__': + tf.test.main() \ No newline at end of file From ec2ed3709453e642d7cbdc985000e7ab1e5af683 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Mon, 10 Jun 2019 21:34:31 +0530 Subject: [PATCH 07/22] make the metric stateful --- tensorflow_addons/metrics/cohens_kappa.py | 91 +++++++++++++++-------- 1 file changed, 59 insertions(+), 32 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index 44113712f5..fd3171fa66 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -43,27 +43,22 @@ class CohensKappa(Metric): actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) - m = tf.keras.metrics.CohensKappa() + m = tf.keras.metrics.CohensKappa(num_classes=5) m.update_state(actuals, preds, "quadratic") print('Final result: ', m.result().numpy()) # Result: 0.68932 ``` Usage with tf.keras API: ```python model = keras.models.Model(inputs, outputs) - model.add_metric(tf.keras.metrics.CohensKappa(name='kp_score')(outputs)) + model.add_metric(tf.keras.metrics.CohensKappa(num_classes=5)(outputs)) model.compile('sgd', loss='mse') ``` Args: - y1 : array, shape = [n_samples] - Labels assigned by the first annotator. - y2 : array, shape = [n_samples] - Labels assigned by the second annotator. The kappa statistic is - symmetric, so swapping ``y1`` and ``y2`` doesn't change the value. - sample_weight(optional) : None or str - A string denoting the type of weighting to be used. - Valid values for this parameter are [None, 'linear', 'quadratic']. - Default value is None. + num_classes : Number of unique classes in your dataset + weightage : Type of weighting to be considered for calculating + kappa statistics. A valid value is one of [None, 'linear', 'quadratic']. + Defaults to None Returns: kappa_score : float @@ -71,30 +66,65 @@ class CohensKappa(Metric): value means complete agreement; zero or lower means chance agreement. Raises: - ValueError: If the value passed for `sample_weight` is invalid + ValueError: If the value passed for `weightage` is invalid i.e. not any one of [None, 'linear', 'quadratic'] """ - def __init__(self, name='cohens_kappa', dtype=tf.float32): + def __init__(self, + num_classes, + name='cohens_kappa', + weightage=None, + dtype=tf.float32): super(CohensKappa, self).__init__(name=name, dtype=dtype) - self.kappa_score = self.add_weight('kappa_score', - initializer=None) + + if weightage not in (None, 'linear', 'quadratic'): + raise ValueError("Unknown kappa weighting type.") + else: + self.weightage = weightage + + self.num_classes = num_classes + self.conf_mtx = self.add_weight('conf_mtx', + shape=(self.num_classes, self.num_classes), + initializer=tf.initializers.zeros, + dtype=tf.int32) def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates the confusion matrix condition statistics. 
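+
+        Each call folds the current batch's confusion matrix into the
+        accumulated ``conf_mtx`` variable; the kappa score itself is only
+        computed when ``result()`` is called.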
+ + Args: + y1 : array, shape = [n_samples] + Labels assigned by the first annotator. + y2 : array, shape = [n_samples] + Labels assigned by the second annotator. The kappa statistic is + symmetric, so swapping ``y1`` and ``y2`` doesn't change the value. + sample_weight(optional) : for weighting labels in confusion matrix + Default is None. Check tf.math.consfusion_matrix for details + + Returns: + Update op. + + """ y_true = tf.cast(y_true, dtype=tf.int32) y_pred = tf.cast(y_pred, dtype=tf.int32) + + if y_true.shape != y_pred.shape: + raise ValueError("Number of samples in y_true and y_pred are different") + + # compute the new values of the confusion matrix + new_conf_mtx = confusion_matrix(labels=y_true, + predictions=y_pred, + num_classes=self.num_classes, + weights=sample_weight) + + # update the values in the orifinal confusion matrix + return self.conf_mtx.assign_add(new_conf_mtx) - # check if weighting type is valid - if sample_weight not in (None, 'linear', 'quadratic'): - raise ValueError("Unknown kappa weighting type.") - - # 1. Get the confusion matrix - conf_mtx = confusion_matrix(labels=y_true, predictions=y_pred) - nb_ratings = tf.shape(conf_mtx)[0] + def result(self): + nb_ratings = tf.shape(self.conf_mtx)[0] weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) # 2. Create a weight matrix - if sample_weight is None: + if self.weightage is None: diagonal = tf.zeros([5], dtype=tf.int32) weight_mtx = tf.linalg.set_diag(weight_mtx, diagonal=diagonal) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) @@ -103,21 +133,21 @@ def update_state(self, y_true, y_pred, sample_weight=None): weight_mtx += tf.range(nb_ratings, dtype=tf.int32) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - if sample_weight=='linear': + if self.weightage=='linear': weight_mtx = tf.abs(weight_mtx - K.transpose(weight_mtx)) else: weight_mtx = K.pow((weight_mtx - K.transpose(weight_mtx)), 2) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) # 3. Get counts - actual_ratings_hist = K.sum(conf_mtx, axis=1) - pred_ratings_hist = K.sum(conf_mtx, axis=0) + actual_ratings_hist = K.sum(self.conf_mtx, axis=1) + pred_ratings_hist = K.sum(self.conf_mtx, axis=0) # 4. Get the outer product out_prod = pred_ratings_hist[..., None] * actual_ratings_hist[None, ...] # 5. Normalize the confusion matrix and outer product - conf_mtx = conf_mtx / K.sum(conf_mtx) + conf_mtx = self.conf_mtx / K.sum(self.conf_mtx) out_prod = out_prod / K.sum(out_prod) conf_mtx = tf.cast(conf_mtx, dtype=tf.float32) @@ -125,10 +155,7 @@ def update_state(self, y_true, y_pred, sample_weight=None): # 6. 
Calculate Kappa score numerator = K.sum(conf_mtx * weight_mtx) - denominator = K.sum(out_prod * weight_mtx) + denominator = K.sum(out_prod * weight_mtx) kp = 1-(numerator/denominator) - return self.kappa_score.assign(kp) - - def result(self): - return self.kappa_score \ No newline at end of file + return kp \ No newline at end of file From 2b679c672ed1583130c2b270cba1b360af6845f0 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Mon, 10 Jun 2019 21:35:56 +0530 Subject: [PATCH 08/22] refactor tests --- .../metrics/cohens_kappa_test.py | 96 ++++++++++++------- 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py index d4a77848b9..8585362c51 100644 --- a/tensorflow_addons/metrics/cohens_kappa_test.py +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -29,66 +29,88 @@ class CohensKappaTest(tf.test.TestCase): def test_config(self): kp_obj = CohensKappa(name='cohens_kappa') self.assertEqual(kp_obj.name, 'cohens_kappa') - + def test_kappa_random_score(self): - kp_obj = CohensKappa() + kp_obj1 = CohensKappa(num_classes=5) + kp_obj2 = CohensKappa(num_classes=5, weightage='linear') + kp_obj3 = CohensKappa(num_classes=5, weightage='quadratic') + + self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) + # random score actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) preds = tf.convert_to_tensor(preds, dtype=tf.int32) - score1 = kp_obj.update_state(actuals, preds, sample_weight=None) - score2 = kp_obj.update_state(actuals, preds, sample_weight='linear') - score3 = kp_obj.update_state(actuals, preds, sample_weight='quadratic') - - score1 = self.evaluate(score1) - score2 = self.evaluate(score2) - score3 = self.evaluate(score3) + update_op1 = kp_obj1.update_state(actuals, preds) + update_op2 = kp_obj2.update_state(actuals, preds) + update_op3 = kp_obj3.update_state(actuals, preds) + + self.evaluate(update_op1) + self.evaluate(update_op2) + self.evaluate(update_op3) - self.assertAlmostEqual(score1, 0.61904, 4) - self.assertAlmostEqual(score2, 0.62790, 4) - self.assertAlmostEqual(score3, 0.68932, 4) + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 0.61904, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 0.62790, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 0.68932, 4) def test_kappa_perfect_score(self): - kp_obj = CohensKappa() + kp_obj1 = CohensKappa(num_classes=5) + kp_obj2 = CohensKappa(num_classes=5, weightage='linear') + kp_obj3 = CohensKappa(num_classes=5, weightage='quadratic') + + self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) + # perfect score actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) preds = tf.convert_to_tensor(preds, dtype=tf.int32) - score1 = kp_obj.update_state(actuals, preds, sample_weight=None) - score2 = kp_obj.update_state(actuals, preds, sample_weight='linear') - score3 = kp_obj.update_state(actuals, preds, sample_weight='quadratic') - - score1 = 
self.evaluate(score1) - score2 = self.evaluate(score2) - score3 = self.evaluate(score3) - - self.assertAlmostEqual(score1, 1.0, 4) - self.assertAlmostEqual(score2, 1.0, 4) - self.assertAlmostEqual(score3, 1.0, 4) + update_op1 = kp_obj1.update_state(actuals, preds) + update_op2 = kp_obj2.update_state(actuals, preds) + update_op3 = kp_obj3.update_state(actuals, preds) + + self.evaluate(update_op1) + self.evaluate(update_op2) + self.evaluate(update_op3) + + + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 1.0, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 1.0, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 1.0, 4) def test_kappa_worse_than_random(self): - kp_obj = CohensKappa() - #worse than random + kp_obj1 = CohensKappa(num_classes=5) + kp_obj2 = CohensKappa(num_classes=5, weightage='linear') + kp_obj3 = CohensKappa(num_classes=5, weightage='quadratic') + + self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) + + # worse than random actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) preds = np.array([1, 2, 4, 1, 3, 3, 4, 4], dtype=np.int32) actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) preds = tf.convert_to_tensor(preds, dtype=tf.int32) - score1 = kp_obj.update_state(actuals, preds, sample_weight=None) - score2 = kp_obj.update_state(actuals, preds, sample_weight='linear') - score3 = kp_obj.update_state(actuals, preds, sample_weight='quadratic') - - score1 = self.evaluate(score1) - score2 = self.evaluate(score2) - score3 = self.evaluate(score3) + update_op1 = kp_obj1.update_state(actuals, preds) + update_op2 = kp_obj2.update_state(actuals, preds) + update_op3 = kp_obj3.update_state(actuals, preds) + + self.evaluate(update_op1) + self.evaluate(update_op2) + self.evaluate(update_op3) - self.assertAlmostEqual(score1, -0.33333, 4) - self.assertAlmostEqual(score2, -0.52380, 4) - self.assertAlmostEqual(score3, -0.72727, 4) + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.33333, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.52380, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.72727, 4) if __name__ == '__main__': - tf.test.main() \ No newline at end of file + tf.test.main() \ No newline at end of file From 85f641ef3c5455c238cc1b1890969906adcd8c45 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 18:02:10 +0530 Subject: [PATCH 09/22] add get_config and reset_states methods --- tensorflow_addons/metrics/cohens_kappa.py | 43 ++++++++++++++++------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index fd3171fa66..96d181c853 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -27,7 +27,7 @@ @keras_utils.register_keras_custom_object -class CohensKappa(Metric): +class CohenKappa(Metric): """Computes Kappa score between two raters. The score lies in the range [-1, 1]. 
A score of -1 represents @@ -43,14 +43,14 @@ class CohensKappa(Metric): actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) - m = tf.keras.metrics.CohensKappa(num_classes=5) + m = tf.keras.metrics.CohenKappa(num_classes=5) m.update_state(actuals, preds, "quadratic") print('Final result: ', m.result().numpy()) # Result: 0.68932 ``` Usage with tf.keras API: ```python model = keras.models.Model(inputs, outputs) - model.add_metric(tf.keras.metrics.CohensKappa(num_classes=5)(outputs)) + model.add_metric(tf.keras.metrics.CohenKappa(num_classes=5)(outputs)) model.compile('sgd', loss='mse') ``` @@ -72,10 +72,10 @@ class CohensKappa(Metric): """ def __init__(self, num_classes, - name='cohens_kappa', + name='cohen_kappa', weightage=None, dtype=tf.float32): - super(CohensKappa, self).__init__(name=name, dtype=dtype) + super(CohenKappa, self).__init__(name=name, dtype=dtype) if weightage not in (None, 'linear', 'quadratic'): raise ValueError("Unknown kappa weighting type.") @@ -92,13 +92,17 @@ def update_state(self, y_true, y_pred, sample_weight=None): """Accumulates the confusion matrix condition statistics. Args: - y1 : array, shape = [n_samples] - Labels assigned by the first annotator. - y2 : array, shape = [n_samples] - Labels assigned by the second annotator. The kappa statistic is - symmetric, so swapping ``y1`` and ``y2`` doesn't change the value. + y_true : array, shape = [n_samples] + Labels assigned by the first annotator. + y_pred : array, shape = [n_samples] + Labels assigned by the second annotator. The kappa statistic + is symmetric, so swapping ``y_true`` and ``y_pred`` doesn't + change the value. sample_weight(optional) : for weighting labels in confusion matrix - Default is None. Check tf.math.consfusion_matrix for details + Default is None. The dtype for weights should be the same as + the dtype for confusion matrix.Check tf.math.consfusion_matrix + for details + Returns: Update op. 
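+
+        For example, ``update_state([1, 2], [1, 1])`` adds one count each to
+        ``conf_mtx[1, 1]`` and ``conf_mtx[2, 1]``, since rows index the true
+        labels and columns the predictions.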
@@ -157,5 +161,20 @@ def result(self): numerator = K.sum(conf_mtx * weight_mtx) denominator = K.sum(out_prod * weight_mtx) kp = 1-(numerator/denominator) + return kp + + def get_config(self): + """Returns the serializable config of the metric.""" + + config = { + "num_classes": self.num_classes, + "weightage": self.weightage, + } + base_config = super(CohenKappa, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def reset_states(self): + """Resets all of the metric state variables.""" - return kp \ No newline at end of file + for v in self.variables: + K.set_value(v ,np.zeros((self.num_classes, self.num_classes), np.int32)) \ No newline at end of file From db6bddd07f13dfde14e1d342fe56ed82d3a261b8 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 18:02:34 +0530 Subject: [PATCH 10/22] refactor code and add test for sample_weight param --- .../metrics/cohens_kappa_test.py | 124 ++++++++++-------- 1 file changed, 67 insertions(+), 57 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py index 8585362c51..cc9c8d7e3a 100644 --- a/tensorflow_addons/metrics/cohens_kappa_test.py +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -25,92 +25,102 @@ from tensorflow_addons.utils import test_utils @test_utils.run_all_in_graph_and_eager_modes -class CohensKappaTest(tf.test.TestCase): +class CohenKappaTest(tf.test.TestCase): def test_config(self): - kp_obj = CohensKappa(name='cohens_kappa') - self.assertEqual(kp_obj.name, 'cohens_kappa') - - def test_kappa_random_score(self): - kp_obj1 = CohensKappa(num_classes=5) - kp_obj2 = CohensKappa(num_classes=5, weightage='linear') - kp_obj3 = CohensKappa(num_classes=5, weightage='quadratic') + kp_obj = CohenKappa(name='cohen_kappa', num_classes=5) + self.assertEqual(kp_obj.name, 'cohen_kappa') + self.assertEqual(kp_obj.dtype, tf.float32) + self.assertEqual(kp_obj.num_classes, 5) + + # Check save and restore config + kb_obj2 = CohenKappa.from_config(kp_obj.get_config()) + self.assertEqual(kb_obj2.name, 'cohen_kappa') + self.assertEqual(kb_obj2.dtype, tf.float32) + self.assertEqual(kp_obj.num_classes, 5) + + def initialize_vars(self): + kp_obj1 = CohenKappa(num_classes=5) + kp_obj2 = CohenKappa(num_classes=5, weightage='linear') + kp_obj3 = CohenKappa(num_classes=5, weightage='quadratic') self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) - # random score - actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) - preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) - - update_op1 = kp_obj1.update_state(actuals, preds) - update_op2 = kp_obj2.update_state(actuals, preds) - update_op3 = kp_obj3.update_state(actuals, preds) + return kp_obj1, kp_obj2, kp_obj3 + + def update_obj_states(self, obj1, obj2, obj3, actuals, preds, weights): + update_op1 = obj1.update_state(actuals, preds, sample_weight=weights) + update_op2 = obj2.update_state(actuals, preds, sample_weight=weights) + update_op3 = obj3.update_state(actuals, preds, sample_weight=weights) self.evaluate(update_op1) self.evaluate(update_op2) self.evaluate(update_op3) - + + def test_kappa_random_score(self): + actuals = [4, 4, 3, 4, 2, 4, 1, 1] + preds = [4, 4, 3, 4, 4, 2, 1, 1] + actuals = 
tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() + + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) + + # Check results self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 0.61904, 4) self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 0.62790, 4) self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 0.68932, 4) def test_kappa_perfect_score(self): - kp_obj1 = CohensKappa(num_classes=5) - kp_obj2 = CohensKappa(num_classes=5, weightage='linear') - kp_obj3 = CohensKappa(num_classes=5, weightage='quadratic') - - self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) - self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) - self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) - - # perfect score - actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) - preds = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) + actuals = [4, 4, 3, 3, 2, 2, 1, 1] + preds = [4, 4, 3, 3, 2, 2, 1, 1] actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) - update_op1 = kp_obj1.update_state(actuals, preds) - update_op2 = kp_obj2.update_state(actuals, preds) - update_op3 = kp_obj3.update_state(actuals, preds) - - self.evaluate(update_op1) - self.evaluate(update_op2) - self.evaluate(update_op3) + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 1.0, 4) self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 1.0, 4) self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 1.0, 4) def test_kappa_worse_than_random(self): - kp_obj1 = CohensKappa(num_classes=5) - kp_obj2 = CohensKappa(num_classes=5, weightage='linear') - kp_obj3 = CohensKappa(num_classes=5, weightage='quadratic') - - self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) - self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) - self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) - - # worse than random - actuals = np.array([4, 4, 3, 3, 2, 2, 1, 1], dtype=np.int32) - preds = np.array([1, 2, 4, 1, 3, 3, 4, 4], dtype=np.int32) + actuals = [4, 4, 3, 3, 2, 2, 1, 1] + preds = [1, 2, 4, 1, 3, 3, 4, 4] actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) - update_op1 = kp_obj1.update_state(actuals, preds) - update_op2 = kp_obj2.update_state(actuals, preds) - update_op3 = kp_obj3.update_state(actuals, preds) + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() - self.evaluate(update_op1) - self.evaluate(update_op2) - self.evaluate(update_op3) + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.33333, 4) self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.52380, 4) self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.72727, 4) + + def test_kappa_with_sample_weights(self): + actuals = [4, 4, 3, 3, 2, 2, 1, 1] + preds = [1, 2, 4, 1, 3, 3, 4, 4] + weights = [1, 1, 2, 5, 10, 2, 3, 3] + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + weights = 
tf.convert_to_tensor(weights, dtype=tf.int32) + + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() + + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, weights) -if __name__ == '__main__': - tf.test.main() \ No newline at end of file + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.254733, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.389923, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.606953, 4) \ No newline at end of file From dda3336b22ec1489b61a94d07e53d80059467219 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 18:27:23 +0530 Subject: [PATCH 11/22] add CohenKappa metric --- tensorflow_addons/metrics/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow_addons/metrics/__init__.py b/tensorflow_addons/metrics/__init__.py index 5ef2f13641..4610eb870d 100644 --- a/tensorflow_addons/metrics/__init__.py +++ b/tensorflow_addons/metrics/__init__.py @@ -17,3 +17,5 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + +from tensorflow_addons.metrics.cohens_kappa import CohenKappa \ No newline at end of file From 520a082258bc752adc055ec70a1eedfb24415408 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 19:00:53 +0530 Subject: [PATCH 12/22] format code --- tensorflow_addons/metrics/cohens_kappa.py | 300 +++++++++++----------- 1 file changed, 151 insertions(+), 149 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index 96d181c853..a5a234ba27 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implements Cohen's Kappa""" +"""Implements Cohen's Kappa.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function - import tensorflow as tf import tensorflow.keras.backend as K from tensorflow.math import confusion_matrix @@ -28,153 +27,156 @@ @keras_utils.register_keras_custom_object class CohenKappa(Metric): - """Computes Kappa score between two raters. - - The score lies in the range [-1, 1]. A score of -1 represents - complete disagreement between two raters whereas a score of 1 - represents complete agreement between the two raters. - A score of 0 means agreement by chance. - - Note: As of now, this implementation considers all labels - while calculating the Cohen's Kappa score. - - Usage: - ```python - actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) - preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) - - m = tf.keras.metrics.CohenKappa(num_classes=5) - m.update_state(actuals, preds, "quadratic") - print('Final result: ', m.result().numpy()) # Result: 0.68932 - ``` - Usage with tf.keras API: - ```python - model = keras.models.Model(inputs, outputs) - model.add_metric(tf.keras.metrics.CohenKappa(num_classes=5)(outputs)) - model.compile('sgd', loss='mse') - ``` - - Args: - num_classes : Number of unique classes in your dataset - weightage : Type of weighting to be considered for calculating - kappa statistics. A valid value is one of [None, 'linear', 'quadratic']. - Defaults to None - - Returns: - kappa_score : float - The kappa statistic, which is a number between -1 and 1. 
The maximum - value means complete agreement; zero or lower means chance agreement. - - Raises: - ValueError: If the value passed for `weightage` is invalid - i.e. not any one of [None, 'linear', 'quadratic'] - - """ - def __init__(self, - num_classes, - name='cohen_kappa', - weightage=None, - dtype=tf.float32): - super(CohenKappa, self).__init__(name=name, dtype=dtype) - - if weightage not in (None, 'linear', 'quadratic'): - raise ValueError("Unknown kappa weighting type.") - else: - self.weightage = weightage - - self.num_classes = num_classes - self.conf_mtx = self.add_weight('conf_mtx', - shape=(self.num_classes, self.num_classes), - initializer=tf.initializers.zeros, - dtype=tf.int32) - - def update_state(self, y_true, y_pred, sample_weight=None): - """Accumulates the confusion matrix condition statistics. - + """Computes Kappa score between two raters. + + The score lies in the range [-1, 1]. A score of -1 represents + complete disagreement between two raters whereas a score of 1 + represents complete agreement between the two raters. + A score of 0 means agreement by chance. + + Note: As of now, this implementation considers all labels + while calculating the Cohen's Kappa score. + + Usage: + ```python + actuals = np.array([4, 4, 3, 4, 2, 4, 1, 1], dtype=np.int32) + preds = np.array([4, 4, 3, 4, 4, 2, 1, 1], dtype=np.int32) + + m = tf.keras.metrics.CohenKappa(num_classes=5) + m.update_state(actuals, preds, "quadratic") + print('Final result: ', m.result().numpy()) # Result: 0.68932 + ``` + Usage with tf.keras API: + ```python + model = keras.models.Model(inputs, outputs) + model.add_metric(tf.keras.metrics.CohenKappa(num_classes=5)(outputs)) + model.compile('sgd', loss='mse') + ``` + Args: - y_true : array, shape = [n_samples] - Labels assigned by the first annotator. - y_pred : array, shape = [n_samples] - Labels assigned by the second annotator. The kappa statistic - is symmetric, so swapping ``y_true`` and ``y_pred`` doesn't - change the value. - sample_weight(optional) : for weighting labels in confusion matrix - Default is None. The dtype for weights should be the same as - the dtype for confusion matrix.Check tf.math.consfusion_matrix - for details - - + num_classes : Number of unique classes in your dataset + weightage : Type of weighting to be considered for calculating + kappa statistics. A valid value is one of [None, 'linear', 'quadratic']. + Defaults to None + Returns: - Update op. - + kappa_score : float + The kappa statistic, which is a number between -1 and 1. The maximum + value means complete agreement; zero or lower means chance agreement. + + Raises: + ValueError: If the value passed for `weightage` is invalid + i.e. not any one of [None, 'linear', 'quadratic'] """ - y_true = tf.cast(y_true, dtype=tf.int32) - y_pred = tf.cast(y_pred, dtype=tf.int32) - - if y_true.shape != y_pred.shape: - raise ValueError("Number of samples in y_true and y_pred are different") - - # compute the new values of the confusion matrix - new_conf_mtx = confusion_matrix(labels=y_true, - predictions=y_pred, - num_classes=self.num_classes, - weights=sample_weight) - - # update the values in the orifinal confusion matrix - return self.conf_mtx.assign_add(new_conf_mtx) - - def result(self): - nb_ratings = tf.shape(self.conf_mtx)[0] - weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) - - # 2. 
Create a weight matrix - if self.weightage is None: - diagonal = tf.zeros([5], dtype=tf.int32) - weight_mtx = tf.linalg.set_diag(weight_mtx, diagonal=diagonal) - weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - - else: - weight_mtx += tf.range(nb_ratings, dtype=tf.int32) - weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - - if self.weightage=='linear': - weight_mtx = tf.abs(weight_mtx - K.transpose(weight_mtx)) - else: - weight_mtx = K.pow((weight_mtx - K.transpose(weight_mtx)), 2) - weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) - - # 3. Get counts - actual_ratings_hist = K.sum(self.conf_mtx, axis=1) - pred_ratings_hist = K.sum(self.conf_mtx, axis=0) - - # 4. Get the outer product - out_prod = pred_ratings_hist[..., None] * actual_ratings_hist[None, ...] - - # 5. Normalize the confusion matrix and outer product - conf_mtx = self.conf_mtx / K.sum(self.conf_mtx) - out_prod = out_prod / K.sum(out_prod) - - conf_mtx = tf.cast(conf_mtx, dtype=tf.float32) - out_prod = tf.cast(out_prod, dtype=tf.float32) - - # 6. Calculate Kappa score - numerator = K.sum(conf_mtx * weight_mtx) - denominator = K.sum(out_prod * weight_mtx) - kp = 1-(numerator/denominator) - return kp - - def get_config(self): - """Returns the serializable config of the metric.""" - - config = { - "num_classes": self.num_classes, - "weightage": self.weightage, - } - base_config = super(CohenKappa, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def reset_states(self): - """Resets all of the metric state variables.""" - - for v in self.variables: - K.set_value(v ,np.zeros((self.num_classes, self.num_classes), np.int32)) \ No newline at end of file + + def __init__(self, + num_classes, + name='cohen_kappa', + weightage=None, + dtype=tf.float32): + super(CohenKappa, self).__init__(name=name, dtype=dtype) + + if weightage not in (None, 'linear', 'quadratic'): + raise ValueError("Unknown kappa weighting type.") + else: + self.weightage = weightage + + self.num_classes = num_classes + self.conf_mtx = self.add_weight( + 'conf_mtx', + shape=(self.num_classes, self.num_classes), + initializer=tf.initializers.zeros, + dtype=tf.int32) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates the confusion matrix condition statistics. + + Args: + y_true : array, shape = [n_samples] + Labels assigned by the first annotator. + y_pred : array, shape = [n_samples] + Labels assigned by the second annotator. The kappa statistic + is symmetric, so swapping ``y_true`` and ``y_pred`` doesn't + change the value. + sample_weight(optional) : for weighting labels in confusion matrix + Default is None. The dtype for weights should be the same as + the dtype for confusion matrix.Check tf.math.consfusion_matrix + for details + + + Returns: + Update op. + """ + y_true = tf.cast(y_true, dtype=tf.int32) + y_pred = tf.cast(y_pred, dtype=tf.int32) + + if y_true.shape != y_pred.shape: + raise ValueError( + "Number of samples in y_true and y_pred are different") + + # compute the new values of the confusion matrix + new_conf_mtx = confusion_matrix( + labels=y_true, + predictions=y_pred, + num_classes=self.num_classes, + weights=sample_weight) + + # update the values in the orifinal confusion matrix + return self.conf_mtx.assign_add(new_conf_mtx) + + def result(self): + nb_ratings = tf.shape(self.conf_mtx)[0] + weight_mtx = tf.ones([nb_ratings, nb_ratings], dtype=tf.int32) + + # 2. 
Create a weight matrix + if self.weightage is None: + diagonal = tf.zeros([5], dtype=tf.int32) + weight_mtx = tf.linalg.set_diag(weight_mtx, diagonal=diagonal) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + + else: + weight_mtx += tf.range(nb_ratings, dtype=tf.int32) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + + if self.weightage == 'linear': + weight_mtx = tf.abs(weight_mtx - K.transpose(weight_mtx)) + else: + weight_mtx = K.pow((weight_mtx - K.transpose(weight_mtx)), 2) + weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) + + # 3. Get counts + actual_ratings_hist = K.sum(self.conf_mtx, axis=1) + pred_ratings_hist = K.sum(self.conf_mtx, axis=0) + + # 4. Get the outer product + out_prod = pred_ratings_hist[..., None] * actual_ratings_hist[None, ...] + + # 5. Normalize the confusion matrix and outer product + conf_mtx = self.conf_mtx / K.sum(self.conf_mtx) + out_prod = out_prod / K.sum(out_prod) + + conf_mtx = tf.cast(conf_mtx, dtype=tf.float32) + out_prod = tf.cast(out_prod, dtype=tf.float32) + + # 6. Calculate Kappa score + numerator = K.sum(conf_mtx * weight_mtx) + denominator = K.sum(out_prod * weight_mtx) + kp = 1 - (numerator / denominator) + return kp + + def get_config(self): + """Returns the serializable config of the metric.""" + + config = { + "num_classes": self.num_classes, + "weightage": self.weightage, + } + base_config = super(CohenKappa, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def reset_states(self): + """Resets all of the metric state variables.""" + + for v in self.variables: + K.set_value( + v, np.zeros((self.num_classes, self.num_classes), np.int32)) From cb46fe5e6f818762dbc4d9c6b4347e9dab42ee39 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 19:12:40 +0530 Subject: [PATCH 13/22] format code --- .../metrics/cohens_kappa_test.py | 198 +++++++++--------- 1 file changed, 100 insertions(+), 98 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py index cc9c8d7e3a..5e94038f60 100644 --- a/tensorflow_addons/metrics/cohens_kappa_test.py +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -24,103 +24,105 @@ from tensorflow_addons.metrics import CohensKappa from tensorflow_addons.utils import test_utils + @test_utils.run_all_in_graph_and_eager_modes class CohenKappaTest(tf.test.TestCase): - def test_config(self): - kp_obj = CohenKappa(name='cohen_kappa', num_classes=5) - self.assertEqual(kp_obj.name, 'cohen_kappa') - self.assertEqual(kp_obj.dtype, tf.float32) - self.assertEqual(kp_obj.num_classes, 5) - - # Check save and restore config - kb_obj2 = CohenKappa.from_config(kp_obj.get_config()) - self.assertEqual(kb_obj2.name, 'cohen_kappa') - self.assertEqual(kb_obj2.dtype, tf.float32) - self.assertEqual(kp_obj.num_classes, 5) - - def initialize_vars(self): - kp_obj1 = CohenKappa(num_classes=5) - kp_obj2 = CohenKappa(num_classes=5, weightage='linear') - kp_obj3 = CohenKappa(num_classes=5, weightage='quadratic') - - self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) - self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) - self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) - - return kp_obj1, kp_obj2, kp_obj3 - - def update_obj_states(self, obj1, obj2, obj3, actuals, preds, weights): - update_op1 = obj1.update_state(actuals, preds, sample_weight=weights) - update_op2 = obj2.update_state(actuals, preds, sample_weight=weights) - update_op3 = obj3.update_state(actuals, preds, 
sample_weight=weights) - - self.evaluate(update_op1) - self.evaluate(update_op2) - self.evaluate(update_op3) - - def test_kappa_random_score(self): - actuals = [4, 4, 3, 4, 2, 4, 1, 1] - preds = [4, 4, 3, 4, 4, 2, 1, 1] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) - - # Initialize - kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() - - # Update - self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) - - # Check results - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 0.61904, 4) - self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 0.62790, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 0.68932, 4) - - def test_kappa_perfect_score(self): - actuals = [4, 4, 3, 3, 2, 2, 1, 1] - preds = [4, 4, 3, 3, 2, 2, 1, 1] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) - - # Initialize - kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() - - # Update - self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) - - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 1.0, 4) - self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 1.0, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 1.0, 4) - - def test_kappa_worse_than_random(self): - actuals = [4, 4, 3, 3, 2, 2, 1, 1] - preds = [1, 2, 4, 1, 3, 3, 4, 4] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) - - # Initialize - kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() - - # Update - self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) - - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.33333, 4) - self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.52380, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.72727, 4) - - def test_kappa_with_sample_weights(self): - actuals = [4, 4, 3, 3, 2, 2, 1, 1] - preds = [1, 2, 4, 1, 3, 3, 4, 4] - weights = [1, 1, 2, 5, 10, 2, 3, 3] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) - weights = tf.convert_to_tensor(weights, dtype=tf.int32) - - # Initialize - kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() - - # Update - self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, weights) - - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.254733, 4) - self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.389923, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.606953, 4) \ No newline at end of file + def test_config(self): + kp_obj = CohenKappa(name='cohen_kappa', num_classes=5) + self.assertEqual(kp_obj.name, 'cohen_kappa') + self.assertEqual(kp_obj.dtype, tf.float32) + self.assertEqual(kp_obj.num_classes, 5) + + # Check save and restore config + kb_obj2 = CohenKappa.from_config(kp_obj.get_config()) + self.assertEqual(kb_obj2.name, 'cohen_kappa') + self.assertEqual(kb_obj2.dtype, tf.float32) + self.assertEqual(kp_obj.num_classes, 5) + + def initialize_vars(self): + kp_obj1 = CohenKappa(num_classes=5) + kp_obj2 = CohenKappa(num_classes=5, weightage='linear') + kp_obj3 = CohenKappa(num_classes=5, weightage='quadratic') + + self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) + self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) + + return kp_obj1, kp_obj2, kp_obj3 + + def update_obj_states(self, obj1, 
obj2, obj3, actuals, preds, weights): + update_op1 = obj1.update_state(actuals, preds, sample_weight=weights) + update_op2 = obj2.update_state(actuals, preds, sample_weight=weights) + update_op3 = obj3.update_state(actuals, preds, sample_weight=weights) + + self.evaluate(update_op1) + self.evaluate(update_op2) + self.evaluate(update_op3) + + def test_kappa_random_score(self): + actuals = [4, 4, 3, 4, 2, 4, 1, 1] + preds = [4, 4, 3, 4, 4, 2, 1, 1] + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() + + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) + + # Check results + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 0.61904, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 0.62790, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 0.68932, 4) + + def test_kappa_perfect_score(self): + actuals = [4, 4, 3, 3, 2, 2, 1, 1] + preds = [4, 4, 3, 3, 2, 2, 1, 1] + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() + + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) + + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 1.0, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 1.0, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 1.0, 4) + + def test_kappa_worse_than_random(self): + actuals = [4, 4, 3, 3, 2, 2, 1, 1] + preds = [1, 2, 4, 1, 3, 3, 4, 4] + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() + + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) + + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.33333, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.52380, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.72727, 4) + + def test_kappa_with_sample_weights(self): + actuals = [4, 4, 3, 3, 2, 2, 1, 1] + preds = [1, 2, 4, 1, 3, 3, 4, 4] + weights = [1, 1, 2, 5, 10, 2, 3, 3] + actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) + preds = tf.convert_to_tensor(preds, dtype=tf.int32) + weights = tf.convert_to_tensor(weights, dtype=tf.int32) + + # Initialize + kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() + + # Update + self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, + weights) + + self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.254733, 4) + self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.389923, 4) + self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.606953, 4) From 9ea909e05d32d31b72a14f3487ef42e19ccfa7ec Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 20:28:39 +0530 Subject: [PATCH 14/22] make sure all tests pass --- tensorflow_addons/metrics/cohens_kappa.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index a5a234ba27..fb834baf1a 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -19,6 +19,7 @@ from __future__ import print_function import tensorflow as tf +import numpy as np import tensorflow.keras.backend as K from tensorflow.math import confusion_matrix from 
tensorflow.keras.metrics import Metric @@ -55,9 +56,9 @@ class CohenKappa(Metric): Args: num_classes : Number of unique classes in your dataset - weightage : Type of weighting to be considered for calculating - kappa statistics. A valid value is one of [None, 'linear', 'quadratic']. - Defaults to None + weightage : Weighting to be considered for calculating + kappa statistics. A valid value is one of + [None, 'linear', 'quadratic']. Defaults to None. Returns: kappa_score : float @@ -99,9 +100,9 @@ def update_state(self, y_true, y_pred, sample_weight=None): is symmetric, so swapping ``y_true`` and ``y_pred`` doesn't change the value. sample_weight(optional) : for weighting labels in confusion matrix - Default is None. The dtype for weights should be the same as - the dtype for confusion matrix.Check tf.math.consfusion_matrix - for details + Default is None. The dtype for weights should be the same + asthe dtype for confusion matrix. For more details, + please Check tf.math.consfusion_matrix Returns: @@ -149,7 +150,8 @@ def result(self): pred_ratings_hist = K.sum(self.conf_mtx, axis=0) # 4. Get the outer product - out_prod = pred_ratings_hist[..., None] * actual_ratings_hist[None, ...] + out_prod = pred_ratings_hist[..., None] * \ + actual_ratings_hist[None, ...] # 5. Normalize the confusion matrix and outer product conf_mtx = self.conf_mtx / K.sum(self.conf_mtx) From d2b87a42ddb0dccb47766e9c4258820974824b59 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 20:29:37 +0530 Subject: [PATCH 15/22] fix typo in imports --- tensorflow_addons/metrics/cohens_kappa_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py index 5e94038f60..d8b0fab887 100644 --- a/tensorflow_addons/metrics/cohens_kappa_test.py +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -18,10 +18,8 @@ from __future__ import division from __future__ import print_function -import numpy as np import tensorflow as tf - -from tensorflow_addons.metrics import CohensKappa +from tensorflow_addons.metrics import CohenKappa from tensorflow_addons.utils import test_utils From afc35b5ecab4b49fa09f8fcc3bfa5cebb75b8b40 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 23:22:00 +0530 Subject: [PATCH 16/22] code refactor --- tensorflow_addons/metrics/cohens_kappa.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index fb834baf1a..aba9d78e87 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -101,8 +101,8 @@ def update_state(self, y_true, y_pred, sample_weight=None): change the value. sample_weight(optional) : for weighting labels in confusion matrix Default is None. The dtype for weights should be the same - asthe dtype for confusion matrix. For more details, - please Check tf.math.consfusion_matrix + as the dtype for confusion matrix. For more details, + please check tf.math.confusion_matrix. 
Returns: @@ -140,29 +140,29 @@ def result(self): weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) if self.weightage == 'linear': - weight_mtx = tf.abs(weight_mtx - K.transpose(weight_mtx)) + weight_mtx = tf.abs(weight_mtx - tf.transpose(weight_mtx)) else: - weight_mtx = K.pow((weight_mtx - K.transpose(weight_mtx)), 2) + weight_mtx = tf.pow((weight_mtx - tf.transpose(weight_mtx)), 2) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) # 3. Get counts - actual_ratings_hist = K.sum(self.conf_mtx, axis=1) - pred_ratings_hist = K.sum(self.conf_mtx, axis=0) + actual_ratings_hist = tf.reduce_sum(self.conf_mtx, axis=1) + pred_ratings_hist = tf.reduce_sum(self.conf_mtx, axis=0) # 4. Get the outer product out_prod = pred_ratings_hist[..., None] * \ actual_ratings_hist[None, ...] # 5. Normalize the confusion matrix and outer product - conf_mtx = self.conf_mtx / K.sum(self.conf_mtx) - out_prod = out_prod / K.sum(out_prod) + conf_mtx = self.conf_mtx / tf.reduce_sum(self.conf_mtx) + out_prod = out_prod / tf.reduce_sum(out_prod) conf_mtx = tf.cast(conf_mtx, dtype=tf.float32) out_prod = tf.cast(out_prod, dtype=tf.float32) # 6. Calculate Kappa score - numerator = K.sum(conf_mtx * weight_mtx) - denominator = K.sum(out_prod * weight_mtx) + numerator = tf.reduce_sum(conf_mtx * weight_mtx) + denominator = tf.reduce_sum(out_prod * weight_mtx) kp = 1 - (numerator / denominator) return kp @@ -181,4 +181,4 @@ def reset_states(self): for v in self.variables: K.set_value( - v, np.zeros((self.num_classes, self.num_classes), np.int32)) + v, np.zeros((self.num_classes, self.num_classes), np.int32)) \ No newline at end of file From 8cadec43ea910b617349c9c270ae671cda31813c Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 23:23:00 +0530 Subject: [PATCH 17/22] refactor code --- .../metrics/cohens_kappa_test.py | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa_test.py b/tensorflow_addons/metrics/cohens_kappa_test.py index d8b0fab887..4793bd5c7a 100644 --- a/tensorflow_addons/metrics/cohens_kappa_test.py +++ b/tensorflow_addons/metrics/cohens_kappa_test.py @@ -45,7 +45,6 @@ def initialize_vars(self): self.evaluate(tf.compat.v1.variables_initializer(kp_obj1.variables)) self.evaluate(tf.compat.v1.variables_initializer(kp_obj2.variables)) self.evaluate(tf.compat.v1.variables_initializer(kp_obj3.variables)) - return kp_obj1, kp_obj2, kp_obj3 def update_obj_states(self, obj1, obj2, obj3, actuals, preds, weights): @@ -57,11 +56,19 @@ def update_obj_states(self, obj1, obj2, obj3, actuals, preds, weights): self.evaluate(update_op2) self.evaluate(update_op3) + def check_results(self, objs, values): + obj1, obj2, obj3 = objs + val1, val2, val3 = values + + self.assertAllClose(val1, self.evaluate(obj1.result()), atol=1e-5) + self.assertAllClose(val2, self.evaluate(obj2.result()), atol=1e-5) + self.assertAllClose(val3, self.evaluate(obj3.result()), atol=1e-5) + def test_kappa_random_score(self): actuals = [4, 4, 3, 4, 2, 4, 1, 1] preds = [4, 4, 3, 4, 4, 2, 1, 1] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) + actuals = tf.constant(actuals, dtype=tf.int32) + preds = tf.constant(preds, dtype=tf.int32) # Initialize kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() @@ -70,15 +77,14 @@ def test_kappa_random_score(self): self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) # Check results - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 0.61904, 4) - 
self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 0.62790, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 0.68932, 4) + self.check_results([kp_obj1, kp_obj2, kp_obj3], + [0.61904761, 0.62790697, 0.68932038]) def test_kappa_perfect_score(self): actuals = [4, 4, 3, 3, 2, 2, 1, 1] preds = [4, 4, 3, 3, 2, 2, 1, 1] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) + actuals = tf.constant(actuals, dtype=tf.int32) + preds = tf.constant(preds, dtype=tf.int32) # Initialize kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() @@ -86,15 +92,14 @@ def test_kappa_perfect_score(self): # Update self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), 1.0, 4) - self.assertAlmostEqual(self.evaluate(kp_obj2.result()), 1.0, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), 1.0, 4) + # Check results + self.check_results([kp_obj1, kp_obj2, kp_obj3], [1.0, 1.0, 1.0]) def test_kappa_worse_than_random(self): actuals = [4, 4, 3, 3, 2, 2, 1, 1] preds = [1, 2, 4, 1, 3, 3, 4, 4] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) + actuals = tf.constant(actuals, dtype=tf.int32) + preds = tf.constant(preds, dtype=tf.int32) # Initialize kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() @@ -102,17 +107,17 @@ def test_kappa_worse_than_random(self): # Update self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, None) - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.33333, 4) - self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.52380, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.72727, 4) + # check results + self.check_results([kp_obj1, kp_obj2, kp_obj3], + [-0.3333333, -0.52380952, -0.72727272]) def test_kappa_with_sample_weights(self): actuals = [4, 4, 3, 3, 2, 2, 1, 1] preds = [1, 2, 4, 1, 3, 3, 4, 4] weights = [1, 1, 2, 5, 10, 2, 3, 3] - actuals = tf.convert_to_tensor(actuals, dtype=tf.int32) - preds = tf.convert_to_tensor(preds, dtype=tf.int32) - weights = tf.convert_to_tensor(weights, dtype=tf.int32) + actuals = tf.constant(actuals, dtype=tf.int32) + preds = tf.constant(preds, dtype=tf.int32) + weights = tf.constant(weights, dtype=tf.int32) # Initialize kp_obj1, kp_obj2, kp_obj3 = self.initialize_vars() @@ -121,6 +126,10 @@ def test_kappa_with_sample_weights(self): self.update_obj_states(kp_obj1, kp_obj2, kp_obj3, actuals, preds, weights) - self.assertAlmostEqual(self.evaluate(kp_obj1.result()), -0.254733, 4) - self.assertAlmostEqual(self.evaluate(kp_obj2.result()), -0.389923, 4) - self.assertAlmostEqual(self.evaluate(kp_obj3.result()), -0.606953, 4) + # check results + self.check_results([kp_obj1, kp_obj2, kp_obj3], + [-0.25473321, -0.38992332, -0.60695344]) + + +if __name__ == '__main__': + tf.test.main() From 5711b186eedd846707516caa95af24f01e0baccc Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Tue, 11 Jun 2019 23:58:16 +0530 Subject: [PATCH 18/22] update README --- tensorflow_addons/metrics/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow_addons/metrics/README.md b/tensorflow_addons/metrics/README.md index a6ae7ec2e3..4a978fbe08 100644 --- a/tensorflow_addons/metrics/README.md +++ b/tensorflow_addons/metrics/README.md @@ -3,12 +3,12 @@ ## Maintainers | Submodule | Maintainers | Contact Info | |:---------- |:------------- |:--------------| -| | | | +| cohens_kappa| Aakash Nain | 
aakashnain@outlook.com| ## Contents -| Submodule | Activation | Reference | +| Submodule | Metric | Reference | |:----------------------- |:-------------------|:---------------| -| | | | +| cohens_kappa| CohenKappa|[Cohen's Kappa](https://en.wikipedia.org/wiki/Cohen%27s_kappa)| ## Contribution Guidelines From 593f7fdd437451d5c50a5d8e871aabbcdb327011 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Wed, 12 Jun 2019 00:55:38 +0530 Subject: [PATCH 19/22] fix typo --- tensorflow_addons/metrics/cohens_kappa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index aba9d78e87..f0e80417b6 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -122,7 +122,7 @@ def update_state(self, y_true, y_pred, sample_weight=None): num_classes=self.num_classes, weights=sample_weight) - # update the values in the orifinal confusion matrix + # update the values in the original confusion matrix return self.conf_mtx.assign_add(new_conf_mtx) def result(self): @@ -131,7 +131,7 @@ def result(self): # 2. Create a weight matrix if self.weightage is None: - diagonal = tf.zeros([5], dtype=tf.int32) + diagonal = tf.zeros([nb_ratings], dtype=tf.int32) weight_mtx = tf.linalg.set_diag(weight_mtx, diagonal=diagonal) weight_mtx = tf.cast(weight_mtx, dtype=tf.float32) From 6bf67e2112ead8536b20d7bbb1beb31cf1f8eea8 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Wed, 12 Jun 2019 18:20:41 +0530 Subject: [PATCH 20/22] remove math import --- tensorflow_addons/metrics/cohens_kappa.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index f0e80417b6..6ac6833518 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -21,8 +21,7 @@ import tensorflow as tf import numpy as np import tensorflow.keras.backend as K -from tensorflow.math import confusion_matrix -from tensorflow.keras.metrics import Metric +from tensorflow.metrics import Metric from tensorflow_addons.utils import keras_utils @@ -116,7 +115,7 @@ def update_state(self, y_true, y_pred, sample_weight=None): "Number of samples in y_true and y_pred are different") # compute the new values of the confusion matrix - new_conf_mtx = confusion_matrix( + new_conf_mtx = tf.math.confusion_matrix( labels=y_true, predictions=y_pred, num_classes=self.num_classes, From da65a6c4d65963037a57eec210e168bf887725d6 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Wed, 12 Jun 2019 18:25:40 +0530 Subject: [PATCH 21/22] fix imports --- tensorflow_addons/metrics/cohens_kappa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index 6ac6833518..b41bafca54 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -21,7 +21,7 @@ import tensorflow as tf import numpy as np import tensorflow.keras.backend as K -from tensorflow.metrics import Metric +from tensorflow.keras.metrics import Metric from tensorflow_addons.utils import keras_utils From 0c31db81ac4d69d63e9fcf0a79aae9feed8e2176 Mon Sep 17 00:00:00 2001 From: Aakash Kumar Nain Date: Wed, 12 Jun 2019 21:45:48 +0530 Subject: [PATCH 22/22] fix initializer --- tensorflow_addons/metrics/cohens_kappa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorflow_addons/metrics/cohens_kappa.py b/tensorflow_addons/metrics/cohens_kappa.py index b41bafca54..477cee1330 100644 --- a/tensorflow_addons/metrics/cohens_kappa.py +++ b/tensorflow_addons/metrics/cohens_kappa.py @@ -85,7 +85,7 @@ def __init__(self, self.conf_mtx = self.add_weight( 'conf_mtx', shape=(self.num_classes, self.num_classes), - initializer=tf.initializers.zeros, + initializer=tf.keras.initializers.zeros, dtype=tf.int32) def update_state(self, y_true, y_pred, sample_weight=None):
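

The `result()` method that this series converges on computes kappa purely from the accumulated confusion matrix: build a weight matrix (zero diagonal for the unweighted case, |i - j| for 'linear', (i - j)^2 for 'quadratic'), form the expected-agreement matrix as the outer product of the marginal histograms, normalize both, and return 1 minus the ratio of observed to expected weighted disagreement. Below is a minimal NumPy sketch of that computation, assuming integer labels in [0, num_classes); the helper name `cohen_kappa` is illustrative only and is not part of the addon.

```python
import numpy as np

def cohen_kappa(y_true, y_pred, num_classes, weightage=None):
    # 1. Accumulate the confusion matrix (rows = true labels, cols = preds).
    conf_mtx = np.zeros((num_classes, num_classes), dtype=np.float64)
    for t, p in zip(y_true, y_pred):
        conf_mtx[t, p] += 1

    # 2. Build the weight matrix: 0/1 for unweighted, |i - j| for
    #    'linear', (i - j)**2 for 'quadratic'.
    idx = np.arange(num_classes)
    diff = idx[:, None] - idx[None, :]
    if weightage is None:
        weight_mtx = (diff != 0).astype(np.float64)
    elif weightage == 'linear':
        weight_mtx = np.abs(diff).astype(np.float64)
    elif weightage == 'quadratic':
        weight_mtx = (diff ** 2).astype(np.float64)
    else:
        raise ValueError("Unknown kappa weighting type.")

    # 3.-5. Marginal histograms, their outer product, and normalization.
    actual_hist = conf_mtx.sum(axis=1)
    pred_hist = conf_mtx.sum(axis=0)
    out_prod = np.outer(pred_hist, actual_hist)
    conf_mtx = conf_mtx / conf_mtx.sum()
    out_prod = out_prod / out_prod.sum()

    # 6. kappa = 1 - (observed disagreement / expected disagreement).
    return 1.0 - (conf_mtx * weight_mtx).sum() / (out_prod * weight_mtx).sum()

actuals = [4, 4, 3, 4, 2, 4, 1, 1]
preds = [4, 4, 3, 4, 4, 2, 1, 1]
print(cohen_kappa(actuals, preds, num_classes=5))                         # ~0.61905
print(cohen_kappa(actuals, preds, num_classes=5, weightage='linear'))     # ~0.62791
print(cohen_kappa(actuals, preds, num_classes=5, weightage='quadratic'))  # ~0.68932
```

Run on the `test_kappa_random_score` data, the three variants print approximately 0.61905, 0.62791, and 0.68932, matching the values the tests assert.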
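Because `update_state()` only does an `assign_add` of a per-batch `tf.math.confusion_matrix` into `self.conf_mtx`, while `result()` derives the score from the accumulated matrix, the metric streams: feeding the data in several batches yields the same kappa as a single update over all samples. A short usage sketch, assuming TF 2.x eager execution and the `CohenKappa` export from `tensorflow_addons.metrics` added in this series:

```python
import tensorflow as tf
from tensorflow_addons.metrics import CohenKappa

metric = CohenKappa(num_classes=5, weightage='quadratic')

# Each call only accumulates a confusion matrix; no kappa is computed yet.
metric.update_state([4, 4, 3, 4], [4, 4, 3, 4])
metric.update_state([2, 4, 1, 1], [4, 2, 1, 1])

# result() turns the accumulated matrix into a score, so the two batched
# updates give the same value as one update over all 8 samples.
print(metric.result().numpy())  # ~0.68932
```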