Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class DiscoveryKademliaIntegrationSpec extends KademliaIntegrationSpec("Discover
kademliaAlpha = testConfig.alpha,
kademliaBucketSize = testConfig.k,
discoveryPeriod = testConfig.refreshRate,
knownPeers = initialNodes
knownPeers = initialNodes,
subnetLimitPrefixLength = 0
)
network <- Resource.liftF {
DiscoveryNetwork[InetMultiAddress](
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ case class DiscoveryConfig(
// How often to look for new peers.
discoveryPeriod: FiniteDuration,
// Bootstrap nodes.
knownPeers: Set[Node]
knownPeers: Set[Node],
// Prefix length that defines a subnet for the IP limits below, e.g. 24 for a /24 subnet; 0 disables all subnet limits.
subnetLimitPrefixLength: Int,
// Limit the number of IPs from the same subnet in any given bucket; 0 means no limit.
subnetLimitForBucket: Int,
// Limit the number of IPs from the same subnet in the whole k-table; 0 means no limit.
subnetLimitForTable: Int
)

object DiscoveryConfig {
Expand All @@ -36,6 +42,9 @@ object DiscoveryConfig {
kademliaAlpha = 3,
bondExpiration = 12.hours,
discoveryPeriod = 15.minutes,
knownPeers = Set.empty
knownPeers = Set.empty,
subnetLimitPrefixLength = 24,
subnetLimitForBucket = 2,
subnetLimitForTable = 10
)
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import cats.implicits._
import io.iohk.scalanet.discovery.crypto.{PrivateKey, SigAlg}
import io.iohk.scalanet.discovery.ethereum.{Node, EthereumNodeRecord}
import io.iohk.scalanet.discovery.hash.Hash
import io.iohk.scalanet.kademlia.{KBuckets, XorOrdering}
import io.iohk.scalanet.kademlia.XorOrdering
import io.iohk.scalanet.peergroup.Addressable
import java.net.InetAddress
import monix.eval.Task
Expand Down Expand Up @@ -53,6 +53,7 @@ trait DiscoveryService {
object DiscoveryService {
import DiscoveryRPC.{Call, Proc}
import DiscoveryNetwork.Peer
import KBucketsWithSubnetLimits.SubnetLimits

type ENRSeq = Long
type Timestamp = Long
Expand Down Expand Up @@ -90,7 +91,7 @@ object DiscoveryService {
// Use the current time to set the ENR sequence to something fresh.
now <- clock.monotonic(MILLISECONDS)
enr <- Task(EthereumNodeRecord.fromNode(node, privateKey, seq = now).require)
stateRef <- Ref[Task].of(State[A](node, enr))
stateRef <- Ref[Task].of(State[A](node, enr, SubnetLimits.fromConfig(config)))
service <- Task(new ServiceImpl[A](privateKey, config, network, stateRef, toAddress))
// Start handling requests, we need them during enrolling so the peers can ping and bond with us.
cancelToken <- network.startHandling(service)
Expand Down Expand Up @@ -144,7 +145,7 @@ object DiscoveryService {
node: Node,
enr: EthereumNodeRecord,
// Kademlia buckets with hashes of the nodes' IDs in them.
kBuckets: KBuckets[Hash],
kBuckets: KBucketsWithSubnetLimits[A],
kademliaIdToNodeId: Map[Hash, Node.Id],
nodeMap: Map[Node.Id, Node],
enrMap: Map[Node.Id, EthereumNodeRecord],
Expand Down Expand Up @@ -178,10 +179,10 @@ object DiscoveryService {
kBuckets =
if (isSelf(peer))
kBuckets
else if (kBuckets.getBucket(peer.kademliaId)._2.contains(peer.kademliaId))
kBuckets.touch(peer.kademliaId)
else if (kBuckets.contains(peer))
kBuckets.touch(peer)
else if (addToBucket)
kBuckets.add(peer.kademliaId)
kBuckets.add(peer)
else
kBuckets,
kademliaIdToNodeId = kademliaIdToNodeId.updated(peer.kademliaId, peer.id)
Expand All @@ -190,8 +191,8 @@ object DiscoveryService {

/** Update the timestamp of the peer in the K-table, if it's still part of it. */
def withTouch(peer: Peer[A]): State[A] =
if (kBuckets.contains(peer.kademliaId))
copy(kBuckets = kBuckets.touch(peer.kademliaId))
if (kBuckets.contains(peer))
copy(kBuckets = kBuckets.touch(peer))
else
// Not adding because `kademliaIdToNodeId` and `nodeMap` may no longer have this peer.
this
Expand All @@ -217,7 +218,7 @@ object DiscoveryService {
copy(
nodeMap = nodeMap - peer.id,
enrMap = enrMap - peer.id,
kBuckets = kBuckets.remove(peer.kademliaId),
kBuckets = kBuckets.remove(peer),
kademliaIdToNodeId = kademliaIdToNodeId - peer.kademliaId
)
case _ => this
Expand All @@ -228,32 +229,36 @@ object DiscoveryService {
)
}

def removePeer(peerId: Node.Id): State[A] =
def removePeer(peerId: Node.Id, toAddress: Node.Address => A): State[A] = {
// Find any Peer records that correspond to this ID.
val peers: Set[Peer[A]] = (
nodeMap.get(peerId).map(node => Peer(node.id, toAddress(node.address))).toSeq ++
lastPongTimestampMap.keys.filter(_.id == peerId).toSeq ++
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems wasteful that we are iterating the lastPongTimestampMap and bondingResultsMap maps two times here: once when finding all records of the peer, and later when filtering this peer out. From what I know these collections can be fairly large; maybe we could do it in one pass?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can change that. But this method won't actually be called; I just kept it for backwards compatibility with KRouter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

bondingResultsMap.keys.filter(_.id == peerId).toSeq
).toSet

copy(
nodeMap = nodeMap - peerId,
enrMap = enrMap - peerId,
lastPongTimestampMap = lastPongTimestampMap.filterNot {
case (peer, _) => peer.id == peerId
},
bondingResultsMap = bondingResultsMap.filterNot {
case (peer, _) => peer.id == peerId
},
kBuckets = kBuckets.remove(Node.kademliaId(peerId)),
lastPongTimestampMap = lastPongTimestampMap -- peers,
bondingResultsMap = bondingResultsMap -- peers,
kBuckets = peers.foldLeft(kBuckets)(_ remove _),
kademliaIdToNodeId = kademliaIdToNodeId - Node.kademliaId(peerId)
)
}

def setEnrolled: State[A] =
copy(hasEnrolled = true)
}
protected[v4] object State {
def apply[A](
def apply[A: Addressable](
node: Node,
enr: EthereumNodeRecord,
clock: java.time.Clock = java.time.Clock.systemUTC()
subnetLimits: SubnetLimits
): State[A] = State[A](
node = node,
enr = enr,
kBuckets = new KBuckets[Hash](node.kademliaId, clock),
kBuckets = KBucketsWithSubnetLimits[A](node, subnetLimits),
kademliaIdToNodeId = Map(node.kademliaId -> node.id),
nodeMap = Map(node.id -> node),
enrMap = Map(node.id -> enr),
Expand Down Expand Up @@ -306,7 +311,7 @@ object DiscoveryService {

override def removeNode(nodeId: Node.Id): Task[Unit] =
stateRef.update { state =>
if (state.node.id == nodeId) state else state.removePeer(nodeId)
if (state.node.id == nodeId) state else state.removePeer(nodeId, toAddress)
}

/** Update the node and ENR of the local peer with the new address and ping peers with the new ENR seq. */
Expand Down Expand Up @@ -673,7 +678,7 @@ object DiscoveryService {
if (state.isSelf(peer))
state -> None
else {
val (_, bucket) = state.kBuckets.getBucket(peer.kademliaId)
val (_, bucket) = state.kBuckets.getBucket(peer)
val (addToBucket, maybeEvict) =
if (bucket.contains(peer.kademliaId) || bucket.size < config.kademliaBucketSize) {
// We can just update the records, the bucket either has room or won't need to grow.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
package io.iohk.scalanet.discovery.ethereum.v4

import cats._
import cats.implicits._
import io.iohk.scalanet.discovery.hash.Hash
import io.iohk.scalanet.discovery.ethereum.Node
import io.iohk.scalanet.kademlia.{KBuckets, TimeSet}
import io.iohk.scalanet.peergroup.Addressable
import io.iohk.scalanet.peergroup.InetAddressOps._
import java.net.InetAddress

/** A [[KBuckets]] wrapper that enforces per-subnet occupancy limits on the K-table.
  *
  * A subnet is identified by truncating a peer's IP address to the leftmost
  * `limits.prefixLength` bits. Two independent counters are maintained alongside
  * the table:
  *   - `tableLevelCounts`: entries per subnet across the whole table;
  *   - `bucketLevelCounts`: entries per subnet within each bucket index.
  *
  * `add` silently refuses (returns `this` unchanged) any peer whose subnet is
  * already at a limit; `remove` decrements the counters and prunes entries that
  * drop to zero, keeping the maps minimal.
  *
  * Counter maps are combined with the cats `|+|` Map monoid, which merges
  * pointwise by summing the `Int` values (inserting absent keys).
  */
case class KBucketsWithSubnetLimits[A: Addressable](
    table: KBuckets[Hash],
    limits: KBucketsWithSubnetLimits.SubnetLimits,
    tableLevelCounts: KBucketsWithSubnetLimits.TableLevelCounts,
    bucketLevelCounts: KBucketsWithSubnetLimits.BucketLevelCounts
) {
  import DiscoveryNetwork.Peer
  import KBucketsWithSubnetLimits._

  /** True if the peer's Kademlia ID is already present in the underlying table. */
  def contains(peer: Peer[A]): Boolean =
    table.contains(peer.kademliaId)

  /** Refresh the peer's timestamp if present; otherwise try to insert it. */
  def touch(peer: Peer[A]): KBucketsWithSubnetLimits[A] =
    // Note that `KBuckets.touch` also adds, so if the record
    // isn't in the table already then use `add` to maintain counts.
    if (contains(peer)) copy(table = table.touch(peer.kademliaId)) else add(peer)

  /** Add the peer to the underlying K-table unless doing so would violate some limit. */
  def add(peer: Peer[A]): KBucketsWithSubnetLimits[A] =
    if (contains(peer)) this
    else {
      val ip = subnet(peer)
      val idx = getBucket(peer)._1

      // Upsert the counts of the index and/or IP in the maps, so that we can check the limits on them.
      val tlc = incrementForTable(ip)
      val blc = incrementForBucket(idx, ip)

      val isOverAnyLimit =
        limits.isOverLimitForTable(tlc(ip)) ||
          limits.isOverLimitForBucket(blc(idx)(ip))

      // If over any limit, the speculatively incremented maps are discarded,
      // leaving the counters consistent with the unchanged table.
      if (isOverAnyLimit) this
      else {
        copy(
          table = table.add(peer.kademliaId),
          tableLevelCounts = tlc,
          bucketLevelCounts = blc
        )
      }
    }

  /** Remove the peer from the K-table and decrement its subnet counters; no-op if absent. */
  def remove(peer: Peer[A]): KBucketsWithSubnetLimits[A] =
    if (!contains(peer)) this
    else {
      val ip = subnet(peer)
      val idx = getBucket(peer)._1

      val tlc = decrementForTable(ip)
      val blc = decrementForBucket(idx, ip)

      copy(table = table.remove(peer.kademliaId), tableLevelCounts = tlc, bucketLevelCounts = blc)
    }

  /** Delegate to the underlying table: the `n` hashes closest to the target ID. */
  def closestNodes(targetKademliaId: Hash, n: Int): List[Hash] =
    table.closestNodes(targetKademliaId, n)

  /** Delegate to the underlying table: the index and contents of the peer's bucket. */
  def getBucket(peer: Peer[A]): (Int, TimeSet[Hash]) =
    table.getBucket(peer.kademliaId)

  /** The subnet key of a peer: its IP truncated to the configured prefix length. */
  private def subnet(peer: Peer[A]): InetAddress =
    Addressable[A].getAddress(peer.address).getAddress.truncate(limits.prefixLength)

  /** Increase the table level count for the IP of a subnet. */
  private def incrementForTable(ip: InetAddress): TableLevelCounts =
    tableLevelCounts |+| Map(ip -> 1)

  /** Increase the bucket level count for the IP of a subnet. */
  private def incrementForBucket(idx: Int, ip: InetAddress): BucketLevelCounts =
    bucketLevelCounts |+| Map(idx -> Map(ip -> 1))

  /** Decrement the table level count for the IP of a subnet and remove the entry if it's zero. */
  private def decrementForTable(ip: InetAddress): TableLevelCounts =
    // The `match` applies to the result of the `|+|`; after merging, `ip` is guaranteed present.
    tableLevelCounts |+| Map(ip -> -1) match {
      case counts if counts(ip) <= 0 => counts - ip
      case counts => counts
    }

  /** Decrement the bucket level count for the IP of a subnet and remove the entry if it's zero
    * for the subnet itself, or the whole bucket.
    */
  private def decrementForBucket(idx: Int, ip: InetAddress): BucketLevelCounts =
    bucketLevelCounts |+| Map(idx -> Map(ip -> -1)) match {
      case counts if counts(idx)(ip) <= 0 && counts(idx).size > 1 =>
        // The subnet count in the bucket is zero, but there are other subnets in the bucket,
        // so keep the bucket level count and just remove the subnet from it.
        counts.updated(idx, counts(idx) - ip)
      case counts if counts(idx)(ip) <= 0 =>
        // The subnet count is zero, and it's the only subnet in the bucket, so remove the bucket.
        counts - idx
      case counts =>
        counts
    }
}

object KBucketsWithSubnetLimits {
  // Count of table entries keyed by subnet address.
  type SubnetCounts = Map[InetAddress, Int]
  // Subnet counts aggregated over the entire K-table.
  type TableLevelCounts = SubnetCounts
  // Subnet counts broken down by bucket index.
  type BucketLevelCounts = Map[Int, SubnetCounts]

  /** Configuration of the subnet based occupancy limits.
    *
    * @param prefixLength number of leftmost bits of the IP address that counts as a subnet, serving as its ID
    * @param forBucket maximum nodes from one subnet within any single bucket
    * @param forTable maximum nodes from one subnet across the whole K-table
    */
  case class SubnetLimits(
      prefixLength: Int,
      forBucket: Int,
      forTable: Int
  ) {

    /** All limits can be disabled by setting the subnet prefix length to 0. */
    def isEnabled: Boolean =
      prefixLength > 0

    /** The per-bucket limit only applies when limits are enabled and it is positive. */
    def isEnabledForBucket: Boolean =
      forBucket > 0 && isEnabled

    /** The whole-table limit only applies when limits are enabled and it is positive. */
    def isEnabledForTable: Boolean =
      forTable > 0 && isEnabled

    /** Whether a per-bucket subnet count exceeds the configured maximum. */
    def isOverLimitForBucket(count: Int): Boolean =
      if (isEnabledForBucket) count > forBucket else false

    /** Whether a table-wide subnet count exceeds the configured maximum. */
    def isOverLimitForTable(count: Int): Boolean =
      if (isEnabledForTable) count > forTable else false
  }

  object SubnetLimits {
    /** Sentinel configuration with every limit switched off. */
    val Unlimited = SubnetLimits(0, 0, 0)

    /** Lift the relevant fields of the discovery configuration into a [[SubnetLimits]]. */
    def fromConfig(config: DiscoveryConfig): SubnetLimits = {
      val prefix = config.subnetLimitPrefixLength
      val perBucket = config.subnetLimitForBucket
      val perTable = config.subnetLimitForTable
      SubnetLimits(prefixLength = prefix, forBucket = perBucket, forTable = perTable)
    }
  }

  /** Build an empty limited K-table centered on the local node, with empty counters. */
  def apply[A: Addressable](
      node: Node,
      limits: SubnetLimits
  ): KBucketsWithSubnetLimits[A] = {
    val emptyTable = new KBuckets[Hash](node.kademliaId, clock = java.time.Clock.systemUTC())
    KBucketsWithSubnetLimits[A](
      emptyTable,
      limits,
      tableLevelCounts = Map.empty[InetAddress, Int],
      bucketLevelCounts = Map.empty[Int, Map[InetAddress, Int]]
    )
  }
}
Loading