From 46ac41365d09d7b5ea8c5076b386503fd958eddf Mon Sep 17 00:00:00 2001
From: alexpeck <alexpeck@microsoft.com>
Date: Tue, 30 Jun 2020 20:36:58 -0700
Subject: [PATCH 1/3] throughput test

---
 ...itFaster.Caching.ThroughputAnalysis.csproj |  17 ++
 .../Program.cs                                | 159 ++++++++++++++++++
 .../ThreadSafeRandom.cs                       |  26 +++
 BitFaster.sln                                 |   6 +
 README.md                                     |  11 +-
 5 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj
 create mode 100644 BitFaster.Caching.ThroughputAnalysis/Program.cs
 create mode 100644 BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs
diff --git a/BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj b/BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj
new file mode 100644
index 00000000..9ec81d3c
--- /dev/null
+++ b/BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj
@@ -0,0 +1,17 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="CsvHelper" Version="15.0.5" />
+    <PackageReference Include="MathNet.Numerics" Version="4.11.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\BitFaster.Caching\BitFaster.Caching.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/BitFaster.Caching.ThroughputAnalysis/Program.cs b/BitFaster.Caching.ThroughputAnalysis/Program.cs
new file mode 100644
index 00000000..b262b932
--- /dev/null
+++ b/BitFaster.Caching.ThroughputAnalysis/Program.cs
@@ -0,0 +1,159 @@
+﻿using System;
+using System.Collections.Generic;
+using System.ComponentModel;
+using System.Data;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Reflection.Metadata.Ecma335;
+using System.Threading;
+using System.Threading.Tasks;
+using BitFaster.Caching.Lru;
+using CsvHelper;
+using MathNet.Numerics.Distributions;
+
+namespace BitFaster.Caching.ThroughputAnalysis
+{
+    // I’ve adapted a small test program that was used by the authors of an excellent book I’m reading, Java Concurrency 
+    // in Practice, to C#. The tests runs N threads in a tight loop, trying to retrieve a value from the dictionary. If it 
+    // exists, it attempts to remove it with a probability of 0.02; otherwise it attempts to add it with a probability of 0.6.
+
+    class Program
+    {
+        const double s = 0.86;
+        const int n = 500;
+        const int capacity = 50;
+        const int maxThreads = 52;
+        const int sampleCount = 2000;
+        const int repeatCount = 200;
+
+        private static int[] samples = new int[sampleCount];
+
+        /// <summary>Benchmarks ConcurrentLru and ClassicLru GetOrAdd throughput for 1..maxThreads tasks and exports the averages (million ops/sec) to Results.csv.</summary>
+        static void Main(string[] args)
+        {
+            // Raise the pool minimum too; otherwise threads beyond the default minimum are injected gradually and the tasks do not all run concurrently.
+            ThreadPool.SetMinThreads(maxThreads, maxThreads);
+            ThreadPool.SetMaxThreads(maxThreads, maxThreads);
+
+            Console.WriteLine("Generating input distribution...");
+            Zipf.Samples(samples, s, n);
+            int[] threadCount = Enumerable.Range(1, maxThreads).ToArray();
+
+            // Desired output:
+            // Class       1  2  3  4  5
+            // Classic       5  6  7  7  8
+            // Concurrent    5  6  7  7  8
+            DataTable resultTable = new DataTable();
+            resultTable.Columns.Add("Class");
+            foreach (var tc in threadCount)
+            {
+                resultTable.Columns.Add(tc.ToString());
+            }
+
+            DataRow concurrentLru = resultTable.NewRow();
+            DataRow classicLru = resultTable.NewRow();
+            concurrentLru["Class"] = "concurrentLru";
+            classicLru["Class"] = "classicLru";
+
+            foreach (int tc in threadCount)
+            {
+                const int warmup = 3;
+                const int runs = 6;
+                double[] results = new double[warmup + runs];
+
+                for (int i = 0; i < warmup + runs; i++)
+                {
+                    results[i] = MeasureThroughput(new ConcurrentLru<int, int>(tc, capacity, EqualityComparer<int>.Default), tc);
+                }
+                double avg = AverageLast(results, runs) / 1000000;
+                Console.WriteLine($"ConcurrLru ({tc}) {avg} million ops/sec");
+                // Invariant culture keeps '.' as the decimal separator so the CSV stays parseable on any locale.
+                concurrentLru[tc.ToString()] = avg.ToString(CultureInfo.InvariantCulture);
+
+                for (int i = 0; i < warmup + runs; i++)
+                {
+                    results[i] = MeasureThroughput(new ClassicLru<int, int>(tc, capacity, EqualityComparer<int>.Default), tc);
+                }
+                avg = AverageLast(results, runs) / 1000000;
+                Console.WriteLine($"ClassicLru ({tc}) {avg} million ops/sec");
+                classicLru[tc.ToString()] = avg.ToString(CultureInfo.InvariantCulture);
+            }
+
+            resultTable.Rows.Add(concurrentLru);
+            resultTable.Rows.Add(classicLru);
+            ExportCsv(resultTable);
+
+            Console.WriteLine("Done.");
+        }
+
+        /// <summary>Returns the mean of the last <paramref name="count"/> entries of <paramref name="results"/> (the measured runs that follow the warmup runs).</summary>
+        private static double AverageLast(double[] results, int count)
+        {
+            double sum = 0;
+            for (int i = results.Length - count; i < results.Length; i++)
+            {
+                sum += results[i]; // accumulate: plain assignment kept only the final entry
+            }
+            return sum / count;
+        }
+
+
+        /// <summary>Runs Test on threadCount thread-pool tasks against the shared cache and returns total GetOrAdd calls per second of elapsed wall-clock time.</summary>
+        private static double MeasureThroughput(ICache<int, int> cache, int threadCount)
+        {
+            var tasks = new Task[threadCount];
+            var sw = Stopwatch.StartNew();
+
+            for (int i = 0; i < threadCount; i++)
+            {
+                tasks[i] = Task.Run(() => Test(cache));
+            }
+
+            Task.WaitAll(tasks);
+            sw.Stop();
+
+            // throughput = ops/sec; multiply as double so larger constants cannot silently overflow int
+            return ((double)threadCount * sampleCount * repeatCount) / sw.Elapsed.TotalSeconds;
+        }
+
+        // Worker body: replays the shared sample sequence repeatCount times, calling cache.GetOrAdd once per sample.
+        private static void Test(ICache<int, int> cache)
+        {
+            // The value factory returns the key itself, so producing a value on a miss is trivial.
+            Func<int, int> identity = key => key;
+
+            int remainingPasses = repeatCount;
+            while (remainingPasses-- > 0)
+            {
+                for (int index = 0; index < sampleCount; index++)
+                {
+                    cache.GetOrAdd(samples[index], identity);
+                }
+            }
+        }
+
+        /// <summary>Writes the table to Results.csv: one record of column names, then one record per data row.</summary>
+        public static void ExportCsv(DataTable results)
+        {
+            using (StreamWriter output = File.CreateText(@"Results.csv"))
+            using (CsvWriter writer = new CsvWriter(output, CultureInfo.InvariantCulture))
+            {
+                foreach (DataColumn header in results.Columns)
+                {
+                    writer.WriteField(header.ColumnName);
+                }
+                writer.NextRecord();
+                foreach (DataRow record in results.Rows)
+                {
+                    foreach (object cell in record.ItemArray)
+                    {
+                        writer.WriteField(cell);
+                    }
+                    writer.NextRecord();
+                }
+            }
+        }
+    }
+}
diff --git a/BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs b/BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs
new file mode 100644
index 00000000..feb9dd35
--- /dev/null
+++ b/BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs
@@ -0,0 +1,26 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace BitFaster.Caching.ThroughputAnalysis
+{
+    public static class ThreadSafeRandom
+    {
+        [ThreadStatic]
+        private static Random _local;
+
+        private static Random _global = new Random();
+
+        public static int Next(int max)
+        {
+            Random inst = _local;
+            if (inst == null)
+            {
+                int seed;
+                lock (_global) seed = _global.Next();
+                _local = inst = new Random(seed);
+            }
+            return inst.Next(max);
+        }
+    }
+}
diff --git a/BitFaster.sln b/BitFaster.sln
index 2480daa6..6d530f67 100644
--- a/BitFaster.sln
+++ b/BitFaster.sln
@@ -16,6 +16,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.Benchmark
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.HitRateAnalysis", "BitFaster.Caching.HitRateAnalysis\BitFaster.Caching.HitRateAnalysis.csproj", "{12AAE7FB-09F5-4A87-838E-891ACEF5722B}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BitFaster.Caching.ThroughputAnalysis", "BitFaster.Caching.ThroughputAnalysis\BitFaster.Caching.ThroughputAnalysis.csproj", "{EF9968AF-10B2-4205-9C42-19A594BC98C1}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -38,6 +40,10 @@ Global
 		{12AAE7FB-09F5-4A87-838E-891ACEF5722B}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{12AAE7FB-09F5-4A87-838E-891ACEF5722B}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{12AAE7FB-09F5-4A87-838E-891ACEF5722B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{EF9968AF-10B2-4205-9C42-19A594BC98C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{EF9968AF-10B2-4205-9C42-19A594BC98C1}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{EF9968AF-10B2-4205-9C42-19A594BC98C1}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{EF9968AF-10B2-4205-9C42-19A594BC98C1}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/README.md b/README.md
index 28095c84..7fcff656 100644
--- a/README.md
+++ b/README.md
@@ -139,7 +139,7 @@ These charts summarize the percentage increase in hit rate ConcurrentLru vs LRU.
    </tr> 
 </table>
 
-## ConcurrentLru Benchmarks
+## ConcurrentLru Latency
 
 In these benchmarks, a cache miss is essentially free. These tests exist purely to compare the raw execution speed of the cache bookkeeping code. In a real setting, where a cache miss is presumably quite expensive, the relative overhead of the cache will be very small.
 
@@ -199,6 +199,15 @@ FastConcurrentLru does not allocate and is approximately 10x faster than System.
 |    RuntimeMemoryCache | 280.16 ns | 5.607 ns | 7.486 ns | 16.59 | 0.0153 |      32 B |
 | ExtensionsMemoryCache | 342.72 ns | 3.729 ns | 3.114 ns | 20.29 | 0.0114 |      24 B |
 
+
+## ConcurrentLru Throughput
+
+In this test, we generate 2000 samples of 500 keys with a Zipfian distribution (s = 0.86). Caches have size 50. From N concurrent threads, fetch the sample keys in sequence (so each thread is using the same input keys). The principal scalability limit in concurrent applications is the exclusive resource lock. As the number of threads increases, ConcurrentLru significantly outperforms an LRU implemented with a short lived exclusive lock used to synchronize the linked list data structure.
+
+This test was run on a Standard D16s v3 Azure VM (16 cpus), with .NET Core 3.1.
+
+
+
 ## Meta-programming using structs and JIT value type optimization
 
 TemplateConcurrentLru features injectable behaviors defined as structs. Structs are subject to special JIT optimizations, and the .NET JIT compiler can inline, eliminate dead code and propogate JIT time constants based on structs. Using this technique, the TemplateConcurrentLru can be customized to support LRU and TLRU policies without compromising execution speed.

From beb9e735d6f589eb08766a6be921a14432dd711a Mon Sep 17 00:00:00 2001
From: alexpeck <alexpeck@microsoft.com>
Date: Tue, 30 Jun 2020 21:29:36 -0700
Subject: [PATCH 2/3] add results

---
 ...itFaster.Caching.ThroughputAnalysis.csproj |  4 +++
 .../Program.cs                                |  4 ---
 .../Results-d16v3.csv                         |  3 +++
 .../ThreadSafeRandom.cs                       | 26 -------------------
 README.md                                     |  2 +-
 5 files changed, 8 insertions(+), 31 deletions(-)
 create mode 100644 BitFaster.Caching.ThroughputAnalysis/Results-d16v3.csv
 delete mode 100644 BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs

diff --git a/BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj b/BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj
index 9ec81d3c..305292e7 100644
--- a/BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj
+++ b/BitFaster.Caching.ThroughputAnalysis/BitFaster.Caching.ThroughputAnalysis.csproj
@@ -5,6 +5,10 @@
     <TargetFramework>netcoreapp3.1</TargetFramework>
   </PropertyGroup>
 
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+    <NoWarn>1701;1702;CS8002</NoWarn>
+  </PropertyGroup>
+
   <ItemGroup>
     <PackageReference Include="CsvHelper" Version="15.0.5" />
     <PackageReference Include="MathNet.Numerics" Version="4.11.0" />
diff --git a/BitFaster.Caching.ThroughputAnalysis/Program.cs b/BitFaster.Caching.ThroughputAnalysis/Program.cs
index b262b932..aa0555a9 100644
--- a/BitFaster.Caching.ThroughputAnalysis/Program.cs
+++ b/BitFaster.Caching.ThroughputAnalysis/Program.cs
@@ -15,10 +15,6 @@
 
 namespace BitFaster.Caching.ThroughputAnalysis
 {
-    // I’ve adapted a small test program that was used by the authors of an excellent book I’m reading, Java Concurrency 
-    // in Practice, to C#. The tests runs N threads in a tight loop, trying to retrieve a value from the dictionary. If it 
-    // exists, it attempts to remove it with a probability of 0.02; otherwise it attempts to add it with a probability of 0.6.
-
     class Program
     {
         const double s = 0.86;
diff --git a/BitFaster.Caching.ThroughputAnalysis/Results-d16v3.csv b/BitFaster.Caching.ThroughputAnalysis/Results-d16v3.csv
new file mode 100644
index 00000000..146a91d1
--- /dev/null
+++ b/BitFaster.Caching.ThroughputAnalysis/Results-d16v3.csv
@@ -0,0 +1,3 @@
+Class,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52
+concurrentLru,0.8390250696495687,0.376202024249042,0.5288059086657011,0.7093868281940474,0.7394893604119547,0.7692742628140901,0.8868098687242683,1.1103519079940203,1.1146275546334696,1.1896956651415957,1.218291000213035,1.428007110761408,1.5604813148567545,1.7652261790607608,1.7392266215766368,1.7262235364537413,1.5551914577907315,1.2065476930204835,1.6772190225591257,1.4505442732221985,1.6554578375581996,1.6428097093936829,1.383943422226727,1.541787500246927,1.4977153849517946,1.5929945003396602,1.464912894651603,1.6771227784749316,1.693182784861802,1.5899175063352249,1.708473989654639,1.676827059628992,1.5859594719591195,1.4853919456762457,1.5125032594445238,1.5540944333826734,1.5259467447885415,1.4007292749524853,1.722761892109386,1.3916878485906248,1.5881997225453819,1.7367430064460458,1.623407206289487,1.8656657878532774,1.633866434904528,1.819263632874497,1.7351197239993046,1.7420114756094713,1.603307315418468,1.6224748329005056,1.8875801666400183,1.5992687897052116
+classicLru,0.9098320086179288,0.3548161187640513,0.28308098550681976,0.2685043912718486,0.2878894384794232,0.2892307322982296,0.29284371537515264,0.28980868732332427,0.30849250561160724,0.26892877721191133,0.29921794493796816,0.299734102131323,0.29605330990959544,0.3017998708361224,0.30384554825860344,0.2789464544585001,0.30199199158096063,0.315005855171333,0.3000289725345978,0.30821724908553866,0.3074555196413347,0.31380872502367835,0.3032383329166773,0.3088044252600553,0.27653983338663085,0.2719218492421865,0.3121618896532693,0.30581076829396336,0.2688720853107021,0.3063708500881169,0.3116421735018943,0.31048221579941243,0.3056369823416011,0.31830101999889526,0.2925308136601242,0.30431060537168064,0.31187804957991944,0.3194568990601275,0.32811896351813086,0.31280839486391765,0.2798944769149913,0.31559749074747057,0.3189235194959264,0.3111095381224312,0.33029303730386816,0.28871616360715247,0.3248120583007375,0.3199919522024021,0.2834168353076367,0.3140325356925769,0.3263779096986623,0.26610456988977205
diff --git a/BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs b/BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs
deleted file mode 100644
index feb9dd35..00000000
--- a/BitFaster.Caching.ThroughputAnalysis/ThreadSafeRandom.cs
+++ /dev/null
@@ -1,26 +0,0 @@
-﻿using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace BitFaster.Caching.ThroughputAnalysis
-{
-    public static class ThreadSafeRandom
-    {
-        [ThreadStatic]
-        private static Random _local;
-
-        private static Random _global = new Random();
-
-        public static int Next(int max)
-        {
-            Random inst = _local;
-            if (inst == null)
-            {
-                int seed;
-                lock (_global) seed = _global.Next();
-                _local = inst = new Random(seed);
-            }
-            return inst.Next(max);
-        }
-    }
-}
diff --git a/README.md b/README.md
index 7fcff656..e941fb36 100644
--- a/README.md
+++ b/README.md
@@ -202,7 +202,7 @@ FastConcurrentLru does not allocate and is approximately 10x faster than System.
 
 ## ConcurrentLru Throughput
 
-In this test, we generate 2000 samples of 500 keys with a Zipfian distribution (s = 0.86). Caches have size 50. From N concurrent threads, fetch the sample keys in sequence (so each thread is using the same input keys). The principal scalability limit in concurrent applications is the exclusive resource lock. As the number of threads increases, ConcurrentLru significantly outperforms an LRU implemented with a short lived exclusive lock used to synchronize the linked list data structure.
+In this test, we generate 2000 samples of 500 keys with a Zipfian distribution (s = 0.86). Caches have size 50. From N concurrent threads, fetch the sample keys in sequence (each thread is using the same input keys). The principal scalability limit in concurrent applications is the exclusive resource lock. As the number of threads increases, ConcurrentLru significantly outperforms an LRU implemented with a short-lived exclusive lock used to synchronize the linked list data structure.
 
 This test was run on a Standard D16s v3 Azure VM (16 cpus), with .NET Core 3.1.
 

From b2acf64fee5f7223b0594e7ee780a2812142f5ee Mon Sep 17 00:00:00 2001
From: alexpeck <alexpeck@microsoft.com>
Date: Tue, 30 Jun 2020 21:40:15 -0700
Subject: [PATCH 3/3] chart

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e941fb36..6bd2f342 100644
--- a/README.md
+++ b/README.md
@@ -206,7 +206,7 @@ In this test, we generate 2000 samples of 500 keys with a Zipfian distribution (
 
 This test was run on a Standard D16s v3 Azure VM (16 cpus), with .NET Core 3.1.
 
-
+![image](https://user-images.githubusercontent.com/12851828/86203563-2f941880-bb1a-11ea-8d6a-70ece91b4362.png)
 
 ## Meta-programming using structs and JIT value type optimization