From 3d4de85430f4d9c7a25751b97a8af5fb835d5a4c Mon Sep 17 00:00:00 2001
From: Maxim Rebguns <mrmaxguns@gmail.com>
Date: Wed, 6 May 2020 09:28:12 -0500
Subject: [PATCH 1/9] added decrypt_caesar_with_chi_squared.py and ran all
 checks

---
 ciphers/decrypt_caesar_with_chi_squared.py | 225 +++++++++++++++++++++
 1 file changed, 225 insertions(+)
 create mode 100644 ciphers/decrypt_caesar_with_chi_squared.py

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
new file mode 100644
index 000000000000..acc4c7b60fbb
--- /dev/null
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -0,0 +1,225 @@
+def decrypt_caesar_with_chi_squared(
+    ciphertext: str,
+    cipher_alphabet: list = [],
+    frequencies_dict: dict = {},
+    case_sensetive: bool = False,
+) -> list:
+    """
+    Basic Usage
+    ===========
+    Arguments:
+    * ciphertext (str): the text to decode (encoded with the caesar cipher)
+
+    Optional Arguments:
+    * cipher_alphabet (list): the alphabet used for the cipher (each letter is
+      a string separated by commas)
+    * frequencies_dict (dict): a dictionary of letter frequencies where keys are
+      the letters and values are a percentage representation of the frequency as
+      a decimal/float
+    * case_sensetive (bool): a boolean value: True if the case matters during
+      decryption, False if it doesn't
+
+    Returns:
+    * A list in the form of:
+      [
+        most_likely_cipher,
+        most_likely_cipher_chi_squared_value,
+        decoded_most_likely_cipher
+      ]
+
+      where...
+      - most_likely_cipher is an integer representing the shift of the smallest
+        chi-squared statistic (most likely key)
+      - most_likely_cipher_chi_squared_value is a float representing the
+        chi-squared statistic of the most likely shift
+      - decoded_most_likely_cipher is a string with the decoded cipher
+        (decoded by the most_likely_cipher key)
+
+
+    The Chi-squared test
+    ====================
+
+    The caesar cipher
+    -----------------
+    The caesar cipher is a very insecure encryption algorithm, however it has
+    been used since Julius Caesar. The cipher is a simple substitution cipher
+    where each character in the plain text is replaced by a character in the
+    alphabet a certain number of characters after the original character. The
+    number of characters away is called the shift or key. For example:
+
+    Plain text: hello
+    Key: 1
+    Cipher text: ifmmp
+    (each letter in hello has been shifted one to the right in the eng. alphabet)
+
+    As you can imagine, this doesn't provide lots of security. In fact,
+    decrypting ciphertext by brute force is extremely easy, even by hand. One
+    way to automate choosing the correct key is the chi-squared test.
+
+    The chi-squared test
+    -------------------
+    Each letter in the english alphabet has a frequency, or the amount of times
+    it shows up compared to other letters (usually expressed as a decimal
+    representing the percentage likelyhood). The most common letter in the
+    english language is "e" with a frequency of 0.11162 or 11.162%. The test is
+    completed in the following fashion.
+
+    1. The ciphertext is decoded in a brute force way (once for each of the
+       26 possible shifts)
+    2. For every candidate decryption, for each letter in it, the expected
+       number of times the letter should appear in the message is calculated by
+       multiplying the total number of characters by the frequency of the letter
+
+       For example:
+       In a message of 100 characters, e should appear around 11.162 times.
+
+     3. Then, to calculate the margin of error (the amount of times the letter
+        SHOULD appear with the amount of times the letter DOES appear), we use
+        the chi-squared test. The following formula is used:
+
+        Let:
+        - n be the number of times the letter actually appears
+        - p be the predicted value of the number of times the letter should
+          appear (see #2)
+        - let v be the chi-squared test result (reffered to here as chi-squared
+          value/statistic)
+
+        (n - p)^2
+        --------- = v
+           p
+
+    4. Each chi squared value for each letter is then added up to the total.
+       The total is the chi-squared statistic for that encryption key.
+    5. The shift with the lowest chi-squared statistic is the most likely
+       key, and its decryption is the most likely plaintext.
+
+    Further Reading
+    ================
+
+    * http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/
+    * https://en.wikipedia.org/wiki/Letter_frequency
+    * https://en.wikipedia.org/wiki/Chi-squared_test
+    * https://en.m.wikipedia.org/wiki/Caesar_cipher
+
+    Doctests
+    ========
+    >>> decrypt_caesar_with_chi_squared('dof pz aol jhlzhy jpwoly zv wvwbshy? pa pz avv lhzf av jyhjr!')
+    [7, 3129.228005747531, 'why is the caesar cipher so popular? it is too easy to crack!']
+    """
+    if cipher_alphabet == []:
+        # get list of all leters in english alphabet
+        alphabet_letters = [chr(i) for i in range(97, 123)]
+    else:
+        # Set alphabet_letters to the custom alphabet
+        alphabet_letters = cipher_alphabet
+
+    if frequencies_dict == {}:
+        # Frequencies of letters in the english language (how much they show up)
+        frequencies = {
+            "a": 0.08497,
+            "b": 0.01492,
+            "c": 0.02202,
+            "d": 0.04253,
+            "e": 0.11162,
+            "f": 0.02228,
+            "g": 0.02015,
+            "h": 0.06094,
+            "i": 0.07546,
+            "j": 0.00153,
+            "k": 0.01292,
+            "l": 0.04025,
+            "m": 0.02406,
+            "n": 0.06749,
+            "o": 0.07507,
+            "p": 0.01929,
+            "q": 0.00095,
+            "r": 0.07587,
+            "s": 0.06327,
+            "t": 0.09356,
+            "u": 0.02758,
+            "v": 0.00978,
+            "w": 0.02560,
+            "x": 0.00150,
+            "y": 0.01994,
+            "z": 0.00077,
+        }
+    else:
+        # Custom frequencies dictionary
+        frequencies = frequencies_dict
+
+    if not case_sensetive:
+        ciphertext = ciphertext.lower()
+
+    # Chi squared statistic values
+    chi_squared_statistic_values = {}
+
+    # cycle through all of the shifts
+    for shift in range(len(alphabet_letters)):
+        decrypted_with_shift = ""
+
+        # decrypt the message with the shift
+        for letter in ciphertext:
+            try:
+                # Try to index the letter in the alphabet
+                new_key = (alphabet_letters.index(letter) - shift) % len(
+                    alphabet_letters
+                )
+                decrypted_with_shift += alphabet_letters[new_key]
+            except ValueError:
+                # Append the character if it isn't in the alphabet
+                decrypted_with_shift += letter
+
+        chi_squared_statistic = 0
+
+        # Loop through each letter in the decoded message with the shift
+        for letter in decrypted_with_shift:
+            if case_sensetive:
+                if letter in frequencies:
+                    # Get the amount of times the letter occurs in the message
+                    occurences = decrypted_with_shift.count(letter)
+
+                    # Get the excepcted amount of times the letter should appear based on letter frequencies
+                    expected = frequencies[letter] * occurences
+
+                    # Complete the chi squared statistic formula
+                    chi_letter_value = ((occurences - expected) ** 2) / expected
+
+                    # Add the margin of error to the total chi squared statistic
+                    chi_squared_statistic += chi_letter_value
+            else:
+                if letter.lower() in frequencies:
+                    # Get the amount of times the letter occurs in the message
+                    occurences = decrypted_with_shift.count(letter)
+
+                    # Get the excepcted amount of times the letter should appear based on letter frequencies
+                    expected = frequencies[letter] * occurences
+
+                    # Complete the chi squared statistic formula
+                    chi_letter_value = ((occurences - expected) ** 2) / expected
+
+                    # Add the margin of error to the total chi squared statistic
+                    chi_squared_statistic += chi_letter_value
+
+        # Add the data to the chi_squared_statistic_values dictionary
+        chi_squared_statistic_values[shift] = [
+            chi_squared_statistic,
+            decrypted_with_shift,
+        ]
+
+    # Get the most likely cipher by finding the cipher with the smallest chi squared statistic
+    most_likely_cipher = min(
+        chi_squared_statistic_values, key=chi_squared_statistic_values.get
+    )
+
+    # Get all the data from the most likely cipher (key, decoded message)
+    most_likely_cipher_chi_squared_value = chi_squared_statistic_values[
+        most_likely_cipher
+    ][0]
+    decoded_most_likely_cipher = chi_squared_statistic_values[most_likely_cipher][1]
+
+    # Return the data on the most likely shift
+    return [
+        most_likely_cipher,
+        most_likely_cipher_chi_squared_value,
+        decoded_most_likely_cipher,
+    ]

From 5547591fb484041fb3dd93df103e0546710a4360 Mon Sep 17 00:00:00 2001
From: Maxim R <49735721+mrmaxguns@users.noreply.github.com>
Date: Wed, 6 May 2020 10:50:34 -0500
Subject: [PATCH 2/9] Updated default parameters

Removed mistake with mutable default arguments

Co-authored-by: Christian Clauss <cclauss@me.com>
---
 ciphers/decrypt_caesar_with_chi_squared.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index acc4c7b60fbb..94f108157f0e 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -1,7 +1,7 @@
 def decrypt_caesar_with_chi_squared(
     ciphertext: str,
-    cipher_alphabet: list = [],
-    frequencies_dict: dict = {},
+    cipher_alphabet = None,
+    frequencies_dict = None,
     case_sensetive: bool = False,
 ) -> list:
     """

From 479c866427eec4b9c6ca3c9fcadd647cd83d0dcb Mon Sep 17 00:00:00 2001
From: Maxim R <49735721+mrmaxguns@users.noreply.github.com>
Date: Wed, 6 May 2020 10:52:05 -0500
Subject: [PATCH 3/9] Updated handling for optional arguments

Co-authored-by: Christian Clauss <cclauss@me.com>
---
 ciphers/decrypt_caesar_with_chi_squared.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index 94f108157f0e..557f5c09ca0f 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -106,7 +106,10 @@ def decrypt_caesar_with_chi_squared(
     >>> decrypt_caesar_with_chi_squared('dof pz aol jhlzhy jpwoly zv wvwbshy? pa pz avv lhzf av jyhjr!')
     [7, 3129.228005747531, 'why is the caesar cipher so popular? it is too easy to crack!']
     """
-    if cipher_alphabet == []:
+    cipher_alphabet = cipher_alphabet or []
+    frequencies_dict = frequencies_dict or {}
+
+    if not cipher_alphabet:
         # get list of all leters in english alphabet
         alphabet_letters = [chr(i) for i in range(97, 123)]
     else:

From 0850d89ba10b83dcd572271d85226c3e0404ff0e Mon Sep 17 00:00:00 2001
From: Maxim R <49735721+mrmaxguns@users.noreply.github.com>
Date: Wed, 6 May 2020 10:58:56 -0500
Subject: [PATCH 4/9] Changed return statement to tuple

Made function return a tuple instead of a list
---
 ciphers/decrypt_caesar_with_chi_squared.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index 557f5c09ca0f..9eeaaf9cba30 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -20,12 +20,12 @@ def decrypt_caesar_with_chi_squared(
       decryption, False if it doesn't
 
     Returns:
-    * A list in the form of:
-      [
+    * A tuple in the form of:
+      (
         most_likely_cipher,
         most_likely_cipher_chi_squared_value,
         decoded_most_likely_cipher
-      ]
+      )
 
       where...
       - most_likely_cipher is an integer representing the shift of the smallest
@@ -221,8 +221,8 @@ def decrypt_caesar_with_chi_squared(
     decoded_most_likely_cipher = chi_squared_statistic_values[most_likely_cipher][1]
 
     # Return the data on the most likely shift
-    return [
+    return (
         most_likely_cipher,
         most_likely_cipher_chi_squared_value,
         decoded_most_likely_cipher,
-    ]
+    )

From bb587af13b07c24c331b73d9205d8599c3eb650a Mon Sep 17 00:00:00 2001
From: Maxim Rebguns <mrmaxguns@gmail.com>
Date: Wed, 6 May 2020 11:10:08 -0500
Subject: [PATCH 5/9] Added more doctests

---
 ciphers/decrypt_caesar_with_chi_squared.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index 9eeaaf9cba30..0030cdce9cd9 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -104,7 +104,14 @@ def decrypt_caesar_with_chi_squared(
     Doctests
     ========
     >>> decrypt_caesar_with_chi_squared('dof pz aol jhlzhy jpwoly zv wvwbshy? pa pz avv lhzf av jyhjr!')
-    [7, 3129.228005747531, 'why is the caesar cipher so popular? it is too easy to crack!']
+    (7, 3129.228005747531, 'why is the caesar cipher so popular? it is too easy to crack!')
+
+    >>> decrypt_caesar_with_chi_squared('crybd cdbsxq')
+    (10, 233.35343938980898, 'short string')
+
+    >>> decrypt_caesar_with_chi_squared(12)
+    Traceback (most recent call last):
+    AttributeError: 'int' object has no attribute 'lower'
     """
     cipher_alphabet = cipher_alphabet or []
     frequencies_dict = frequencies_dict or {}

From 915c01649ecd864019fa9d042b3402f4ea7a1091 Mon Sep 17 00:00:00 2001
From: Maxim Rebguns <mrmaxguns@gmail.com>
Date: Wed, 6 May 2020 11:11:51 -0500
Subject: [PATCH 6/9] Fixed spelling mistakes

---
 ciphers/decrypt_caesar_with_chi_squared.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index 0030cdce9cd9..10b90e1abb48 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -60,7 +60,7 @@ def decrypt_caesar_with_chi_squared(
     -------------------
     Each letter in the english alphabet has a frequency, or the amount of times
     it shows up compared to other letters (usually expressed as a decimal
-    representing the percentage likelyhood). The most common letter in the
+    representing the percentage likelihood). The most common letter in the
     english language is "e" with a frequency of 0.11162 or 11.162%. The test is
     completed in the following fashion.
 
@@ -81,7 +81,7 @@ def decrypt_caesar_with_chi_squared(
         - n be the number of times the letter actually appears
         - p be the predicted value of the number of times the letter should
           appear (see #2)
-        - let v be the chi-squared test result (reffered to here as chi-squared
+        - let v be the chi-squared test result (referred to here as chi-squared
           value/statistic)
 
         (n - p)^2
@@ -186,26 +186,26 @@ def decrypt_caesar_with_chi_squared(
             if case_sensetive:
                 if letter in frequencies:
                     # Get the amount of times the letter occurs in the message
-                    occurences = decrypted_with_shift.count(letter)
+                    occurrences = decrypted_with_shift.count(letter)
 
                     # Get the excepcted amount of times the letter should appear based on letter frequencies
-                    expected = frequencies[letter] * occurences
+                    expected = frequencies[letter] * occurrences
 
                     # Complete the chi squared statistic formula
-                    chi_letter_value = ((occurences - expected) ** 2) / expected
+                    chi_letter_value = ((occurrences - expected) ** 2) / expected
 
                     # Add the margin of error to the total chi squared statistic
                     chi_squared_statistic += chi_letter_value
             else:
                 if letter.lower() in frequencies:
                     # Get the amount of times the letter occurs in the message
-                    occurences = decrypted_with_shift.count(letter)
+                    occurrences = decrypted_with_shift.count(letter)
 
                     # Get the excepcted amount of times the letter should appear based on letter frequencies
-                    expected = frequencies[letter] * occurences
+                    expected = frequencies[letter] * occurrences
 
                     # Complete the chi squared statistic formula
-                    chi_letter_value = ((occurences - expected) ** 2) / expected
+                    chi_letter_value = ((occurrences - expected) ** 2) / expected
 
                     # Add the margin of error to the total chi squared statistic
                     chi_squared_statistic += chi_letter_value

From 02accd6c73538968bc6f24f0a4a1ab7bca2ab2db Mon Sep 17 00:00:00 2001
From: Maxim Rebguns <mrmaxguns@gmail.com>
Date: Wed, 6 May 2020 11:22:21 -0500
Subject: [PATCH 7/9] black . - reformatted decrypt_caesar_with_chi_squared.py

---
 ciphers/decrypt_caesar_with_chi_squared.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index 10b90e1abb48..cbecb9b72398 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -1,7 +1,7 @@
 def decrypt_caesar_with_chi_squared(
     ciphertext: str,
-    cipher_alphabet = None,
-    frequencies_dict = None,
+    cipher_alphabet=None,
+    frequencies_dict=None,
     case_sensetive: bool = False,
 ) -> list:
     """

From 177ac0dcbba7e02732802af8a193c774d72af389 Mon Sep 17 00:00:00 2001
From: Maxim Rebguns <mrmaxguns@gmail.com>
Date: Wed, 6 May 2020 11:41:58 -0500
Subject: [PATCH 8/9] Updated if statements to fit the updated code

---
 ciphers/decrypt_caesar_with_chi_squared.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index cbecb9b72398..04ec2a112940 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -116,7 +116,7 @@ def decrypt_caesar_with_chi_squared(
     cipher_alphabet = cipher_alphabet or []
     frequencies_dict = frequencies_dict or {}
 
-    if not cipher_alphabet:
+    if cipher_alphabet == []:
         # get list of all leters in english alphabet
         alphabet_letters = [chr(i) for i in range(97, 123)]
     else:

From a056cebef0a7eacc039a87a0484a7df9eb45c987 Mon Sep 17 00:00:00 2001
From: Maxim Rebguns <mrmaxguns@gmail.com>
Date: Wed, 6 May 2020 11:47:17 -0500
Subject: [PATCH 9/9] Minimized amount of lines in the code.

---
 ciphers/decrypt_caesar_with_chi_squared.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
index 04ec2a112940..3c37631c7b35 100644
--- a/ciphers/decrypt_caesar_with_chi_squared.py
+++ b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -113,16 +113,9 @@ def decrypt_caesar_with_chi_squared(
     Traceback (most recent call last):
     AttributeError: 'int' object has no attribute 'lower'
     """
-    cipher_alphabet = cipher_alphabet or []
+    alphabet_letters = cipher_alphabet or [chr(i) for i in range(97, 123)]
     frequencies_dict = frequencies_dict or {}
 
-    if cipher_alphabet == []:
-        # get list of all leters in english alphabet
-        alphabet_letters = [chr(i) for i in range(97, 123)]
-    else:
-        # Set alphabet_letters to the custom alphabet
-        alphabet_letters = cipher_alphabet
-
     if frequencies_dict == {}:
         # Frequencies of letters in the english language (how much they show up)
         frequencies = {