From 3b09d127dea0420b91accc777dc6a8dd2f785a21 Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 19:36:11 +0530 Subject: [PATCH 01/14] Add breast cancer detection algorithm --- quantum/breast-cancer.py | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 quantum/breast-cancer.py diff --git a/quantum/breast-cancer.py b/quantum/breast-cancer.py new file mode 100644 index 000000000000..4bcbd4c00b90 --- /dev/null +++ b/quantum/breast-cancer.py @@ -0,0 +1,53 @@ +import numpy as np +from sklearn import datasets +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler, MinMaxScaler +from sklearn.decomposition import PCA +from qiskit.aqua import MissingOptionalLibraryError + + +def breast_cancer(training_size, test_size, n, plot_data=False): + """ returns breast cancer dataset """ + class_labels = [r'A', r'B'] + data, target = datasets.load_breast_cancer(return_X_y=True) + sample_train, sample_test, label_train, label_test = \ + train_test_split(data, target, test_size=0.3, random_state=12) + + # Now we standardize for gaussian around 0 with unit variance + std_scale = StandardScaler().fit(sample_train) + sample_train = std_scale.transform(sample_train) + sample_test = std_scale.transform(sample_test) + + # Now reduce number of features to number of qubits + pca = PCA(n_components=n).fit(sample_train) + sample_train = pca.transform(sample_train) + sample_test = pca.transform(sample_test) + + # Scale to the range (-1,+1) + samples = np.append(sample_train, sample_test, axis=0) + minmax_scale = MinMaxScaler((-1, 1)).fit(samples) + sample_train = minmax_scale.transform(sample_train) + sample_test = minmax_scale.transform(sample_test) + + # Pick training size number of samples from each distro + training_input = {key: (sample_train[label_train == k, :])[:training_size] + for k, key in enumerate(class_labels)} + test_input = {key: (sample_test[label_test == k, :])[:test_size] + for k, key in enumerate(class_labels)} + + if plot_data: + try: + import matplotlib.pyplot as plt + except ImportError as ex: + raise MissingOptionalLibraryError( + libname='Matplotlib', + name='breast_cancer', + pip_install='pip install matplotlib') from ex + for k in range(0, 2): + plt.scatter(sample_train[label_train == k, 0][:training_size], + sample_train[label_train == k, 1][:training_size]) + + plt.title("PCA dim. reduced Breast cancer dataset") + plt.show() + + return sample_train, training_input, test_input, class_labels From 9743b923cc24a5e170dab2ef426f92b1bf7e15dd Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Wed, 7 Oct 2020 14:06:57 +0000 Subject: [PATCH 02/14] fixup! Format Python code with psf/black push --- quantum/breast-cancer.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/quantum/breast-cancer.py b/quantum/breast-cancer.py index 4bcbd4c00b90..c4b86dd16b13 100644 --- a/quantum/breast-cancer.py +++ b/quantum/breast-cancer.py @@ -1,17 +1,18 @@ import numpy as np +from qiskit.aqua import MissingOptionalLibraryError from sklearn import datasets -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler, MinMaxScaler from sklearn.decomposition import PCA -from qiskit.aqua import MissingOptionalLibraryError +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MinMaxScaler, StandardScaler def breast_cancer(training_size, test_size, n, plot_data=False): """ returns breast cancer dataset """ - class_labels = [r'A', r'B'] + class_labels = [r"A", r"B"] data, target = datasets.load_breast_cancer(return_X_y=True) - sample_train, sample_test, label_train, label_test = \ - train_test_split(data, target, test_size=0.3, random_state=12) + sample_train, sample_test, label_train, label_test = train_test_split( + data, target, test_size=0.3, random_state=12 + ) # Now we standardize for gaussian around 0 with unit variance std_scale = StandardScaler().fit(sample_train) @@ -30,22 +31,29 @@ def breast_cancer(training_size, test_size, n, plot_data=False): sample_test = minmax_scale.transform(sample_test) # Pick training size number of samples from each distro - training_input = {key: (sample_train[label_train == k, :])[:training_size] - for k, key in enumerate(class_labels)} - test_input = {key: (sample_test[label_test == k, :])[:test_size] - for k, key in enumerate(class_labels)} + training_input = { + key: (sample_train[label_train == k, :])[:training_size] + for k, key in enumerate(class_labels) + } + test_input = { + key: (sample_test[label_test == k, :])[:test_size] + for k, key in enumerate(class_labels) + } if plot_data: try: import matplotlib.pyplot as plt except ImportError as ex: raise MissingOptionalLibraryError( - libname='Matplotlib', - name='breast_cancer', - pip_install='pip install matplotlib') from ex + libname="Matplotlib", + name="breast_cancer", + pip_install="pip install matplotlib", + ) from ex for k in range(0, 2): - plt.scatter(sample_train[label_train == k, 0][:training_size], - sample_train[label_train == k, 1][:training_size]) + plt.scatter( + sample_train[label_train == k, 0][:training_size], + sample_train[label_train == k, 1][:training_size], + ) plt.title("PCA dim. reduced Breast cancer dataset") plt.show() From fccec223ccbfc2bb7e60be41dbf1d1f1be9dccbc Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 19:49:57 +0530 Subject: [PATCH 03/14] Add qiskit --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 31dc586c29db..67d9bbbd8448 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ numpy opencv-python pandas pillow +qiskit requests scikit-fuzzy sklearn From 54752797baab20158ed0b929ae6877ad016d9715 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Wed, 7 Oct 2020 14:20:28 +0000 Subject: [PATCH 04/14] updating DIRECTORY.md --- DIRECTORY.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 6a3d31709ed6..78945d060611 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -662,6 +662,9 @@ * [Sol1](https://github.com/TheAlgorithms/Python/blob/master/project_euler/problem_99/sol1.py) * [Validate Solutions](https://github.com/TheAlgorithms/Python/blob/master/project_euler/validate_solutions.py) +## Quantum + * [Breast-Cancer](https://github.com/TheAlgorithms/Python/blob/master/quantum/breast-cancer.py) + ## Scheduling * [First Come First Served](https://github.com/TheAlgorithms/Python/blob/master/scheduling/first_come_first_served.py) * [Round Robin](https://github.com/TheAlgorithms/Python/blob/master/scheduling/round_robin.py) From 3a2aa88e04bfec414e4a11b028b7aca922fb5854 Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 19:56:50 +0530 Subject: [PATCH 05/14] Remove hyphen from filename --- quantum/{breast-cancer.py => breast_cancer.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename quantum/{breast-cancer.py => breast_cancer.py} (100%) diff --git a/quantum/breast-cancer.py b/quantum/breast_cancer.py similarity index 100% rename from quantum/breast-cancer.py rename to quantum/breast_cancer.py From 1a5ae9356cd9b4a6360001ea6628f81b7c13fc87 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Wed, 7 Oct 2020 14:27:17 +0000 Subject: [PATCH 06/14] updating DIRECTORY.md --- DIRECTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DIRECTORY.md b/DIRECTORY.md index 78945d060611..e2f34de6abea 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -663,7 +663,7 @@ * [Validate Solutions](https://github.com/TheAlgorithms/Python/blob/master/project_euler/validate_solutions.py) ## Quantum - * [Breast-Cancer](https://github.com/TheAlgorithms/Python/blob/master/quantum/breast-cancer.py) + * [Breast Cancer](https://github.com/TheAlgorithms/Python/blob/master/quantum/breast_cancer.py) ## Scheduling * [First Come First Served](https://github.com/TheAlgorithms/Python/blob/master/scheduling/first_come_first_served.py) From 02fac3fc5921b5fc079f756e1c461fdaea5ef74c Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 20:24:08 +0530 Subject: [PATCH 07/14] Remove qiskit requirement --- quantum/breast_cancer.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/quantum/breast_cancer.py b/quantum/breast_cancer.py index c4b86dd16b13..7243649fc7a9 100644 --- a/quantum/breast_cancer.py +++ b/quantum/breast_cancer.py @@ -1,5 +1,4 @@ import numpy as np -from qiskit.aqua import MissingOptionalLibraryError from sklearn import datasets from sklearn.decomposition import PCA from sklearn.model_selection import train_test_split @@ -43,12 +42,6 @@ def breast_cancer(training_size, test_size, n, plot_data=False): if plot_data: try: import matplotlib.pyplot as plt - except ImportError as ex: - raise MissingOptionalLibraryError( - libname="Matplotlib", - name="breast_cancer", - pip_install="pip install matplotlib", - ) from ex for k in range(0, 2): plt.scatter( sample_train[label_train == k, 0][:training_size], From 3e15438b5572df68bd3aa368bbb869701e78c4ea Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 20:32:56 +0530 Subject: [PATCH 08/14] Add ImportError exception --- quantum/breast_cancer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/quantum/breast_cancer.py b/quantum/breast_cancer.py index 7243649fc7a9..fd6f947e41ce 100644 --- a/quantum/breast_cancer.py +++ b/quantum/breast_cancer.py @@ -42,6 +42,8 @@ def breast_cancer(training_size, test_size, n, plot_data=False): if plot_data: try: import matplotlib.pyplot as plt + except ImportError as e: + print(e) for k in range(0, 2): plt.scatter( sample_train[label_train == k, 0][:training_size], From 2f75396fdd7da4e22ba08ca890704915c8f9260c Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 20:42:01 +0530 Subject: [PATCH 09/14] Remove unused requirement qiskit --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 67d9bbbd8448..31dc586c29db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,6 @@ numpy opencv-python pandas pillow -qiskit requests scikit-fuzzy sklearn From 577843f83a0d2535b22729eb2f030ed48a01f827 Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 22:20:57 +0530 Subject: [PATCH 10/14] Adding doctests --- quantum/breast_cancer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/quantum/breast_cancer.py b/quantum/breast_cancer.py index fd6f947e41ce..507ecd958468 100644 --- a/quantum/breast_cancer.py +++ b/quantum/breast_cancer.py @@ -6,7 +6,13 @@ def breast_cancer(training_size, test_size, n, plot_data=False): - """ returns breast cancer dataset """ + """ returns breast cancer dataset + + Wikipedia reference: https://en.m.wikipedia.org/wiki/Breast_cancer + + >>> breast_cancer(10, 0.3, 7) + 24.9 + """ class_labels = [r"A", r"B"] data, target = datasets.load_breast_cancer(return_X_y=True) sample_train, sample_test, label_train, label_test = train_test_split( From 58dc11dd8b0c84df2642b8fccd753fcb7eab3c9f Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Wed, 7 Oct 2020 16:51:44 +0000 Subject: [PATCH 11/14] fixup! Format Python code with psf/black push --- quantum/breast_cancer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quantum/breast_cancer.py b/quantum/breast_cancer.py index 507ecd958468..a2d1ccf9239e 100644 --- a/quantum/breast_cancer.py +++ b/quantum/breast_cancer.py @@ -6,10 +6,10 @@ def breast_cancer(training_size, test_size, n, plot_data=False): - """ returns breast cancer dataset - + """returns breast cancer dataset + Wikipedia reference: https://en.m.wikipedia.org/wiki/Breast_cancer - + >>> breast_cancer(10, 0.3, 7) 24.9 """ From fb24067f64a4242b8b429d62391427b766f26e42 Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 22:27:02 +0530 Subject: [PATCH 12/14] Update breast_cancer.py --- quantum/breast_cancer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantum/breast_cancer.py b/quantum/breast_cancer.py index a2d1ccf9239e..27134fd798b6 100644 --- a/quantum/breast_cancer.py +++ b/quantum/breast_cancer.py @@ -10,7 +10,7 @@ def breast_cancer(training_size, test_size, n, plot_data=False): Wikipedia reference: https://en.m.wikipedia.org/wiki/Breast_cancer - >>> breast_cancer(10, 0.3, 7) + >>> breast_cancer(10, 4, 7) 24.9 """ class_labels = [r"A", r"B"] From 38b4fe4c17441ba3c9117b3ed8d5435455034f75 Mon Sep 17 00:00:00 2001 From: Prakhar Gurunani Date: Wed, 7 Oct 2020 22:40:10 +0530 Subject: [PATCH 13/14] Correct Doctest, plot_data, updated typehints --- quantum/breast_cancer.py | 77 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/quantum/breast_cancer.py b/quantum/breast_cancer.py index 27134fd798b6..dea5431cb01f 100644 --- a/quantum/breast_cancer.py +++ b/quantum/breast_cancer.py @@ -5,13 +5,78 @@ from sklearn.preprocessing import MinMaxScaler, StandardScaler -def breast_cancer(training_size, test_size, n, plot_data=False): +def breast_cancer(training_size, test_size, n, plot_data): """returns breast cancer dataset Wikipedia reference: https://en.m.wikipedia.org/wiki/Breast_cancer + The function return four values: return sample_train, training_input, test_input, class_labels - >>> breast_cancer(10, 4, 7) - 24.9 + >>> breast_cancer(10, 4, 7, False) + (array([[-0.80381589, -0.17496736, 0.02153529, ..., -0.1397565 , + -0.4738706 , -0.21959184], + [-0.19172231, -0.48047638, -0.5502895 , ..., -0.02604959, + -0.2322415 , -0.19774116], + [-0.8333448 , -0.25876499, -0.26384514, ..., -0.32234254, + -0.17787827, 0.01960881], + ..., + [-0.88227162, -0.50950835, -0.48471936, ..., 0.06051423, + 0.05369423, -0.01157853], + [-0.36471423, -0.4851768 , -0.58003495, ..., -0.00202821, + -0.1576751 , -0.25529689], + [-0.81134279, -0.25945047, -0.54472716, ..., -0.06312866, + -0.14663466, 0.03603051]]), {'A': array([[-0.19172231, -0.48047638, -0.5502895 , 0.62866253, -0.02604959, + -0.2322415 , -0.19774116], + [-0.08931675, -0.69047276, -0.34473474, 0.2430907 , 0.06598203, + -0.43104084, -0.15886876], + [-0.50124222, -0.38404633, -0.66125808, 0.09100724, 0.10701545, + -0.13283072, -0.0307179 ], + [-0.04524656, -0.06299861, -0.5886886 , 0.33959658, -0.09071451, + -0.43135931, 0.02902253], + [-0.36503477, -0.4966686 , -0.20393198, 0.56148734, 0.18131077, + -0.21103257, -0.14178392], + [-0.44318575, -0.59164354, -0.29118452, 0.04301003, -0.18131692, + 0.20621245, -0.09727456], + [-0.4146524 , -0.22790637, -0.69929679, 0.32207269, 0.10755013, + -0.33974676, -0.11637961], + [ 0.18070867, -0.79119457, -0.29254301, 0.04961691, -0.14589901, + -0.41185356, -0.01666176], + [-0.05916688, -0.00087508, -0.31287342, 0.35885527, 0.1041368 , + -0.34113249, 0.08670244], + [ 0.17702071, -0.80782303, -0.06138567, -0.18814197, 0.04569151, + -0.10576475, 0.15044493]]), 'B': array([[-0.80381589, -0.17496736, 0.02153529, -0.07187231, -0.1397565 , + -0.4738706 , -0.21959184], + [-0.8333448 , -0.25876499, -0.26384514, 0.1756967 , -0.32234254, + -0.17787827, 0.01960881], + [-0.70730804, -0.29054463, -0.44993423, 0.03257197, -0.10219939, + -0.25074147, -0.08352498], + [-0.79008762, -0.3313085 , -0.28353916, 0.19137684, -0.42320134, + -0.036884 , -0.24138241], + [-0.67599686, -0.50282222, -0.41082298, 0.05409269, 0.08031647, + -0.19597253, -0.10733675], + [-0.67547425, -0.15140441, -0.52477832, 0.02062338, -0.10116737, + -0.13453581, -0.07837964], + [-0.79041979, -0.352277 , -0.39978347, 0.24391654, -0.31451085, + 0.11470016, -0.1341472 ], + [-0.78150554, -0.41353571, -0.48452891, -0.05467579, 0.03033078, + -0.32269787, -0.06426137], + [-0.76212084, -0.40881569, -0.48040504, 0.1081265 , 0.02596033, + -0.20427163, 0.09202506], + [-0.74304803, -0.36890094, -0.41951978, 0.2119361 , -0.17708908, + -0.08143223, -0.20604129]])}, {'A': array([[-0.10389794, 0.17924751, -0.67583766, 0.55657289, 0.11036026, + -0.13329861, 0.10039272], + [-0.53086407, -0.4573074 , -0.17817215, 0.40169334, -0.08631139, + -0.27384232, -0.31916126], + [-0.21040012, -0.42492679, -0.16556034, -0.00830156, -0.28627151, + 0.05469112, -0.08206313], + [-0.02115007, -0.46893251, -0.14647214, 0.31570857, 0.03058135, + -0.5329338 , 0.28794908]]), 'B': array([[-0.82652554, -0.4336021 , -0.28307103, 0.32561542, -0.15712862, + -0.23854447, -0.08516069], + [-0.73291597, -0.5064755 , -0.35806246, 0.53985827, -0.15899323, + -0.2870263 , -0.15924347], + [-0.90409716, -0.00762145, -0.43410622, -0.03133913, -0.50751343, + -0.42602392, 0.28785164], + [-0.7863327 , -0.3957758 , -0.37520181, 0.27611366, -0.14239159, + -0.18478963, 0.01562388]])}, ['A', 'B']) """ class_labels = [r"A", r"B"] data, target = datasets.load_breast_cancer(return_X_y=True) @@ -45,6 +110,10 @@ def breast_cancer(training_size, test_size, n, plot_data=False): for k, key in enumerate(class_labels) } + return sample_train, training_input, test_input, class_labels + + # plot_data can be set to true to plot graph of array output + if plot_data: try: import matplotlib.pyplot as plt @@ -58,5 +127,3 @@ def breast_cancer(training_size, test_size, n, plot_data=False): plt.title("PCA dim. reduced Breast cancer dataset") plt.show() - - return sample_train, training_input, test_input, class_labels From 7607eae1c2960d78838e5f203d4342037ffee663 Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Wed, 7 Oct 2020 17:11:01 +0000 Subject: [PATCH 14/14] fixup! Format Python code with psf/black push --- quantum/breast_cancer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantum/breast_cancer.py b/quantum/breast_cancer.py index dea5431cb01f..062d15a54aa9 100644 --- a/quantum/breast_cancer.py +++ b/quantum/breast_cancer.py @@ -113,7 +113,7 @@ def breast_cancer(training_size, test_size, n, plot_data): return sample_train, training_input, test_input, class_labels # plot_data can be set to true to plot graph of array output - + if plot_data: try: import matplotlib.pyplot as plt