From 37cdb2f9ffecda7acd307736284d36c0dbb1ed6b Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Wed, 25 Jan 2023 18:52:20 -0500 Subject: [PATCH 01/17] Update test.sh adding ml explainability to testing --- notebooks/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/test.sh b/notebooks/test.sh index 754b50b8e..ddb23543c 100755 --- a/notebooks/test.sh +++ b/notebooks/test.sh @@ -11,8 +11,8 @@ if [[ -r /etc/git_commit ]]; then fi # Filter by tracks if first argument set. -TRACKS="intro_to_programming time_series ethics feature_engineering_new computer_vision deep_learning_intro pandas python machine_learning sql data_viz_to_coder ml_intermediate sql_advanced feature_engineering geospatial nlp game_ai data_cleaning" -TESTABLE_NOTEBOOK_TRACKS="intro_to_programming geospatial time_series ethics feature_engineering_new data_viz_to_coder ml_intermediate data_cleaning computer_vision deep_learning_intro python pandas machine_learning game_ai" +TRACKS="ml_explainability intro_to_programming time_series ethics feature_engineering_new computer_vision deep_learning_intro pandas python machine_learning sql data_viz_to_coder ml_intermediate sql_advanced feature_engineering geospatial nlp game_ai data_cleaning" +TESTABLE_NOTEBOOK_TRACKS="ml_explainability intro_to_programming geospatial time_series ethics feature_engineering_new data_viz_to_coder ml_intermediate data_cleaning computer_vision deep_learning_intro python pandas machine_learning game_ai" if [[ -n $1 && $1 != "all" ]]; then TRACKS=$1 From 8c3b21c9caffb458006c05fb3ff67e56125da6a0 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Wed, 25 Jan 2023 21:54:37 -0500 Subject: [PATCH 02/17] Add files via upload editing tutorial file to remove pdpbox --- .../raw/tut3_partial_plots.ipynb | 34 ++++++------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb b/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb index d11747442..67487f930 100644 --- a/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb @@ -88,8 +88,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "Here is the code to create the Partial Dependence Plot using the [PDPBox library](https://pdpbox.readthedocs.io/en/latest/)." + "Here is the code to create the Partial Dependence Plot using the scikit-learn library." ] }, { @@ -99,13 +98,10 @@ "outputs": [], "source": [ "from matplotlib import pyplot as plt\n", - "from pdpbox import pdp, get_dataset, info_plots\n", - "\n", - "# Create the data that we will plot\n", - "pdp_goals = pdp.pdp_isolate(model=tree_model, dataset=val_X, model_features=feature_names, feature='Goal Scored')\n", + "from sklearn.inspection import PartialDependenceDisplay\n", "\n", - "# plot it\n", - "pdp.pdp_plot(pdp_goals, 'Goal Scored')\n", + "# Create and plot the data\n", + "PartialDependenceDisplay.from_estimator(tree_model, val_X, ['Goal Scored'])\n", "plt.show()" ] }, @@ -113,9 +109,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A few items are worth pointing out as you interpret this plot\n", - "- The y axis is interpreted as **change in the prediction** from what it would be predicted at the baseline or leftmost value.\n", - "- A blue shaded area indicates level of confidence\n", + "The y axis is interpreted as **change in the prediction** from what it would be predicted at the baseline or leftmost value.\n", "\n", "From this particular graph, we see that scoring a goal substantially increases your chances of winning \"Man of The Match.\" But extra goals beyond that appear to have little impact on predictions.\n", "\n", @@ -128,10 +122,7 @@ "metadata": {}, "outputs": [], "source": [ - "feature_to_plot = 'Distance Covered (Kms)'\n", - "pdp_dist = pdp.pdp_isolate(model=tree_model, dataset=val_X, model_features=feature_names, feature=feature_to_plot)\n", - "\n", - "pdp.pdp_plot(pdp_dist, feature_to_plot)\n", + "PartialDependenceDisplay.from_estimator(tree_model, val_X, ['Distance Covered (Kms)'])\n", "plt.show()" ] }, @@ -153,9 +144,7 @@ "# Build Random Forest model\n", "rf_model = RandomForestClassifier(random_state=0).fit(train_X, train_y)\n", "\n", - "pdp_dist = pdp.pdp_isolate(model=rf_model, dataset=val_X, model_features=feature_names, feature=feature_to_plot)\n", - "\n", - "pdp.pdp_plot(pdp_dist, feature_to_plot)\n", + "PartialDependenceDisplay.from_estimator(rf_model, val_X, ['Distance Covered (Kms)'])\n", "plt.show()" ] }, @@ -179,11 +168,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Similar to previous PDP plot except we use pdp_interact instead of pdp_isolate and pdp_interact_plot instead of pdp_isolate_plot\n", - "features_to_plot = ['Goal Scored', 'Distance Covered (Kms)']\n", - "inter1 = pdp.pdp_interact(model=tree_model, dataset=val_X, model_features=feature_names, features=features_to_plot)\n", - "\n", - "pdp.pdp_interact_plot(pdp_interact_out=inter1, feature_names=features_to_plot, plot_type='contour')\n", + "# Similar to previous PDP plot except we use tuple of features instead of single feature\n", + "PartialDependenceDisplay.from_estimator(tree_model, val_X, [('Goal Scored', 'Distance Covered (Kms)')])\n", "plt.show()" ] }, @@ -209,7 +195,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, From 58adfa7e9fe166c2395ea37293f4001dc44e32ed Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Thu, 26 Jan 2023 16:58:12 -0500 Subject: [PATCH 03/17] update checking code --- learntools/ml_explainability/ex3.py | 83 +++++++++++++++++++---------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/learntools/ml_explainability/ex3.py b/learntools/ml_explainability/ex3.py index ac76f744a..f1e139332 100644 --- a/learntools/ml_explainability/ex3.py +++ b/learntools/ml_explainability/ex3.py @@ -5,15 +5,14 @@ from learntools.core import * +# 1 class WhyThatUShape(ThoughtExperiment): _solution = \ """ The code is for feat_name in base_features: - pdp_dist = pdp.pdp_isolate(model=first_model, dataset=val_X, - model_features=base_features, feature=feat_name) - pdp.pdp_plot(pdp_dist, feat_name) + PartialDependenceDisplay.from_estimator(first_model, val_X, [feat_name]) plt.show() @@ -25,6 +24,7 @@ class WhyThatUShape(ThoughtExperiment): For the same reason, we see the general U-shape in all our partial dependence plots. """ +# 2 class PonderPDPContour(ThoughtExperiment): _solution = \ """ @@ -40,46 +40,66 @@ class PonderPDPContour(ThoughtExperiment): The code you need to create the desired plot is: - fnames = ['pickup_longitude', 'dropoff_longitude'] - longitudes_partial_plot = pdp.pdp_interact(model=first_model, dataset=val_X, - model_features=base_features, features=fnames) - pdp.pdp_interact_plot(pdp_interact_out=longitudes_partial_plot, - feature_names=fnames, plot_type='contour') + fig, ax = plt.subplots(figsize=(8, 6)) + fnames = [('pickup_longitude', 'dropoff_longitude')] + disp = PartialDependenceDisplay.from_estimator(first_model, val_X, fnames, ax=ax) plt.show() """ +# 3 class ReadPDPContour(CodingProblem): _var = 'savings_from_shorter_trip' - _hint = 'First find the vertical level corresponding to -74 dropoff longitude. Then read off the horizontal values you are switching between. Use the white contour lines to orient yourself on what values you are near. You can round to the nearest integer rather than stressing about the exact cost to the nearest penny' - _solution = 'About \$15. The price decreases from slightly more than \$24 to slightly more than \$9.' + _hint = 'First find the vertical level corresponding to -74 dropoff longitude. Then read off the horizontal values you are switching between. Use the contour lines to orient yourself on what values you are near. You can round to the nearest integer rather than stressing about the exact cost to the nearest penny' + _solution = 'About \$6. The price decreases from slightly less than \$15 to slightly less than \$9.' def check(self, savings): if type(savings) == str: savings = Decimal(dollars.strip('$')) - assert ((savings > 13) and (savings < 17)), "Your answer should be about 15. Not {}".format(savings) + assert ((savings > 4) and (savings < 8)), "Your answer should be about 6. Not {}".format(savings) +# 4 class MakePDPWithAbsFeatures(CodingProblem): - _var = 'pdp_dist' - _hint = 'use the abs function when creating the abs_lat_change and abs_lon_change features. You don\'t need to change anything else.' + _var = 'disp' + _hint = 'Use the abs function when creating the abs_lat_change and abs_lon_change features. You don\'t need to change anything else.' _solution = \ """ -The biggest difference is that the partial dependence plot became much smaller. The the lowest vertical value is about $15 below the highest vertical value in the top chart, whereas this difference is only about $3 in the chart you just created. In other words, once you control for absolute distance traveled, the pickup_longitude has only a very small impact on predictions. +The difference is that the partial dependence plot became smaller. Both plots have a lowest vertical value of 8.5. But, the highest vertical value in the top chart is around 10.7, and the highest vertical value in the bottom chart is below 9.1. In other words, once you control for absolute distance traveled, the pickup_longitude has a smaller impact on predictions. # create new features data['abs_lon_change'] = abs(data.dropoff_longitude - data.pickup_longitude) data['abs_lat_change'] = abs(data.dropoff_latitude - data.pickup_latitude) """ - - def check(self, pdp_result): - correct = np.array([9.92212681, 8.97384862, 8.80044327, 8.71024292, 8.71564739, - 8.73523192, 8.76626448, 8.87855912, 9.00098688, 10.99584622]) - submitted = pdp_result.pdp + + def check(self, disp): + correct = np.array([8.730515 , 8.73239078, 8.71804165, 8.72179009, 8.93013488, + 8.68796391, 8.6773792 , 8.6816932 , 8.67547295, 8.64980733, + 8.64402745, 8.65616918, 8.63485345, 8.60505726, 8.59167824, + 8.57101857, 8.55601734, 8.55780041, 8.53660205, 8.53548254, + 8.50739547, 8.50599988, 8.50685068, 8.51981394, 8.52555708, + 8.50483315, 8.53151955, 8.49615781, 8.49384454, 8.49156773, + 8.5123399 , 8.47138576, 8.47491902, 8.50240045, 8.50495725, + 8.50433279, 8.4941558 , 8.50175984, 8.50394946, 8.50890372, + 8.50606589, 8.48335522, 8.48281078, 8.4730394 , 8.47720942, + 8.47699659, 8.52118039, 8.50234077, 8.59717268, 8.51092865, + 8.51177667, 8.51159374, 8.51159432, 8.54379423, 8.50500559, + 8.50631149, 8.52264825, 8.51989952, 8.52841122, 8.52757692, + 8.54425047, 8.56425312, 8.56874055, 8.58372296, 8.5589557 , + 8.57709991, 8.57441775, 8.59449221, 8.60063777, 8.62185164, + 8.6155473 , 8.6118143 , 8.61590988, 8.60758597, 8.62013413, + 8.6334263 , 8.64035478, 8.65324115, 8.66043255, 8.67502176, + 8.68940416, 8.6840402 , 8.67197893, 8.65512484, 8.66810839, + 8.6614093 , 8.65865671, 8.66485738, 8.67966737, 8.82833712, + 9.04135448, 9.03734449, 8.69506545, 8.70261503, 8.70673595, + 8.69045255, 8.69679997, 8.70716659, 8.71006281, 8.71739009]) + submitted = disp.pd_results[0]['average'][0] assert np.allclose(submitted, correct, rtol=0.1) +# 5 class DoesSteepnessImplyImportance(ThoughtExperiment): _solution = "No. This doesn't guarantee `feat_a` is more important. For example, `feat_a` could have a big effect in the cases where it varies, but could have a single value 99\% of the time. In that case, permuting `feat_a` wouldn't matter much, since most values would be unchanged." +# 6 class DesignDatasetUShapedPdp(CodingProblem): - _var = 'pdp_dist' + _var = 'disp' _hint = "Consider explicitly using terms that include mathematical expressions like `(X1 < -1)`" _solution = CS( """ @@ -89,22 +109,26 @@ class DesignDatasetUShapedPdp(CodingProblem): # You don't need any more changes """) - def check(self, pdp_result): - segment_1_end = np.argmin(pdp_result.feature_grids<-1) - segment_3_start = np.argmax(pdp_result.feature_grids>1) + def check(self, disp): + pdp_result = disp.pd_results[0] + x_values = pdp_result['values'][0] + y_values = pdp_result['average'][0] + + segment_1_end = np.argmin(x_values<-1) + segment_3_start = np.argmax(x_values>1) segment_2_start = segment_1_end + 1 segment_2_end = segment_3_start - 1 - segment_1_slopes_down = pdp_result.pdp[0] > pdp_result.pdp[segment_1_end] - segment_2_slopes_up = pdp_result.pdp[segment_2_start] < pdp_result.pdp[segment_2_end] - segment_3_slopes_down = pdp_result.pdp[segment_3_start] > pdp_result.pdp[-1] + segment_1_slopes_down = y_values[0] > y_values[segment_1_end] + segment_2_slopes_up = y_values[segment_2_start] < y_values[segment_2_end] + segment_3_slopes_down = y_values[segment_3_start] > y_values[-1] assert segment_1_slopes_down, ("The partial dependence plot does not slope down for values below -1.") assert segment_2_slopes_up, ("The partial dependence plot does not slope up for values between -1 and 1.") assert segment_3_slopes_down, ("The partial dependence plot does not slope down for values above 1.") class DesignFlatPDPWithHighImportance(CodingProblem): - _vars = ['perm', 'pdp_dist'] + _vars = ['perm', 'disp'] _hint = "You need for X1 to affect the prediction in order to have it affect permutation importance. But the average effect needs to be 0 to satisfy the PDP requirement. Achieve this by creating an interaction, so the effect of X1 depends on the value of X2 and vice-versa." _solution = CS( """ @@ -117,9 +141,10 @@ class DesignFlatPDPWithHighImportance(CodingProblem): # Aside from these lines, use the code provided """) - def check(self, importance, pdpResult): + def check(self, importance, disp): X1_imp = importance.feature_importances_[0] - pdpRange = max(pdpResult.pdp) - min(pdpResult.pdp) + pdpResult = disp.pd_results[0]['average'][0] + pdpRange = max(pdpResult) - min(pdpResult) assert (X1_imp > 0.5), ("Tested that X1 has an importance > 0.5. " "Actual importance was {}").format(X1_imp) assert (pdpRange < 0.5), ("Tested that the highest point on the Partial " From fc8abc279ee1056634c1ca9719a976c931da3a9c Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Thu, 26 Jan 2023 16:59:55 -0500 Subject: [PATCH 04/17] updated ex3 --- .../raw/ex3_partial_plots.ipynb | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb index 2db42d4b1..895350b92 100644 --- a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb @@ -70,7 +70,7 @@ "source": [ "## Question 1\n", "\n", - "Here is the code to plot the partial dependence plot for pickup_longitude. Run the following cell." + "Here is the code to plot the partial dependence plot for `pickup_longitude`. Run the following cell." ] }, { @@ -80,12 +80,10 @@ "outputs": [], "source": [ "from matplotlib import pyplot as plt\n", - "from pdpbox import pdp, get_dataset, info_plots\n", + "from sklearn.inspection import PartialDependenceDisplay\n", "\n", "feat_name = 'pickup_longitude'\n", - "pdp_dist = pdp.pdp_isolate(model=first_model, dataset=val_X, model_features=base_features, feature=feat_name)\n", - "\n", - "pdp.pdp_plot(pdp_dist, feat_name)\n", + "PartialDependenceDisplay.from_estimator(first_model, val_X, [feat_name])\n", "plt.show()" ] }, @@ -107,7 +105,6 @@ "outputs": [], "source": [ "for feat_name in base_features:\n", - " pdp_dist = ____\n", " ____\n", " plt.show()" ] @@ -140,13 +137,13 @@ "Now you will run a 2D partial dependence plot. As a reminder, here is the code from the tutorial. \n", "\n", "```\n", - "inter1 = pdp.pdp_interact(model=my_model, dataset=val_X, model_features=feature_names, features=['Goal Scored', 'Distance Covered (Kms)'])\n", - "\n", - "pdp.pdp_interact_plot(pdp_interact_out=inter1, feature_names=['Goal Scored', 'Distance Covered (Kms)'], plot_type='contour')\n", + "fig, ax = plt.subplots(figsize=(8, 6))\n", + "f_names = [('Goal Scored', 'Distance Covered (Kms)')]\n", + "PartialDependenceDisplay.from_estimator(tree_model, val_X, f_names, ax=ax)\n", "plt.show()\n", "```\n", "\n", - "Create a 2D plot for the features `pickup_longitude` and `dropoff_longitude`. Plot it appropriately?\n", + "Create a 2D plot for the features `pickup_longitude` and `dropoff_longitude`.\n", "\n", "What do you expect it to look like?" ] @@ -157,6 +154,8 @@ "metadata": {}, "outputs": [], "source": [ + "fig, ax = plt.subplots(figsize=(8, 6))\n", + "\n", "# Add your code here\n", "____" ] @@ -183,7 +182,7 @@ "metadata": {}, "source": [ "## Question 3\n", - "Consider a ride starting at longitude -73.92 and ending at longitude -74. Using the graph from the last question, estimate how much money the rider would have saved if they'd started the ride at longitude -73.98 instead?" + "Consider a ride starting at longitude -73.955 and ending at longitude -74. Using the graph from the last question, estimate how much money the rider would have saved if they'd started the ride at longitude -73.98 instead." ] }, { @@ -242,8 +241,7 @@ "pdp.pdp_plot(pdp_dist_original, feat_name)\n", "plt.show()\n", "\n", - "\n", - "\n", + "# Your code here\n", "# create new features\n", "data['abs_lon_change'] = ____\n", "data['abs_lat_change'] = ____\n", @@ -260,9 +258,7 @@ "second_model = RandomForestRegressor(n_estimators=30, random_state=1).fit(new_train_X, new_train_y)\n", "\n", "feat_name = 'pickup_longitude'\n", - "pdp_dist = pdp.pdp_isolate(model=second_model, dataset=new_val_X, model_features=features_2, feature=feat_name)\n", - "\n", - "pdp.pdp_plot(pdp_dist, feat_name)\n", + "disp = PartialDependenceDisplay.from_estimator(second_model, new_val_X, [feat_name])\n", "plt.show()\n", "\n", "# Check your answer\n", @@ -273,7 +269,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Uncomment the lines below to see a hint or the solution (including an explanation of the important differences between the plots)." + "Uncomment the line below to see a hint or the solution (including an explanation of the important differences between the plots)." ] }, { @@ -339,19 +335,17 @@ "# Create array holding predictive feature\n", "X1 = 4 * rand(n_samples) - 2\n", "X2 = 4 * rand(n_samples) - 2\n", + "\n", + "# Your code here\n", "# Create y. you should have X1 and X2 in the expression for y\n", "y = np.ones(n_samples)\n", "\n", - "# create dataframe because pdp_isolate expects a dataFrame as an argument\n", + "# create dataframe \n", "my_df = pd.DataFrame({'X1': X1, 'X2': X2, 'y': y})\n", "predictors_df = my_df.drop(['y'], axis=1)\n", "\n", "my_model = RandomForestRegressor(n_estimators=30, random_state=1).fit(predictors_df, my_df.y)\n", - "\n", - "pdp_dist = pdp.pdp_isolate(model=my_model, dataset=my_df, model_features=['X1', 'X2'], feature='X1')\n", - "\n", - "# visualize your results\n", - "pdp.pdp_plot(pdp_dist, 'X1')\n", + "disp = PartialDependenceDisplay.from_estimator(my_model, predictors_df, ['X1'])\n", "plt.show()\n", "\n", "# Check your answer\n", @@ -410,8 +404,7 @@ "my_model = RandomForestRegressor(n_estimators=30, random_state=1).fit(predictors_df, my_df.y)\n", "\n", "\n", - "pdp_dist = pdp.pdp_isolate(model=my_model, dataset=my_df, model_features=['X1', 'X2'], feature='X1')\n", - "pdp.pdp_plot(pdp_dist, 'X1')\n", + "disp = PartialDependenceDisplay.from_estimator(my_model, predictors_df, ['X1'], grid_resolution=300)\n", "plt.show()\n", "\n", "perm = PermutationImportance(my_model).fit(predictors_df, my_df.y)\n", @@ -448,7 +441,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, From b12b83ab3ffd28c927935261d1e1057a41729d00 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Thu, 26 Jan 2023 17:05:35 -0500 Subject: [PATCH 05/17] Update tut3_partial_plots.ipynb small edits to tut3 --- notebooks/ml_explainability/raw/tut3_partial_plots.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb b/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb index 67487f930..2733d1d3a 100644 --- a/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb @@ -101,7 +101,7 @@ "from sklearn.inspection import PartialDependenceDisplay\n", "\n", "# Create and plot the data\n", - "PartialDependenceDisplay.from_estimator(tree_model, val_X, ['Goal Scored'])\n", + "disp1 = PartialDependenceDisplay.from_estimator(tree_model, val_X, ['Goal Scored'])\n", "plt.show()" ] }, @@ -122,7 +122,7 @@ "metadata": {}, "outputs": [], "source": [ - "PartialDependenceDisplay.from_estimator(tree_model, val_X, ['Distance Covered (Kms)'])\n", + "disp2 = PartialDependenceDisplay.from_estimator(tree_model, val_X, ['Distance Covered (Kms)'])\n", "plt.show()" ] }, @@ -144,7 +144,7 @@ "# Build Random Forest model\n", "rf_model = RandomForestClassifier(random_state=0).fit(train_X, train_y)\n", "\n", - "PartialDependenceDisplay.from_estimator(rf_model, val_X, ['Distance Covered (Kms)'])\n", + "disp3 = PartialDependenceDisplay.from_estimator(rf_model, val_X, ['Distance Covered (Kms)'])\n", "plt.show()" ] }, @@ -169,7 +169,7 @@ "outputs": [], "source": [ "# Similar to previous PDP plot except we use tuple of features instead of single feature\n", - "PartialDependenceDisplay.from_estimator(tree_model, val_X, [('Goal Scored', 'Distance Covered (Kms)')])\n", + "disp4 = PartialDependenceDisplay.from_estimator(tree_model, val_X, [('Goal Scored', 'Distance Covered (Kms)')])\n", "plt.show()" ] }, From 34235c5dcfb52a6c30c05dbb6465c96af877f828 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Thu, 26 Jan 2023 17:14:12 -0500 Subject: [PATCH 06/17] Update tut3_partial_plots.ipynb --- notebooks/ml_explainability/raw/tut3_partial_plots.ipynb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb b/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb index 2733d1d3a..2fafca360 100644 --- a/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/tut3_partial_plots.ipynb @@ -122,7 +122,8 @@ "metadata": {}, "outputs": [], "source": [ - "disp2 = PartialDependenceDisplay.from_estimator(tree_model, val_X, ['Distance Covered (Kms)'])\n", + "feature_to_plot = 'Distance Covered (Kms)'\n", + "disp2 = PartialDependenceDisplay.from_estimator(tree_model, val_X, [feature_to_plot])\n", "plt.show()" ] }, @@ -144,7 +145,7 @@ "# Build Random Forest model\n", "rf_model = RandomForestClassifier(random_state=0).fit(train_X, train_y)\n", "\n", - "disp3 = PartialDependenceDisplay.from_estimator(rf_model, val_X, ['Distance Covered (Kms)'])\n", + "disp3 = PartialDependenceDisplay.from_estimator(rf_model, val_X, [feature_to_plot])\n", "plt.show()" ] }, @@ -168,8 +169,10 @@ "metadata": {}, "outputs": [], "source": [ + "fig, ax = plt.subplots(figsize=(8, 6))\n", + "f_names = [('Goal Scored', 'Distance Covered (Kms)')]\n", "# Similar to previous PDP plot except we use tuple of features instead of single feature\n", - "disp4 = PartialDependenceDisplay.from_estimator(tree_model, val_X, [('Goal Scored', 'Distance Covered (Kms)')])\n", + "disp4 = PartialDependenceDisplay.from_estimator(tree_model, val_X, f_names, ax=ax)\n", "plt.show()" ] }, From 2c842fbad676c252d3e6da51a5ff52ef54724071 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Thu, 26 Jan 2023 19:42:53 -0500 Subject: [PATCH 07/17] Update ex3_partial_plots.ipynb --- notebooks/ml_explainability/raw/ex3_partial_plots.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb index 895350b92..3e6bb773f 100644 --- a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb @@ -236,7 +236,7 @@ "source": [ "# This is the PDP for pickup_longitude without the absolute difference features. Included here to help compare it to the new PDP you create\n", "feat_name = 'pickup_longitude'\n", - "pdp_dist_original = pdp.pdp_isolate(model=first_model, dataset=val_X, model_features=base_features, feature=feat_name)\n", + "PartialDependenceDisplay.from_estimator(first_model, val_X, [feat_name])\n", "\n", "pdp.pdp_plot(pdp_dist_original, feat_name)\n", "plt.show()\n", From 77cbe5869547aaf62a7e6012abda08eaecabda90 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Thu, 26 Jan 2023 19:55:36 -0500 Subject: [PATCH 08/17] Update ex3_partial_plots.ipynb --- notebooks/ml_explainability/raw/ex3_partial_plots.ipynb | 2 -- 1 file changed, 2 deletions(-) diff --git a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb index 3e6bb773f..d06b519df 100644 --- a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb @@ -237,8 +237,6 @@ "# This is the PDP for pickup_longitude without the absolute difference features. Included here to help compare it to the new PDP you create\n", "feat_name = 'pickup_longitude'\n", "PartialDependenceDisplay.from_estimator(first_model, val_X, [feat_name])\n", - "\n", - "pdp.pdp_plot(pdp_dist_original, feat_name)\n", "plt.show()\n", "\n", "# Your code here\n", From 438c8cb40525b624bb282cfcaa4e719fd7bbf185 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 09:35:36 -0500 Subject: [PATCH 09/17] remove ex3 from testing --- notebooks/test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/notebooks/test.sh b/notebooks/test.sh index ddb23543c..e7af375cb 100755 --- a/notebooks/test.sh +++ b/notebooks/test.sh @@ -101,6 +101,7 @@ do || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex5" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex6" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex2" && $track == "nlp" ) ]] #times out + || [[ ( $nb =~ "ex3_partial_plots" && $track == "ml_explainability" ) ]] #placeholders then echo "Warning: skipping $nb in track $track" continue From 3c8c8fba16973cfad2f909f86a076c3c1e1d55f5 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 09:52:14 -0500 Subject: [PATCH 10/17] Update ex3_partial_plots.ipynb --- notebooks/ml_explainability/raw/ex3_partial_plots.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb index d06b519df..489b7a305 100644 --- a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb @@ -70,7 +70,7 @@ "source": [ "## Question 1\n", "\n", - "Here is the code to plot the partial dependence plot for `pickup_longitude`. Run the following cell." + "Here is the code to plot the partial dependence plot for `pickup_longitude`. Run the following cell without changes." ] }, { From 7222045ee2f1e1d7ebc4bb85c13d4671825f9cdf Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 09:53:10 -0500 Subject: [PATCH 11/17] Update ex3.py --- learntools/ml_explainability/ex3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learntools/ml_explainability/ex3.py b/learntools/ml_explainability/ex3.py index f1e139332..0aeab0ea1 100644 --- a/learntools/ml_explainability/ex3.py +++ b/learntools/ml_explainability/ex3.py @@ -50,7 +50,7 @@ class PonderPDPContour(ThoughtExperiment): class ReadPDPContour(CodingProblem): _var = 'savings_from_shorter_trip' _hint = 'First find the vertical level corresponding to -74 dropoff longitude. Then read off the horizontal values you are switching between. Use the contour lines to orient yourself on what values you are near. You can round to the nearest integer rather than stressing about the exact cost to the nearest penny' - _solution = 'About \$6. The price decreases from slightly less than \$15 to slightly less than \$9.' + _solution = 'About 6. The price decreases from slightly less than 15 to slightly less than 9.' def check(self, savings): if type(savings) == str: savings = Decimal(dollars.strip('$')) From f3b9c30fec8bc7a9e39165e972341d92264759aa Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 09:56:49 -0500 Subject: [PATCH 12/17] Update track_meta.py turn on internet for exercise 3 --- notebooks/ml_explainability/track_meta.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/notebooks/ml_explainability/track_meta.py b/notebooks/ml_explainability/track_meta.py index 1a806b81b..9e31243ef 100644 --- a/notebooks/ml_explainability/track_meta.py +++ b/notebooks/ml_explainability/track_meta.py @@ -41,7 +41,8 @@ filename='ex3_partial_plots.ipynb', lesson_idx=2, type='exercise', - scriptid=1637380 + scriptid=1637380, + enable_internet=True ), dict( filename='tut4_shap_basic.ipynb', From 050e19950c10c344c0ab6dfe957f868ffb2a7c06 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 09:59:03 -0500 Subject: [PATCH 13/17] Update ex3_partial_plots.ipynb --- notebooks/ml_explainability/raw/ex3_partial_plots.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb index 489b7a305..e664dcdb0 100644 --- a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb @@ -17,7 +17,9 @@ "metadata": {}, "outputs": [], "source": [ + "# Get most recent checking code\n", "import pandas as pd\n", + "!pip install -U -t /kaggle/working/ git+https://github.com/Kaggle/learntools.git\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import train_test_split\n", From 8882f97a8ac354aa6d5d6ffbf3c79ff6dcc4b87e Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 09:59:25 -0500 Subject: [PATCH 14/17] Update ex3_partial_plots.ipynb --- notebooks/ml_explainability/raw/ex3_partial_plots.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb index e664dcdb0..a599cfe29 100644 --- a/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb +++ b/notebooks/ml_explainability/raw/ex3_partial_plots.ipynb @@ -18,8 +18,8 @@ "outputs": [], "source": [ "# Get most recent checking code\n", - "import pandas as pd\n", "!pip install -U -t /kaggle/working/ git+https://github.com/Kaggle/learntools.git\n", + "import pandas as pd\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import train_test_split\n", From 2ddbb946753d69696885e130933bd6766cd37f00 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 09:59:53 -0500 Subject: [PATCH 15/17] Update test.sh --- notebooks/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/test.sh b/notebooks/test.sh index e7af375cb..02066f39b 100755 --- a/notebooks/test.sh +++ b/notebooks/test.sh @@ -100,7 +100,7 @@ do || [[ ( $nb =~ "tut1" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "tut5" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "tut6" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex5" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex6" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] \ - || [[ ( $nb =~ "ex2" && $track == "nlp" ) ]] #times out + || [[ ( $nb =~ "ex2" && $track == "nlp" ) ]] \ #times out || [[ ( $nb =~ "ex3_partial_plots" && $track == "ml_explainability" ) ]] #placeholders then echo "Warning: skipping $nb in track $track" From 1d55f3a9009d6c45d8f766eb345ead31d87d2d85 Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 10:07:05 -0500 Subject: [PATCH 16/17] Update test.sh --- notebooks/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/test.sh b/notebooks/test.sh index 02066f39b..83ee97177 100755 --- a/notebooks/test.sh +++ b/notebooks/test.sh @@ -100,8 +100,8 @@ do || [[ ( $nb =~ "tut1" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "tut5" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "tut6" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex5" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex6" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] \ - || [[ ( $nb =~ "ex2" && $track == "nlp" ) ]] \ #times out - || [[ ( $nb =~ "ex3_partial_plots" && $track == "ml_explainability" ) ]] #placeholders + || [[ ( $nb =~ "ex2" && $track == "nlp" ) ]] \ + || [[ ( $nb =~ "ex3_partial_plots" && $track == "ml_explainability" ) ]] then echo "Warning: skipping $nb in track $track" continue From 80edc8c844e767b294d5c5d3e377db6192b7d56a Mon Sep 17 00:00:00 2001 From: Alexis Cook Date: Fri, 27 Jan 2023 10:32:35 -0500 Subject: [PATCH 17/17] Update test.sh --- notebooks/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/test.sh b/notebooks/test.sh index 83ee97177..3a020601b 100755 --- a/notebooks/test.sh +++ b/notebooks/test.sh @@ -100,8 +100,8 @@ do || [[ ( $nb =~ "tut1" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "tut5" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "tut6" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex5" && $track == "computer_vision" ) ]] || [[ ( $nb =~ "ex6" && $track == "computer_vision" ) ]] \ || [[ ( $nb =~ "ex1" && $track == "computer_vision" ) ]] \ - || [[ ( $nb =~ "ex2" && $track == "nlp" ) ]] \ - || [[ ( $nb =~ "ex3_partial_plots" && $track == "ml_explainability" ) ]] + || [[ ( $nb =~ "ex2" && $track == "nlp" ) ]] \ + || [[ ( $nb =~ "ex3_partial_plots" && $track == "ml_explainability" ) ]] then echo "Warning: skipping $nb in track $track" continue