@@ -69,10 +69,14 @@
 
 EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python")
 SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
+DATA_PATH = os.path.join(SPARK_HOME, "data")
+LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")
+
 SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
 JARS_TARGET = os.path.join(TEMP_PATH, "jars")
 EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")
-
+DATA_TARGET = os.path.join(TEMP_PATH, "data")
+LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")
 
 # Check and see if we are under the spark path in which case we need to build the symlink farm.
 # This is important because we only want to build the symlink farm while under Spark otherwise we
@@ -114,11 +118,15 @@ def _supports_symlinks():
             os.symlink(JARS_PATH, JARS_TARGET)
             os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET)
             os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
+            os.symlink(DATA_PATH, DATA_TARGET)
+            os.symlink(LICENSES_PATH, LICENSES_TARGET)
         else:
             # For windows fall back to the slower copytree
             copytree(JARS_PATH, JARS_TARGET)
             copytree(SCRIPTS_PATH, SCRIPTS_TARGET)
             copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
+            copytree(DATA_PATH, DATA_TARGET)
+            copytree(LICENSES_PATH, LICENSES_TARGET)
     else:
         # If we are not inside of SPARK_HOME verify we have the required symlink farm
         if not os.path.exists(JARS_TARGET):
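
The hunk above simply extends both branches of the existing symlink-or-copy logic, so data/ and licenses/ are staged into the temporary farm the same way as jars, bin, and examples. As a rough, standalone illustration of that pattern only (the helper name link_or_copy and the getattr-based capability check are assumptions for the sketch, not code from the patch):

import os
import tempfile
from shutil import copytree

def link_or_copy(src, dst):
    # Illustrative sketch: prefer a symlink where the platform exposes
    # os.symlink, otherwise fall back to a slower recursive copy (e.g. Windows).
    if getattr(os, "symlink", None) is not None:
        os.symlink(src, dst)
    else:
        copytree(src, dst)

# Demonstration with throwaway directories instead of the real SPARK_HOME layout.
src = tempfile.mkdtemp()
dst = os.path.join(tempfile.mkdtemp(), "data")
link_or_copy(src, dst)
print(os.path.islink(dst) or os.path.isdir(dst))  # True on either branch
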
@@ -161,18 +169,24 @@ def _supports_symlinks():
                   'pyspark.jars',
                   'pyspark.python.pyspark',
                   'pyspark.python.lib',
+                  'pyspark.data',
+                  'pyspark.licenses',
                   'pyspark.examples.src.main.python'],
         include_package_data=True,
         package_dir={
             'pyspark.jars': 'deps/jars',
             'pyspark.bin': 'deps/bin',
             'pyspark.python.lib': 'lib',
+            'pyspark.data': 'deps/data',
+            'pyspark.licenses': 'deps/licenses',
             'pyspark.examples.src.main.python': 'deps/examples',
         },
         package_data={
             'pyspark.jars': ['*.jar'],
             'pyspark.bin': ['*'],
             'pyspark.python.lib': ['*.zip'],
+            'pyspark.data': ['*.txt', '*.data'],
+            'pyspark.licenses': ['*.txt'],
             'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
         scripts=scripts,
         license='http://www.apache.org/licenses/LICENSE-2.0',
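
Declaring pyspark.data and pyspark.licenses as packages rooted at deps/data and deps/licenses, with the matching package_data globs, means the *.txt and *.data files should land under pyspark/data/ and pyspark/licenses/ inside the built distribution. One way to spot-check a locally built wheel is sketched below; the wheel filename is a placeholder to replace with whatever python setup.py bdist_wheel actually produces:

import zipfile

# Placeholder path: substitute the artifact produced in python/dist/.
WHEEL = "dist/pyspark-<version>-py2.py3-none-any.whl"

with zipfile.ZipFile(WHEEL) as whl:
    shipped = [name for name in whl.namelist()
               if name.startswith(("pyspark/data/", "pyspark/licenses/"))]
    print("\n".join(shipped) or "no data/licenses entries found")
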
@@ -202,8 +216,12 @@ def _supports_symlinks():
             os.remove(os.path.join(TEMP_PATH, "jars"))
             os.remove(os.path.join(TEMP_PATH, "bin"))
             os.remove(os.path.join(TEMP_PATH, "examples"))
+            os.remove(os.path.join(TEMP_PATH, "data"))
+            os.remove(os.path.join(TEMP_PATH, "licenses"))
         else:
             rmtree(os.path.join(TEMP_PATH, "jars"))
             rmtree(os.path.join(TEMP_PATH, "bin"))
             rmtree(os.path.join(TEMP_PATH, "examples"))
+            rmtree(os.path.join(TEMP_PATH, "data"))
+            rmtree(os.path.join(TEMP_PATH, "licenses"))
         os.rmdir(TEMP_PATH)
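
After the finally block tears the symlink farm back down, the net effect of the change is only visible in the installed package: the new directories should sit next to the rest of the pyspark payload in site-packages. A small sanity check, assuming only that the directories are installed alongside pyspark/__init__.py (which is what the package_dir mapping above implies):

import os
import pyspark

# Root of the installed pyspark package, e.g. .../site-packages/pyspark
root = os.path.dirname(pyspark.__file__)
for name in ("jars", "bin", "examples", "data", "licenses"):
    status = "ok" if os.path.isdir(os.path.join(root, name)) else "missing"
    print("%-9s %s" % (name, status))
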