|
13 | 13 | "id": "050423fd-758c-462e-94cf-3c36e97e026b", |
14 | 14 | "metadata": {}, |
15 | 15 | "source": [ |
16 | | - "1. Install Java 1.8 from Software Center\n", |
| 16 | + "1. Install Java 1.8 from Sun Java [site](https://www.java.com/download/ie_manual.jsp). Include path to java.exe in your PATH environment variable.\n", |
17 | 17 | "2. Install Python\n", |
18 | 18 | "3. Create pyspark_dev virtual environment\n", |
19 | 19 | "4. Activate \"pyspark_dev\" environment, then: pip install pyspark[sql] ipykernel\n", |
20 | 20 | "5. Install kernel: python -m ipykernel install --user --name pyspark_dev --display-name \"Python (pyspark_dev)\"\n", |
21 | 21 | "6. Set environment variables: PYSPARK_PYTHON=[path_to_python.exe] and SPARK_HOME=[path_to_site_packages/pyspark_folder]\n", |
22 | 22 | "7. Download winutils.exe from https://github.com/cdarlint/winutils, save locally to \"hadoop/bin\" folder and then\n", |
23 | 23 | "8. set HADOOP_HOME=[path_to_hadoop_folder]\n", |
24 | | - "9. Activate jupyterlab environment and then launch: jupyter lab\n", |
| 24 | + "9. Activate python virtual environment that has jupyterlab installed and then launch: jupyter lab\n", |
25 | 25 | "\n", |
26 | 26 | "See this article https://phoenixnap.com/kb/install-spark-on-windows-10" |
27 | 27 | ] |
|
31 | 31 | "execution_count": 1, |
32 | 32 | "id": "664504f6-bbac-466e-982d-42c62fb82a37", |
33 | 33 | "metadata": {}, |
34 | | - "outputs": [ |
35 | | - { |
36 | | - "name": "stderr", |
37 | | - "output_type": "stream", |
38 | | - "text": [ |
39 | | - "22/01/02 20:21:59 WARN Utils: Your hostname, asus-laptop resolves to a loopback address: 127.0.1.1; using 172.30.59.173 instead (on interface eth0)\n", |
40 | | - "22/01/02 20:21:59 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n", |
41 | | - "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n", |
42 | | - "Setting default log level to \"WARN\".\n", |
43 | | - "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", |
44 | | - "22/01/02 20:22:00 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" |
45 | | - ] |
46 | | - } |
47 | | - ], |
| 34 | + "outputs": [], |
48 | 35 | "source": [ |
49 | 36 | "from pyspark.sql import SparkSession\n", |
50 | | - "spark = SparkSession.builder.master(\"local[*]\").getOrCreate()\n", |
| 37 | + "spark = SparkSession.builder.master(\"local[*]\").appName(\"local_pyspark\").getOrCreate()\n", |
51 | 38 | "spark.conf.set(\"spark.sql.repl.eagerEval.enabled\", True)" |
52 | 39 | ] |
53 | 40 | }, |
|
92 | 79 | ], |
93 | 80 | "metadata": { |
94 | 81 | "kernelspec": { |
95 | | - "display_name": "Py3.8 (pyspark_dev)", |
| 82 | + "display_name": "Py3.9 (pyspark_dev)", |
96 | 83 | "language": "python", |
97 | 84 | "name": "pyspark_dev" |
98 | 85 | }, |
|
106 | 93 | "name": "python", |
107 | 94 | "nbconvert_exporter": "python", |
108 | 95 | "pygments_lexer": "ipython3", |
109 | | - "version": "3.8.10" |
| 96 | + "version": "3.9.7" |
110 | 97 | } |
111 | 98 | }, |
112 | 99 | "nbformat": 4, |
|
0 commit comments