Commit 526add2

Corrected instructions for installing local PySpark on Windows 10

1 parent b706696

1 file changed (+6, -19 lines)

sql/PySpark/PySpark_Local_Example.ipynb

Lines changed: 6 additions & 19 deletions
@@ -13,15 +13,15 @@
 "id": "050423fd-758c-462e-94cf-3c36e97e026b",
 "metadata": {},
 "source": [
-"1. Install Java 1.8 from Software Center\n",
+"1. Install Java 1.8 from the Sun Java [site](https://www.java.com/download/ie_manual.jsp). Include the path to java.exe in your PATH environment variable.\n",
 "2. Install Python\n",
 "3. Create pyspark_dev virtual environment\n",
 "4. Activate \"pyspark_dev\" environment, then: pip install pyspark[sql] ipykernel\n",
 "5. Install kernel: python -m ipykernel install --user --name pyspark_dev --display-name \"Python (pyspark_dev)\"\n",
 "6. Set environment variables: PYSPARK_PYTHON=[path_to_python.exe] and SPARK_HOME=[path_to_site_packages/pyspark_folder]\n",
 "7. Download winutils.exe from https://github.com/cdarlint/winutils, save locally to \"hadoop/bin\" folder and then\n",
 "8. set HADOOP_HOME=[path_to_hadoop_folder]\n",
-"9. Activate jupyterlab environment and then launch: jupyter lab\n",
+"9. Activate the Python virtual environment that has jupyterlab installed, then launch: jupyter lab\n",
 "\n",
 "See this article https://phoenixnap.com/kb/install-spark-on-windows-10"
 ]
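
Steps 6-8 above set PYSPARK_PYTHON, SPARK_HOME, and HADOOP_HOME as permanent Windows environment variables. On a machine where editing system settings is inconvenient, the same variables can instead be set per session from Python before the first SparkSession is created, since the JVM that PySpark launches inherits the notebook process's environment. A minimal sketch, assuming hypothetical paths (C:\venvs\pyspark_dev and C:\hadoop are placeholders, not locations taken from the notebook):

import os

# All paths below are illustrative placeholders; substitute your own.
# PYSPARK_PYTHON must point at the python.exe of the pyspark_dev environment.
os.environ["PYSPARK_PYTHON"] = r"C:\venvs\pyspark_dev\Scripts\python.exe"

# SPARK_HOME is the pyspark folder inside that environment's site-packages (step 6).
os.environ["SPARK_HOME"] = r"C:\venvs\pyspark_dev\Lib\site-packages\pyspark"

# HADOOP_HOME is the folder whose bin subfolder holds winutils.exe (steps 7-8).
os.environ["HADOOP_HOME"] = r"C:\hadoop"

# Sanity check: confirm the variables are visible to this process.
for name in ("PYSPARK_PYTHON", "SPARK_HOME", "HADOOP_HOME"):
    print(name, "=", os.environ.get(name))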
@@ -31,23 +31,10 @@
 "execution_count": 1,
 "id": "664504f6-bbac-466e-982d-42c62fb82a37",
 "metadata": {},
-"outputs": [
- {
-  "name": "stderr",
-  "output_type": "stream",
-  "text": [
-   "22/01/02 20:21:59 WARN Utils: Your hostname, asus-laptop resolves to a loopback address: 127.0.1.1; using 172.30.59.173 instead (on interface eth0)\n",
-   "22/01/02 20:21:59 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
-   "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n",
-   "Setting default log level to \"WARN\".\n",
-   "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
-   "22/01/02 20:22:00 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
-  ]
- }
-],
+"outputs": [],
 "source": [
  "from pyspark.sql import SparkSession\n",
- "spark = SparkSession.builder.master(\"local[*]\").getOrCreate()\n",
+ "spark = SparkSession.builder.master(\"local[*]\").appName(\"local_pyspark\").getOrCreate()\n",
  "spark.conf.set(\"spark.sql.repl.eagerEval.enabled\", True)"
 ]
},
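
The spark.sql.repl.eagerEval.enabled setting in the cell above makes a DataFrame that ends a Jupyter cell render as an HTML table rather than the bare DataFrame repr. A short usage sketch against that same spark session; the id/label rows are made-up illustration data, not taken from the notebook:

# Illustrative rows only, to demonstrate eager evaluation in JupyterLab.
df = spark.createDataFrame([(1, "alpha"), (2, "beta")], ["id", "label"])

# Ending a cell with `df` now renders an HTML table in Jupyter;
# df.show() prints the same rows as plain text in any environment.
df.show()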
@@ -92,7 +79,7 @@
 ],
 "metadata": {
 "kernelspec": {
- "display_name": "Py3.8 (pyspark_dev)",
+ "display_name": "Py3.9 (pyspark_dev)",
  "language": "python",
  "name": "pyspark_dev"
 },
@@ -106,7 +93,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.9.7"
 }
},
"nbformat": 4,
