saayeeem
diff --git a/‎.gitattributes‎
Lines changed: 0 additions & 1 deletion b/‎.gitattributes‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎Capstone_Data_Science_SpaceY/spacex-falcon9-data-collection-api.ipynb‎
Lines changed: 273 additions & 3 deletions b/‎Capstone_Data_Science_SpaceY/spacex-falcon9-data-collection-api.ipynb‎
Lines changed: 273 additions & 3 deletions
@@ -1,4 +1,3 @@
-*.ipynb filter=lfs diff=lfs merge=lfs -text
 *.csv filter=lfs diff=lfs merge=lfs -text
 *.mp4 filter=lfs diff=lfs merge=lfs -text
 *.keras filter=lfs diff=lfs merge=lfs -text
 
@@ -1,3 +1,273 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8b51872ebfef34e927bbb37fbd616b5c74467c36448b2aff16dc7db0e7575386
-size 7673
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SpaceX Falcon 9 First Stage Landing Data Collection\n",
+    "\n",
+    "This notebook is part of my personal data science project. All content and analysis are original and tailored for my own exploration of SpaceX launch data."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Project Overview\n",
+    "\n",
+    "The goal is to collect, clean, and prepare SpaceX Falcon 9 launch data for further analysis and machine learning. This notebook focuses on retrieving data from the SpaceX API and performing initial wrangling."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Objectives\n",
+    "- Request and collect SpaceX Falcon 9 launch data from the API\n",
+    "- Clean and format the data for analysis\n",
+    "- Prepare the dataset for downstream machine learning tasks"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import Libraries and Define Helper Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Helper functions to extract details from API responses\n",
+    "def getBoosterVersion(data):\n",
+    "    \"\"\"Append the rocket name of every launch in `data` to the global BoosterVersion list.\n",
+    "\n",
+    "    Exactly one entry is appended per value in data['rocket'] (None when the\n",
+    "    rocket id is missing), so the list stays aligned with the launch rows.\n",
+    "    Raises requests.HTTPError if the API answers with an error status.\n",
+    "    \"\"\"\n",
+    "    names = {}  # rocket id -> name, so each rocket is requested only once\n",
+    "    for x in data['rocket']:\n",
+    "        if not x:\n",
+    "            # Keep the list the same length as the launch table\n",
+    "            BoosterVersion.append(None)\n",
+    "            continue\n",
+    "        if x not in names:\n",
+    "            response = requests.get(f\"https://api.spacexdata.com/v4/rockets/{x}\", timeout=30)\n",
+    "            response.raise_for_status()\n",
+    "            names[x] = response.json()['name']\n",
+    "        BoosterVersion.append(names[x])\n",
+    "\n",
+    "def getLaunchSite(data):\n",
+    "    \"\"\"Append longitude, latitude and name of every launch's pad to the global lists.\n",
+    "\n",
+    "    Longitude, Latitude and LaunchSite each receive one entry per value in\n",
+    "    data['launchpad'] (None when the launchpad id is missing), keeping them\n",
+    "    aligned with the launch rows. Raises requests.HTTPError on an error status.\n",
+    "    \"\"\"\n",
+    "    pads = {}  # launchpad id -> API record, so each pad is requested only once\n",
+    "    for x in data['launchpad']:\n",
+    "        if not x:\n",
+    "            # Keep all three lists the same length as the launch table\n",
+    "            Longitude.append(None)\n",
+    "            Latitude.append(None)\n",
+    "            LaunchSite.append(None)\n",
+    "            continue\n",
+    "        if x not in pads:\n",
+    "            response = requests.get(f\"https://api.spacexdata.com/v4/launchpads/{x}\", timeout=30)\n",
+    "            response.raise_for_status()\n",
+    "            pads[x] = response.json()\n",
+    "        pad = pads[x]\n",
+    "        Longitude.append(pad['longitude'])\n",
+    "        Latitude.append(pad['latitude'])\n",
+    "        LaunchSite.append(pad['name'])\n",
+    "\n",
+    "def getPayloadData(data):\n",
+    "    \"\"\"Append the 'mass_kg' and 'orbit' fields of every launch's payload to the global lists.\n",
+    "\n",
+    "    PayloadMass and Orbit each receive one entry per value in data['payloads']\n",
+    "    (None when the payload id is missing), keeping them aligned with the\n",
+    "    launch rows. Raises requests.HTTPError on an error status.\n",
+    "    \"\"\"\n",
+    "    for load in data['payloads']:\n",
+    "        if not load:\n",
+    "            # Keep both lists the same length as the launch table\n",
+    "            PayloadMass.append(None)\n",
+    "            Orbit.append(None)\n",
+    "            continue\n",
+    "        response = requests.get(f\"https://api.spacexdata.com/v4/payloads/{load}\", timeout=30)\n",
+    "        response.raise_for_status()\n",
+    "        payload = response.json()\n",
+    "        PayloadMass.append(payload['mass_kg'])\n",
+    "        Orbit.append(payload['orbit'])\n",
+    "\n",
+    "def getCoreData(data):\n",
+    "    \"\"\"Append core and landing details of every launch in `data` to the global lists.\n",
+    "\n",
+    "    Each value in data['cores'] is read as a dict with the keys 'core',\n",
+    "    'landing_success', 'landing_type', 'flight', 'gridfins', 'reused', 'legs'\n",
+    "    and 'landpad'. Block, ReusedCount and Serial come from the cores endpoint\n",
+    "    and are None when the launch has no core id.\n",
+    "    Raises requests.HTTPError if the API answers with an error status.\n",
+    "    \"\"\"\n",
+    "    for core in data['cores']:\n",
+    "        if core['core'] is not None:\n",
+    "            # Timeout + status check: fail clearly instead of hanging or raising a KeyError on an error payload\n",
+    "            response = requests.get(f\"https://api.spacexdata.com/v4/cores/{core['core']}\", timeout=30)\n",
+    "            response.raise_for_status()\n",
+    "            details = response.json()\n",
+    "            Block.append(details['block'])\n",
+    "            ReusedCount.append(details['reuse_count'])\n",
+    "            Serial.append(details['serial'])\n",
+    "        else:\n",
+    "            Block.append(None)\n",
+    "            ReusedCount.append(None)\n",
+    "            Serial.append(None)\n",
+    "        # Outcome is the string '<landing_success> <landing_type>', e.g. 'True ...' or 'None None'\n",
+    "        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))\n",
+    "        Flights.append(core['flight'])\n",
+    "        GridFins.append(core['gridfins'])\n",
+    "        Reused.append(core['reused'])\n",
+    "        Legs.append(core['legs'])\n",
+    "        LandingPad.append(core['landpad'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Collection\n",
+    "\n",
+    "Request all past SpaceX launches from the API and perform initial wrangling; the Falcon 9 filter is applied later, in the Data Cleaning section."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spacex_url = \"https://api.spacexdata.com/v4/launches/past\"\n",
+    "# A timeout keeps the cell from hanging forever on an unresponsive server\n",
+    "response = requests.get(spacex_url, timeout=30)\n",
+    "# Fail loudly on an HTTP error instead of normalizing an error payload\n",
+    "response.raise_for_status()\n",
+    "# Flatten the JSON list of launches into one row per launch\n",
+    "data = pd.json_normalize(response.json())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only relevant columns and filter for single-core, single-payload launches\n",
+    "data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]\n",
+    "single_core = data['cores'].map(len) == 1\n",
+    "single_payload = data['payloads'].map(len) == 1\n",
+    "# .copy() yields an independent frame, so the column assignments below do not\n",
+    "# trigger SettingWithCopyWarning on a filtered slice\n",
+    "data = data[single_core & single_payload].copy()\n",
+    "# Unwrap the one-element lists into their single value\n",
+    "data['cores'] = data['cores'].map(lambda x: x[0])\n",
+    "data['payloads'] = data['payloads'].map(lambda x: x[0])\n",
+    "# Keep the calendar date only, then restrict to launches on or before 2020-11-13\n",
+    "data['date'] = pd.to_datetime(data['date_utc']).dt.date\n",
+    "data = data[data['date'] <= datetime.date(2020, 11, 13)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fresh accumulator lists, grouped by the helper that fills them\n",
+    "BoosterVersion = []\n",
+    "LaunchSite, Longitude, Latitude = [], [], []\n",
+    "PayloadMass, Orbit = [], []\n",
+    "Block, ReusedCount, Serial = [], [], []\n",
+    "Outcome, Flights, GridFins = [], [], []\n",
+    "Reused, Legs, LandingPad = [], [], []\n",
+    "\n",
+    "# Run each extractor over the launch table (same order as listed)\n",
+    "for extract in (getBoosterVersion, getLaunchSite, getPayloadData, getCoreData):\n",
+    "    extract(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Gather the output columns in their final order, then build the frame\n",
+    "dataset = dict(\n",
+    "    FlightNumber=list(data['flight_number']),\n",
+    "    Date=list(data['date']),\n",
+    "    BoosterVersion=BoosterVersion,\n",
+    "    PayloadMass=PayloadMass,\n",
+    "    Orbit=Orbit,\n",
+    "    LaunchSite=LaunchSite,\n",
+    "    Outcome=Outcome,\n",
+    "    Flights=Flights,\n",
+    "    GridFins=GridFins,\n",
+    "    Reused=Reused,\n",
+    "    Legs=Legs,\n",
+    "    LandingPad=LandingPad,\n",
+    "    Block=Block,\n",
+    "    ReusedCount=ReusedCount,\n",
+    "    Serial=Serial,\n",
+    "    Longitude=Longitude,\n",
+    "    Latitude=Latitude,\n",
+    ")\n",
+    "df = pd.DataFrame(dataset)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data Cleaning\n",
+    "\n",
+    "Filter for Falcon 9 launches and handle missing values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only Falcon 9 launches: select them explicitly so that any other (or\n",
+    "# missing) booster name is dropped too, not just 'Falcon 1'.\n",
+    "# .copy() makes df an independent frame so the assignment below does not\n",
+    "# raise SettingWithCopyWarning.\n",
+    "df = df[df['BoosterVersion'] == 'Falcon 9'].copy()\n",
+    "# Renumber the remaining flights consecutively starting at 1\n",
+    "df.loc[:, 'FlightNumber'] = list(range(1, len(df) + 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Handle missing values in PayloadMass by imputing the column mean\n",
+    "payload_mass_mean = df['PayloadMass'].mean()\n",
+    "# Assign the filled column back: an inplace replace on df['PayloadMass'] acts on\n",
+    "# a temporary Series and does not update df under pandas Copy-on-Write\n",
+    "df['PayloadMass'] = df['PayloadMass'].fillna(payload_mass_mean)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Save Cleaned Data\n",
+    "\n",
+    "Export the cleaned dataset for further analysis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write the cleaned frame to disk, leaving out the pandas index column\n",
+    "df.to_csv(path_or_buf='dataset-part-1.csv', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python",
+   "language": "python",
+   "name": "conda-env-python-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-*.ipynb filter=lfs diff=lfs merge=lfs -text`
`2`	`1`	`*.csv filter=lfs diff=lfs merge=lfs -text`
`3`	`2`	`*.mp4 filter=lfs diff=lfs merge=lfs -text`
`4`	`3`	`*.keras filter=lfs diff=lfs merge=lfs -text`