src/App.jsx (52 additions, 0 deletions)
@@ -5,9 +5,20 @@ import AllApps from "./Python_Library_Pages/AllApps";
import PythonBasics from "./Python_Library_Pages/Python_Basics/Introduction-to-Python";
import NumpyBasics from "./Python_Library_Pages/Numpy/Intro-to-Numpy";
import MatplotlibBasics from "./Python_Library_Pages/Matplotlib/Intro-to-Matplotlib";
import BeautifulSoupBasics from "./Python_Library_Pages/BeautifulSoup/Intro-to-BeautifulSoup";
import GettingStartedBS from "./Python_Library_Pages/BeautifulSoup/Getting-Started-With-BS";
import BasicsBeautifulSoup from "./Python_Library_Pages/BeautifulSoup/Basics-of-BeautifulSoup";
import NavigatingHTMLTree from "./Python_Library_Pages/BeautifulSoup/Navigating-HTML-Tree";
import DataFromWebPages from "./Python_Library_Pages/BeautifulSoup/Extracting-Data-From-WebPage";
import HandleComplexHTML from "./Python_Library_Pages/BeautifulSoup/Handling-Complex-HTML";
import RealWorldExamples from "./Python_Library_Pages/BeautifulSoup/Real-World-Examples";
import AdvanceWebScrapping from "./Python_Library_Pages/BeautifulSoup/Advance-Web-Scrapping";
import OperatorsBasics from "./Python_Library_Pages/Python_Basics/Intrduction-to-Operators";
import FunctionsBasics from "./Python_Library_Pages/Python_Basics/Introduction-to-Functions";
import PandasBasics from "./Python_Library_Pages/Pandas/Intro-to-Pandas";

import PlayGround from "./Python/PlayGround";

import Navbar from "./Components/Navbar";
@@ -49,14 +60,55 @@ const App = () => {
<Route path="Intro-to-Matplotlib" element={<MatplotlibBasics />} />
</Route>

<Route path="BeautifulSoup-Library" element={<Outlet />}>
<Route
path="Intro-to-BeautifulSoup"
element={<BeautifulSoupBasics />}
/>
<Route
path="Getting-Started-With-BS"
element={<GettingStartedBS />}
/>
<Route
path="Basics-of-BeautifulSoup"
element={<BasicsBeautifulSoup />}
/>
<Route
path="Navigating-HTML-Tree"
element={<NavigatingHTMLTree />}
/>
<Route
path="Extracting-Data-From-WebPage"
element={<DataFromWebPages />}
/>
<Route
path="Handling-Complex-HTML"
element={<HandleComplexHTML />}
/>
<Route
path="Advance-Web-Scrapping"
element={<AdvanceWebScrapping />}
/>
<Route
path="Real-World-Examples"
element={<RealWorldExamples />}
/>
</Route>

{/* Seaborn */}
<Route path="/Seaborn" element={<Outlet />}>
<Route path="Introduction-to-seaborn" element={<Seaborn />} />
</Route>


{/* TensorFlow */}
<Route path="/TensorFlow" element={<Outlet />}>
<Route path="Introduction-to-tensorFlow" element={<TensorFlow />} />
<Route path="Installation" element={<Installation />}/>
<Route
path="Introduction-to-Tensors"
element={<Tensors />}
/>
<Route path="Introduction-to-Variables" element={<Variables />} />
</Route>

<Route path="/Flask" element={<Outlet />}>
src/Constants/index.js (43 additions, 0 deletions)
@@ -285,6 +285,46 @@ export const subMenusList = [
],
},
{
name: "BeautifulSoup-Library",
title: "BeautifulSoup Library",
route: "/BeautifulSoup-Library/intro-to-BeautifulSoup",
children: [
{
title: "Intro to BeautifulSoup",
route: "Intro-to-BeautifulSoup",
},
{
title: "Getting Started With BeautifulSoup",
route: "Getting-Started-With-BS",
},
{
title: "Basics of BeautifulSoup",
route: "Basics-of-BeautifulSoup",
},
{
title: "Navigating The HTML Tree",
route: "Navigating-HTML-Tree",
},
{
title: "Extracting Data From Web-Pages",
route: "Extracting-Data-From-WebPage",
},
{
title: "Handling Complex HTML Structures",
route: "Handling-Complex-HTML",
},
{
title: "Real-World Examples and Case Studies",
route: "ReaL-World-Examples",
},
{
title: "Advanced Web Scraping Techniques",
route: "Advance-Web-Scrapping",
}
],
},
{
name: "Pillow (PIL)",
title: "Pillow (PIL)",
route: "/Pillow-(PIL)/Introduction-to-Pillow-(PIL)",
@@ -561,6 +601,9 @@ export const subMenusList = [
route: "Introduction-to-TensorFlow",
},
],
},
{
name: "Tkinter",
src/Python_Library_Pages/BeautifulSoup/Advance-Web-Scrapping.jsx (new file, 187 additions)
@@ -0,0 +1,187 @@
import React from "react";

const AdvanceWebScrapping = () => {
return (
<div>
<h1><strong>Advanced Web Scraping Techniques </strong> </h1> <br />

<p>Advanced web scraping often involves dealing with more complex scenarios, such as handling pagination, interacting with JavaScript-based websites, and avoiding web scraping restrictions. In this section, we will explore advanced web scraping techniques using Beautiful Soup and related tools.</p>
<br />
<h1><strong>Handling Pagination </strong> </h1> <br />
<p>Pagination is common on websites that display data across multiple pages, such as search results or product listings. To scrape data from multiple pages, you need to navigate through each page and extract the desired information. Here's a high-level approach:</p>
<br />

<p>
<strong> Retrieve the First Page:</strong> Fetch the HTML content of the first page and parse it with Beautiful Soup. <br /> <br />

<strong>Extract Data:</strong> Extract the data you need from the first page. <br /> <br />

<strong>Identify Pagination Mechanism:</strong> Find elements or controls that allow you to navigate to the next page (e.g., "Next" buttons or page numbers). <br /> <br />

<strong> Iterate Through Pages:</strong> Use a loop to iterate through the pages by following the pagination mechanism, fetching each page's content, and extracting data.</p> <br /> <br />
<p>Here's an example of scraping search results from a paginated website:</p> <br />

<div className="snippet">
<pre>
<code>
{`import requests
from bs4 import BeautifulSoup

base_url = "https://example.com/search?page="
page_number = 1

while True:
    url = f"{base_url}{page_number}"
    response = requests.get(url)
    if response.status_code != 200:
        break  # Stop if the page is not found or an error occurs

    soup = BeautifulSoup(response.text, 'html.parser')
    # Extract data from the current page
    # ...

    # Find the "Next" button or page number for the next page
    next_button = soup.find('a', class_='next')
    if not next_button:
        break  # No more pages to scrape

    page_number += 1
`}
</code>
</pre>
</div> <br />

<div className="content">
<h1><strong>Handling JavaScript-Driven Websites</strong></h1> <br />
<p>Some websites load content dynamically using JavaScript, making traditional web scraping challenging. In such cases, consider using tools like Selenium in combination with Beautiful Soup. Selenium allows you to automate web interactions and retrieve data from pages that rely on JavaScript to render content.</p> <br />
<br />

<p>Here's a basic example of using Selenium with Beautiful Soup:</p> <br />
<div className="snippet">
<pre>
<code>
{`import time

from selenium import webdriver
from bs4 import BeautifulSoup

# Set up a Selenium webdriver (Selenium 4+ manages the browser driver for you)
driver = webdriver.Chrome()

# Load a webpage with JavaScript content
driver.get('https://example.com/some-page')

# Wait for the page to load completely (you may need to adjust the wait time)
time.sleep(5)

# Get the page source after JavaScript rendering
page_source = driver.page_source

# Parse the page source with Beautiful Soup
soup = BeautifulSoup(page_source, 'html.parser')

# Extract data from the page
# ...

# Don't forget to close the driver when done
driver.quit()
`}
</code>
</pre>
</div> <br />
</div>


<div className="content">
<h1><strong>Avoiding Web Scraping Restrictions</strong></h1> <br />
<p>Some websites actively discourage or block web scraping. To overcome restrictions and avoid being detected as a scraper, consider the following techniques (a short sketch follows the list):</p> <br />

<p>
<strong> Use User Agents:</strong> Set a User-Agent header in your requests to mimic a real browser. <br />
<strong> Limit Request Rate: </strong> Avoid making too many requests in a short period; use delays between requests. <br />

<strong> Rotate IP Addresses:</strong> If possible, use a rotating IP proxy service to prevent IP bans. <br />

<strong> Use Headless Browsing:</strong> Use headless browsers like Selenium with the --headless option to run without a visible browser window. <br />

<strong> Use Request Session: </strong> Utilize the requests library's session feature to persist cookies and maintain a session. <br />

<strong> Handle CAPTCHAs:</strong> If a website uses CAPTCHAs, consider using CAPTCHA-solving services or manual intervention. <br />

</p>
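<p>Here's a minimal sketch combining a few of these ideas with the requests library; the URL, User-Agent string, and delay are placeholders to adapt to your target site:</p> <br />
<div className="snippet">
<pre>
<code>
{`import time

import requests

# A session persists cookies and connection settings across requests
session = requests.Session()
session.headers.update({
    # Mimic a real browser with a User-Agent header (placeholder value)
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
})

urls = [f"https://example.com/page/{i}" for i in range(1, 4)]
for url in urls:
    response = session.get(url, timeout=10)
    print(url, response.status_code)
    time.sleep(2)  # Pause between requests to limit the request rate
`}
</code>
</pre>
</div> <br />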
</div>

<div className="content">
<h1><strong>Dealing with Dynamic Content</strong></h1> <br />
<p>Some websites load content dynamically using AJAX or other techniques. To scrape such content, you can inspect network requests made by the website and simulate those requests in your scraping script. Tools like Browser Developer Tools (e.g., Chrome DevTools) can help you identify the relevant network requests and parameters.</p> <br />

<p>Additionally, libraries like Requests and Selenium allow you to send HTTP requests and handle dynamic content retrieval programmatically.</p>
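<p>For example, if the browser's Network tab shows the page being filled from a JSON endpoint, you can often call that endpoint directly; the endpoint and field names below are hypothetical:</p> <br />
<div className="snippet">
<pre>
<code>
{`import requests

# Hypothetical JSON endpoint discovered in the browser's Network tab
api_url = "https://example.com/api/products?page=1"

response = requests.get(api_url, timeout=10)
response.raise_for_status()

data = response.json()  # Parsed JSON instead of HTML
for item in data.get("products", []):
    print(item.get("name"), item.get("price"))
`}
</code>
</pre>
</div> <br />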
</div>


<div className="content">
<h1><strong>Handling Login and Authentication</strong></h1> <br />
<p>For websites that require user authentication, you can use tools like Selenium to automate login processes. Here's a simplified example:</p> <br />

<div className="snippet">
<pre>
<code>
{`from selenium import webdriver
from selenium.webdriver.common.by import By

# Set up Selenium (Selenium 4+ manages the browser driver for you)
driver = webdriver.Chrome()

# Open the login page
driver.get('https://example.com/login')

# Fill in the login form fields
username_input = driver.find_element(By.NAME, 'username')
password_input = driver.find_element(By.NAME, 'password')
username_input.send_keys('your_username')
password_input.send_keys('your_password')

# Submit the form
login_button = driver.find_element(By.XPATH, '//button[@type="submit"]')
login_button.click()

# Continue scraping authenticated content
# ...

# Don't forget to close the driver when done
driver.quit()
`}
</code>
</pre>
</div> <br />
</div>


<div className="content">
<h1><strong>Robots.txt</strong></h1> <br />
<p>Before scraping a website, it's important to check its robots.txt file, which provides guidelines on whether web crawlers are allowed and which parts of the website they can access. Always respect the rules outlined in the robots.txt file to avoid legal issues and maintain good web scraping practices.</p> <br />
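<p>Python's standard library can check robots.txt rules for you. Here's a small sketch (example.com and the URL are placeholders):</p> <br />
<div className="snippet">
<pre>
<code>
{`from urllib.robotparser import RobotFileParser

rp = RobotFileParser()
rp.set_url("https://example.com/robots.txt")
rp.read()

# Check whether a generic crawler may fetch a given URL
url = "https://example.com/search?page=1"
print(rp.can_fetch("*", url))  # True if allowed, False if disallowed
`}
</code>
</pre>
</div> <br />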


<h1><strong>Error Handling and Retry Strategies</strong></h1> <br />
<p>When scraping large amounts of data or dealing with network requests, errors can occur. Implement robust error handling and retry strategies to handle timeouts, network issues, and other unexpected problems gracefully. This may include logging errors, delaying retries, or changing IP addresses.</p> <br />
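<p>Here's a minimal retry sketch with exponential backoff; the retry count and delays are arbitrary placeholders:</p> <br />
<div className="snippet">
<pre>
<code>
{`import time

import requests

def fetch_with_retries(url, max_retries=3):
    """Fetch a URL, retrying with exponential backoff on failure."""
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            return response
        except requests.RequestException as exc:
            wait = 2 ** attempt  # 1s, 2s, 4s, ...
            print(f"Attempt {attempt + 1} failed ({exc}); retrying in {wait}s")
            time.sleep(wait)
    raise RuntimeError(f"Giving up on {url} after {max_retries} attempts")
`}
</code>
</pre>
</div> <br />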


<h1><strong>Legal and Ethical Considerations</strong></h1> <br />
<p>Always ensure that your web scraping activities comply with legal and ethical guidelines. Respect website terms of service, privacy policies, and copyright laws. Scraping should be for legitimate purposes, and you should avoid scraping sensitive or personal information.</p> <br />


<h1><strong>Rate Limiting and Throttling</strong></h1> <br />
<p>To avoid overloading a website's server with requests, implement rate limiting and throttling mechanisms in your scraping script. This can help you stay within acceptable usage limits and maintain a good relationship with the sites you scrape.</p> <br />
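<p>One simple approach is to enforce a minimum interval between consecutive requests; the one-second interval below is an arbitrary placeholder:</p> <br />
<div className="snippet">
<pre>
<code>
{`import time

import requests

MIN_INTERVAL = 1.0  # Minimum seconds between consecutive requests
_last_request = 0.0

def throttled_get(session, url):
    """Sleep as needed so requests are at least MIN_INTERVAL apart."""
    global _last_request
    elapsed = time.monotonic() - _last_request
    if elapsed < MIN_INTERVAL:
        time.sleep(MIN_INTERVAL - elapsed)
    _last_request = time.monotonic()
    return session.get(url, timeout=10)

session = requests.Session()
response = throttled_get(session, "https://example.com/page/1")
`}
</code>
</pre>
</div> <br />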


</div>



</div>
);
};

export default AdvanceWebScrapping;