-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
63 lines (50 loc) · 2.06 KB
/
app.py
File metadata and controls
63 lines (50 loc) · 2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
import pandas as pd
from scraper.core import AutoScraper
from ui.layout import render_sidebar, render_results, render_info_block
from ui.export import render_export_block
def main():
    """Build the scraper page: header, sidebar-driven scrape, then results.

    Reads the state dict returned by ``render_sidebar()``. When the scrape
    button is set and a URL is present, an ``AutoScraper`` is run and the
    scraper, its result and the ``force_generic`` flag are stored in
    ``st.session_state``. Afterwards the stored result (if any) is rendered,
    followed by the export block when ``render_results`` returns a
    non-``None`` value; with no stored result the info block is shown.
    """
    st.set_page_config(page_title="Auto Web Scraper", page_icon="🕷️", layout="wide")
    st.title("🕷️ Auto Product List Scraper")
    st.markdown("*Automatically finds and extracts data from web pages*")

    sidebar = render_sidebar()
    # Read every setting up front, in a fixed order, so a missing key raises
    # before any scraping work is attempted.
    setting_names = (
        "url",
        "use_hasdata",
        "api_key",
        "scrape_config",
        "force_generic",
        "scrape_button",
    )
    url, use_hasdata, api_key, scrape_config, force_generic, scrape_button = [
        sidebar[name] for name in setting_names
    ]

    if scrape_button and url:
        hasdata_key_missing = use_hasdata and not api_key
        if hasdata_key_missing:
            st.error("❌ Please provide HasData API key")
            return

        with st.spinner("🔍 Analyzing page..."):
            try:
                active_scraper = AutoScraper(
                    url,
                    api_key=api_key,
                    scrape_config=scrape_config,
                )
                outcome = active_scraper.scrape(force_generic=force_generic)
                if not outcome:
                    st.error("❌ Could not find repeating structures on the page")
                    return

                # Store the outcome in session state; the rendering step
                # below reads it back from there.
                st.session_state["scraper"] = active_scraper
                st.session_state["result"] = outcome
                st.session_state["force_generic"] = force_generic
            except Exception as e:
                # Top-level UI boundary: report the failure on the page
                # together with the full traceback instead of crashing.
                st.error(f"❌ Error: {str(e)}")
                import traceback

                st.code(traceback.format_exc())
                return

    # Nothing stored yet: show the introductory info block and stop.
    if "result" not in st.session_state:
        render_info_block()
        return

    stored_scraper = st.session_state["scraper"]
    stored_result = st.session_state["result"]
    stored_force_generic = st.session_state.get("force_generic", False)

    df = render_results(stored_scraper, stored_result, stored_force_generic)
    if df is not None:
        render_export_block(df)
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()