diff --git a/.cicada/my-pipeline.ts b/.cicada/my-pipeline.ts new file mode 100644 index 0000000..53f3d96 --- /dev/null +++ b/.cicada/my-pipeline.ts @@ -0,0 +1,20 @@ +import { Job, Pipeline } from "https://deno.land/x/cicada@v0.1.32/lib.ts"; + +const job = new Job({ + name: "My First Job", + image: "ubuntu:22.04", + steps: [ + { + name: "Print a message", + run: "echo Hello, world!", + }, + { + name: "Run a js function", + run: () => { + console.log("Hello from js"); + }, + }, + ], +}); + +export default new Pipeline([job]); diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..dcab9d3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "deno.enablePaths": [".cicada"] +} \ No newline at end of file diff --git a/apps/scraper/index.py b/apps/scraper/index.py index c189906..d1237b7 100644 --- a/apps/scraper/index.py +++ b/apps/scraper/index.py @@ -34,9 +34,9 @@ def upload_to_postgres(file): if __name__ == "__main__": # If output file is not found then refresh data - if glob('./output/*.csv'): - print("Output found will not refresh data...") - else: - print("Output not found refreshing data...") - data_file = scrape_website() - upload_to_postgres(data_file) + # if glob('./output/*.csv'): + # print("Output found will not refresh data...") + # else: + # print("Output not found refreshing data...") + # data_file = scrape_website() + upload_to_postgres("./output/restaurants1658265034.csv") diff --git a/apps/scraper/scraper.py b/apps/scraper/scraper.py index aad62b2..063a632 100644 --- a/apps/scraper/scraper.py +++ b/apps/scraper/scraper.py @@ -29,7 +29,7 @@ def __init__(self): self.restaurants = [] self.url = base_url + query + '1' self.content = bs(requests.get(self.url).content, 'html.parser') - self.delay_rate = 0.15 + self.delay_rate = 0.02 self.total_pages = self.get_total_pages(self.content) self.cur_year = date.today().year