From 6cbe4c1d23fe4cbf17753f56aff59711af5da655 Mon Sep 17 00:00:00 2001 From: Sam Fish Date: Thu, 13 Apr 2023 15:32:53 -0700 Subject: [PATCH] Cicada init --- .cicada/my-pipeline.ts | 20 ++++++++++++++++++++ .vscode/settings.json | 3 +++ apps/scraper/index.py | 12 ++++++------ apps/scraper/scraper.py | 2 +- 4 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 .cicada/my-pipeline.ts create mode 100644 .vscode/settings.json diff --git a/.cicada/my-pipeline.ts b/.cicada/my-pipeline.ts new file mode 100644 index 0000000..53f3d96 --- /dev/null +++ b/.cicada/my-pipeline.ts @@ -0,0 +1,20 @@ +import { Job, Pipeline } from "https://deno.land/x/cicada@v0.1.32/lib.ts"; + +const job = new Job({ + name: "My First Job", + image: "ubuntu:22.04", + steps: [ + { + name: "Print a message", + run: "echo Hello, world!", + }, + { + name: "Run a js function", + run: () => { + console.log("Hello from js"); + }, + }, + ], +}); + +export default new Pipeline([job]); diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..dcab9d3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "deno.enablePaths": [".cicada"] +} \ No newline at end of file diff --git a/apps/scraper/index.py b/apps/scraper/index.py index c189906..d1237b7 100644 --- a/apps/scraper/index.py +++ b/apps/scraper/index.py @@ -34,9 +34,9 @@ def upload_to_postgres(file): if __name__ == "__main__": # If output file is not found then refresh data - if glob('./output/*.csv'): - print("Output found will not refresh data...") - else: - print("Output not found refreshing data...") - data_file = scrape_website() - upload_to_postgres(data_file) + # if glob('./output/*.csv'): + # print("Output found will not refresh data...") + # else: + # print("Output not found refreshing data...") + # data_file = scrape_website() + upload_to_postgres("./output/restaurants1658265034.csv") diff --git a/apps/scraper/scraper.py b/apps/scraper/scraper.py index aad62b2..063a632 100644 --- a/apps/scraper/scraper.py +++ b/apps/scraper/scraper.py @@ -29,7 +29,7 @@ def __init__(self): self.restaurants = [] self.url = base_url + query + '1' self.content = bs(requests.get(self.url).content, 'html.parser') - self.delay_rate = 0.15 + self.delay_rate = 0.02 self.total_pages = self.get_total_pages(self.content) self.cur_year = date.today().year