Skip to content

Commit ebf7220

Browse files
authored
Merge pull request #1 from dacort/feature/parquet
Initial Parquet support
2 parents a5eade0 + 4b9938a commit ebf7220

7 files changed

Lines changed: 344 additions & 24 deletions

File tree

.github/workflows/ci.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
# CI for faker-cli: run the pytest suite across OSes and Python versions
# using Poetry, with cached installs of both Poetry itself and the venv.
name: Faker CLI Test Suite
on: [push]
jobs:
  pytest:
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.9", "3.10", "3.11"]
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      # Cache the Poetry installation itself (keyed on OS/Python/Poetry version).
      - name: cache poetry install
        uses: actions/cache@v3
        with:
          path: ~/.local
          key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-poetry-1.3.2-0
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      # Cache the project virtualenv, invalidated whenever poetry.lock changes.
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: |
          poetry install --no-interaction --no-root
      - name: Install project
        run: |
          poetry install --no-interaction
      - name: Run tests
        run: |
          source $VENV
          poetry run pytest -v

README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Faker is an awesome Python library, but I often just want a simple command I can run to generate data in a variety of formats.
44

5-
With Faker CLI, you can easily generate CSV or JSON data with fields of your choosing.
5+
With Faker CLI, you can easily generate CSV, JSON, or Parquet data with fields of your choosing.
66

77
You can also utilize pre-built templates for common data formats!
88

@@ -70,6 +70,20 @@ fake -n 10 pyint,user_name,date_this_year -f json -c id,awesome_name,last_attent
7070
{"id": 1967, "awesome_name": "jmendoza", "last_attention_at": "2023-01-23"}
7171
```
7272

73+
### Parquet
74+
75+
OK, it had to happen, you can even write Parquet.
76+
77+
```bash
78+
fake -n 10 pyint,user_name,date_this_year -f parquet -o sample.parquet
79+
```
80+
81+
_youcanevenwritestraighttos3_ 🤭
82+
83+
```bash
84+
fake -n 10 pyint,user_name,date_this_year -f parquet -o s3://YOUR_BUCKET/data/sample.parquet
85+
```
86+
7387
## Templates
7488

7589
Want to generate 1 MILLION S3 Access logs in ~2 minutes? Now you can.

faker_cli/cli.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import sys
44
from faker_cli.templates import CloudFrontWriter, S3AccessLogs, S3AccessWriter, CloudTrailLogs, CloudFrontLogs
55

6-
from faker_cli.writer import CSVWriter, JSONWriter
6+
from faker_cli.writer import CSVWriter, JSONWriter, ParquetWriter
77
from typing import List
88

99
def infer_column_names(col_names, col_types: str) -> List[str]:
@@ -18,7 +18,8 @@ def infer_column_names(col_names, col_types: str) -> List[str]:
1818

1919
KLAS_MAPPER = {
2020
"csv": CSVWriter,
21-
"json": JSONWriter
21+
"json": JSONWriter,
22+
"parquet": ParquetWriter,
2223
}
2324

2425
TEMPLATE_MAPPER = {
@@ -32,11 +33,12 @@ def infer_column_names(col_names, col_types: str) -> List[str]:
3233

3334
@click.command()
3435
@click.option("--num-rows", "-n", default=1, help="Number of rows")
35-
@click.option("--format", "-f", type=click.Choice(["csv", "json"]), default="csv", help="Format of the output")
36+
@click.option("--format", "-f", type=click.Choice(["csv", "json", "parquet"]), default="csv", help="Format of the output")
37+
@click.option("--output", "-o", type=click.Path(writable=True))
3638
@click.option("--columns", "-c", help="Column names", default=None, required=False)
3739
@click.option("--template", "-t", help="Template to use", type=click.Choice(["s3access", "cloudfront"]), default=None)
3840
@click.argument("column_types", required=False)
39-
def main(num_rows, format, columns, template, column_types):
41+
def main(num_rows, format, output, columns, template, column_types):
4042
"""
4143
Generate fake data, easily.
4244
@@ -45,13 +47,23 @@ def main(num_rows, format, columns, template, column_types):
4547
4648
You can also use --template for real-world synthetic data.
4749
"""
50+
# Do some initial validation - we must have either template or column types
4851
if not template and not column_types:
4952
ctx = click.get_current_context()
5053
click.echo(ctx.get_help())
5154
ctx.exit()
5255
raise click.BadArgumentUsage(
5356
"either --template or a list of Faker property names must be provided."
5457
)
58+
59+
# Parquet output requires a filename
60+
if format == "parquet" and output is None:
61+
raise click.BadArgumentUsage("parquet format requires --output/-o filename parameter.")
62+
if output is not None and format != "parquet":
63+
raise click.BadArgumentUsage("output files not supported for csv/json yet.")
64+
65+
# If the user provides a template, we use that provider and writer and exit.
66+
# We assume a template has a custom writer that may be different than CSV or JSON
5567
if template:
5668
writer = TEMPLATE_MAPPER[template][0](sys.stdout, None)
5769
log_entry = TEMPLATE_MAPPER[template][1]
@@ -60,20 +72,12 @@ def main(num_rows, format, columns, template, column_types):
6072
writer.write(row)
6173
return
6274

75+
# Now, if a template hasn't been provided, generate some fake data!
6376
col_types = column_types.split(",")
6477
headers = infer_column_names(columns, column_types)
65-
writer = KLAS_MAPPER.get(format)(sys.stdout, headers)
78+
writer = KLAS_MAPPER.get(format)(sys.stdout, headers, output)
6679
for i in range(num_rows):
6780
# TODO: Handle args
6881
row = [ fake.format(ctype) for ctype in col_types ]
6982
writer.write(row)
70-
# Convert columns to templates
71-
# if format == "csv":
72-
# column_types = [f"{{{{{x}}}}}" for x in column_types.split(',')]
73-
# print(fake.csv(data_columns=(column_types), num_rows=num_rows))
74-
# elif format == "json":
75-
# # convert column_types into a dict
76-
# cols = column_types.split(",")
77-
# column_def = dict(zip(cols, cols))
78-
# print(fake.json(data_columns=column_def, num_rows=num_rows))
79-
83+
writer.close()

faker_cli/writer.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,58 @@
11
import csv
22
import json
3+
from typing import Optional
4+
import pyarrow as pa
5+
import pyarrow.parquet as pq
6+
37

48
class Writer:
    """Base class for fake-data output writers (CSV, JSON, Parquet).

    Subclasses set `self.writer` and override `write()`; writers that
    buffer output (e.g. Parquet) also override `close()`.
    """

    def __init__(self, output, headers, filename: Optional[str] = None):
        self.output = output    # destination stream, e.g. sys.stdout
        self.headers = headers  # column names for generated rows
        # Store the optional target filename instead of dropping it, so
        # file-based subclasses inherit it without re-declaring it.
        self.filename = filename
        self.writer = None      # concrete writer object, set by subclasses

    def write(self, row):
        """Write a single row of values; implemented by subclasses."""
        pass

    def close(self):
        """Finalize the output; a no-op unless a subclass buffers data."""
        pass
1621
class CSVWriter(Writer):
    """Streams rows as CSV to the output stream, emitting the header line first."""

    def __init__(self, output, headers, filename):
        # `filename` is accepted only for signature parity with other writers.
        super().__init__(output, headers)
        self.writer = csv.writer(output)
        # Header row goes out immediately, before any data rows.
        self.writer.writerow(self.headers)

    def write(self, row):
        self.writer.writerow(row)
30+
2531
class JSONWriter(Writer):
    """Streams rows as JSON Lines: one JSON object per row, newline-terminated."""

    def __init__(self, output, headers, filename):
        # `filename` is unused; JSON always streams straight to `output`.
        super().__init__(output, headers)
        self.writer = self.output

    def write(self, row):
        record = dict(zip(self.headers, row))
        # default=str covers non-JSON-native values such as dates.
        self.writer.write(json.dumps(record, default=str) + "\n")
42+
class ParquetWriter(Writer):
    """Buffers rows in memory and writes a single Parquet file on close().

    Previously each write() built a one-row pyarrow Table and concatenated
    it onto the accumulated Table, which is quadratic in the number of rows
    and crashed in close() when no rows had been written. We now append to
    plain per-column lists and build the Table exactly once.
    """

    def __init__(self, output, headers, filename):
        super().__init__(output, headers)
        self.filename = filename
        # One value list per column; converted to a pyarrow Table in close().
        self.columns = {header: [] for header in headers}

    def write(self, row):
        # O(1) per row: append each cell to its column buffer.
        for header, value in zip(self.headers, row):
            self.columns[header].append(value)

    def close(self):
        # Builds the Table once; with zero rows this still writes a valid
        # (empty) file with the expected columns. `filename` may be a local
        # path or an s3:// URI supported by pyarrow's filesystem layer.
        pq.write_table(pa.table(self.columns), self.filename)

0 commit comments

Comments
 (0)