33import sys
44from faker_cli .templates import CloudFrontWriter , S3AccessLogs , S3AccessWriter , CloudTrailLogs , CloudFrontLogs
55
6- from faker_cli .writer import CSVWriter , JSONWriter
6+ from faker_cli .writer import CSVWriter , JSONWriter , ParquetWriter
77from typing import List
88
99def infer_column_names (col_names , col_types : str ) -> List [str ]:
@@ -18,7 +18,8 @@ def infer_column_names(col_names, col_types: str) -> List[str]:
1818
1919KLAS_MAPPER = {
2020 "csv" : CSVWriter ,
21- "json" : JSONWriter
21+ "json" : JSONWriter ,
22+ "parquet" : ParquetWriter ,
2223}
2324
2425TEMPLATE_MAPPER = {
@@ -32,11 +33,12 @@ def infer_column_names(col_names, col_types: str) -> List[str]:
3233
3334@click .command ()
3435@click .option ("--num-rows" , "-n" , default = 1 , help = "Number of rows" )
35- @click .option ("--format" , "-f" , type = click .Choice (["csv" , "json" ]), default = "csv" , help = "Format of the output" )
36+ @click .option ("--format" , "-f" , type = click .Choice (["csv" , "json" , "parquet" ]), default = "csv" , help = "Format of the output" )
37+ @click .option ("--output" , "-o" , type = click .Path (writable = True ))
3638@click .option ("--columns" , "-c" , help = "Column names" , default = None , required = False )
3739@click .option ("--template" , "-t" , help = "Template to use" , type = click .Choice (["s3access" , "cloudfront" ]), default = None )
3840@click .argument ("column_types" , required = False )
39- def main (num_rows , format , columns , template , column_types ):
41+ def main (num_rows , format , output , columns , template , column_types ):
4042 """
4143 Generate fake data, easily.
4244
@@ -45,13 +47,23 @@ def main(num_rows, format, columns, template, column_types):
4547
4648 You can also use --template for real-world synthetic data.
4749 """
50+ # Do some initial validation - we must have either template or column types
4851 if not template and not column_types :
4952 ctx = click .get_current_context ()
5053 click .echo (ctx .get_help ())
5154 ctx .exit ()
5255 raise click .BadArgumentUsage (
5356 "either --template or a list of Faker property names must be provided."
5457 )
58+
59+ # Parquet output requires a filename
60+ if format == "parquet" and output is None :
61+ raise click .BadArgumentUsage ("parquet format requires --output/-o filename parameter." )
62+ if output is not None and format != "parquet" :
63+ raise click .BadArgumentUsage ("output files not supported for csv/json yet." )
64+
65+ # If the user provides a template, we use that provider and writer and exit.
66+ # We assume a template has a custom writer that may be different from CSV or JSON
5567 if template :
5668 writer = TEMPLATE_MAPPER [template ][0 ](sys .stdout , None )
5769 log_entry = TEMPLATE_MAPPER [template ][1 ]
@@ -60,20 +72,12 @@ def main(num_rows, format, columns, template, column_types):
6072 writer .write (row )
6173 return
6274
75+ # Now, if a template hasn't been provided, generate some fake data!
6376 col_types = column_types .split ("," )
6477 headers = infer_column_names (columns , column_types )
65- writer = KLAS_MAPPER .get (format )(sys .stdout , headers )
78+ writer = KLAS_MAPPER .get (format )(sys .stdout , headers , output )
6679 for i in range (num_rows ):
6780 # TODO: Handle args
6881 row = [ fake .format (ctype ) for ctype in col_types ]
6982 writer .write (row )
70- # Convert columns to templates
71- # if format == "csv":
72- # column_types = [f"{{{{{x}}}}}" for x in column_types.split(',')]
73- # print(fake.csv(data_columns=(column_types), num_rows=num_rows))
74- # elif format == "json":
75- # # convert column_types into a dict
76- # cols = column_types.split(",")
77- # column_def = dict(zip(cols, cols))
78- # print(fake.json(data_columns=column_def, num_rows=num_rows))
79-
83+ writer .close ()
0 commit comments