-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample0_logging.py
More file actions
55 lines (47 loc) · 2.7 KB
/
example0_logging.py
File metadata and controls
55 lines (47 loc) · 2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import logging
import os
import time

import pandas as pd

from textdetection.detection import Detector
# Destination of the run log. The parent directory is created up front because
# the file handler installed by logging.basicConfig does not create missing
# directories and would otherwise fail with FileNotFoundError on a fresh checkout.
_LOG_FILE = 'outputs/mylogs/mylogs.log'
os.makedirs(os.path.dirname(_LOG_FILE), exist_ok=True)

# Root logger: INFO and above, each record prefixed with a 12-hour timestamp.
logging.basicConfig(
    filename=_LOG_FILE,
    format='%(asctime)s - %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO,
)
class datasetCSV():
    """CSV-backed dataset loaded with pandas.

    Attributes set by the constructor:
        path: the CSV location that was passed in.
        text_column: the requested column name, or None.
        df: the whole table returned by ``pd.read_csv`` when ``text_column``
            is None, otherwise only that column of the table.
    """

    def __init__(self, path=None, text_column=None):
        """Read the CSV at ``path`` and keep either the table or one column.

        Args:
            path: location of the CSV file; required despite the default.
            text_column: name of the column to keep; None keeps every column.

        Raises:
            ValueError: if ``path`` is None.
        """
        if path is None:
            raise ValueError("The path argument cannot be None! It should be string!")
        self.path = path
        self.text_column = text_column

        # Read from the path given to this instance, not from a module-level
        # global, so the class works wherever it is instantiated.
        table = pd.read_csv(self.path)
        self.df = table if self.text_column is None else table[self.text_column]
def detect_process_log(datasetObject, k=10, threshold=0.2, multiprocess=False, chunk=20):
    """Run a ``Detector`` over a dataset, log the timing, and return the detection count.

    Args:
        datasetObject: object exposing ``df`` (the data handed to the detector)
            and ``path`` (reported in the log record).
        k: forwarded to ``Detector(k=...)``.
        threshold: forwarded to ``Detector(threshold=...)``.
        multiprocess: when truthy, ``detector.multi_process`` is used;
            otherwise ``detector.process``.
        chunk: forwarded to ``multi_process``; ignored on the other path.

    Returns:
        ``len(detector.history)`` after processing.
    """
    detector = Detector(k=k, threshold=threshold)

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    if multiprocess:
        # sless=0 is passed through unchanged; its meaning is defined by Detector.
        detector.multi_process(datasetObject.df, chunk=chunk, sless=0)
    else:
        detector.process(datasetObject.df)
    elapsed_time = time.perf_counter() - started

    logging.info(
        f"time\t>>>>\t{elapsed_time} s \tnum of detections: {len(detector.history)}\n\tdata: {datasetObject.path}\t"
        f"len(data)={len(datasetObject.df)}\n\tDetector(k={k}, threshold={threshold})")
    detector.print_result()
    print(f"time elapsed : : : {elapsed_time}")
    return len(detector.history)
# Mark the beginning of a new run in the log.
logging.info('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< NEW START >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

# Dataset selection: exactly one ``data_path`` assignment is active; the
# commented-out lines are alternative inputs. The trailing note on each line
# presumably names the CSV's text column -- confirm before switching.
# data_path = "../data/suspicious/albert-base-v2-sst2_textbugger_sequences_2020-11-20-03-20.csv" # text
# data_path = "../data/benign/glue.csv" # sentence
# data_path = "../data/suspicious/cnn-ag-news_deepwordbug_sequences_2020-11-30-18-29.csv" # text
# data_path = f"../data/benign/yelp_review_full_csv/new_versions/3_yelp.csv" # text
# data_path = "../data/suspicious/bert-base-uncased-ag-news_deepwordbug_sequences_2020-11-30-17-40.csv" # text
# data_path = "../data/suspicious/roberta-base-imdb_textbugger_sequences_2021-01-25-16-46.csv" # text
# data_path = "../data/suspicious/lstm-yelp_textbugger_sequences_2021-01-25-15-58.csv" # text
# data_path = "../data/suspicious/lstm-yelp_deepwordbug_sequences_2020-11-29-10-52.csv" # text
# data_path = "../data/suspicious/cnn-yelp_textbugger_sequences_2020-11-29-11-19.csv" # text
data_path = "../data/suspicious/cnn-yelp_deepwordbug_sequences_2020-11-20-15-34.csv"  # text

# Load only the 'text' column of the chosen CSV.
datasetObject = datasetCSV(path=data_path, text_column='text')

# Time the whole detection call and record the total in the log.
start_time = time.time()
num_of_detection = detect_process_log(
    datasetObject,
    k=10,
    threshold=0.21,
    multiprocess=False,
    chunk=60,
)
elapsed_time = time.time() - start_time
logging.info(f"<<<<<< FINISHED -- -- -- time passed: {elapsed_time} >>>>>>")