-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBooleanToInt.py
More file actions
29 lines (21 loc) · 759 Bytes
/
BooleanToInt.py
File metadata and controls
29 lines (21 loc) · 759 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import pandas as pd
import numpy as np
# Directory that holds the input dataset.
PATH = "/Users/mustafa/documents/Year 4/403/"
# File name of the tweet dataset inside PATH.
FILE = "new_trump.csv"

# Load the full tweet table into memory.
csv_location = PATH + FILE
trump_tweets_df = pd.read_csv(csv_location)
# converts a boolean flag to an integer: 1 for True, 0 for anything else
def bool_to_int(bool):
    """Convert a boolean flag to an integer.

    Args:
        bool: The value to convert. The name shadows the builtin ``bool``;
            it is kept so that existing keyword callers keep working.

    Returns:
        1 if the value is the Python ``True`` singleton or a true
        ``numpy.bool_`` scalar; 0 for anything else (``False``, ``None``,
        NaN, strings, plain numbers).
    """
    # An identity check against True alone misses numpy boolean scalars,
    # which are distinct objects from the Python singleton and would
    # otherwise be silently converted to 0.
    if bool is True or (isinstance(bool, np.bool_) and bool):
        return 1
    return 0
# Turn the boolean indicator columns into 0/1 integers.
for column in ('media', 'mention', 'hashtag'):
    trump_tweets_df[column] = trump_tweets_df[column].map(bool_to_int)

# Randomly assign each row to the training set with probability 0.7;
# rows not selected form the test set.
row_count = len(trump_tweets_df)
msk = np.random.rand(row_count) < 0.7
train = trump_tweets_df[msk]
test = trump_tweets_df[~msk]

# Report the size of each partition.
for subset in (train, test):
    print(len(subset))

# Write both partitions next to the input file.
train.to_csv(PATH + "training.csv")
test.to_csv(PATH + "testing.csv")