# Source: Transformer-Topic-Classifier-Coding-vs-Travel/deneme_model.py at main · lutfiozark/Transformer-Topic-Classifier-Coding-vs-Travel · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
# %% Libraries
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import string
from collections import Counter

# %% Data preprocessing

# Coding-topic corpus: short English sentences about programming and software
# work. These sentences receive label 1 when the dataset is assembled further
# down in this file.
coding_sentences = [
    "I am writing a Python script",
    "This function returns the correct output",
    "The code compiles without any errors",
    "I pushed the changes to the repository",
    "We fixed a bug in the login system",
    "The model is training on the dataset",
    "I use Git to manage my projects",
    "This class handles the user input",
    "We deployed the app to the server",
    "The algorithm runs in linear time",
    "I need to refactor this function",
    "We are testing the new feature",
    "This API endpoint returns JSON data",
    "I added unit tests for this module",
    "The script processes the files automatically",
    "We store the results in a database",
    "The web app runs in the browser",
    "I am learning how to use PyTorch",
    "This loop iterates over the list",
    "We use Docker for containerization",
    "The frontend sends a request to the backend",
    "I created a new branch for this task",
    "The network model converged after ten epochs",
    "We monitor the logs for errors",
    "This variable holds the user data",
    "I configured the environment variables",
    "The response time is under one second",
    "I implemented a simple recommendation system",
    "We use version control for collaboration",
    "The code is organized into separate modules",
    "I wrote a script to clean the dataset",
    "The program reads input from a file",
    "We optimized the query to run faster",
    "The training loss decreased over time",
    "I am debugging the authentication flow",
    "We deployed a new version of the service",
    "This project uses a microservice architecture",
    "I am testing the model with new samples",
    "The script schedules tasks to run daily",
    "We log each request to the server",
    "I connected the database to the backend",
    "We reviewed the pull request together",
    "The configuration file defines all settings",
    "I documented the main functions in the code",
    "We scheduled an automated backup job",
    "The server listens on port eight thousand",
    "I created a virtual environment for this project",
    "We use continuous integration for testing",
    "The optimizer updates the model parameters",
    "I changed the learning rate for better results",
    "We exported the trained model to a file",
    "The data loader batches the training samples",
    "I created a simple command line tool",
    "We added logging to track runtime errors",
    "The script downloads data from the API",
    "I validated the input before processing it",
    "We use JSON format to store configuration",
    "The frontend is built with a component library",
    "I wrote a small utility to rename files",
    "We added pagination to the list view",
    "The application uses a RESTful architecture",
    "I cached the results to improve performance",
    "We verified the output against sample data",
    "The training script runs on the GPU",
    "I inspected the gradients during training",
    "We created a baseline model first",
    "I plotted the loss curve after training",
    "We implemented custom callbacks for training",
    "The endpoint requires authentication tokens",
    "I handled exceptions to avoid crashes",
    "We defined the schema for the user table",
    "The build pipeline runs on every commit",
    "I used a seed value for reproducibility",
    "We benchmarked the function on large inputs",
    "The test suite covers the main scenarios",
    "I merged the feature branch into main",
    "We use environment variables for secrets",
    "The notebook shows the experiment results",
    "I saved the model checkpoints after each epoch",
    "We used a pre-trained model for fine-tuning",
    "The function validates the request body",
    "I enabled debug mode to trace the issue",
    "We added a health check endpoint",
    "The scheduler triggers the job every hour",
    "I wrote a script to generate synthetic data",
    "We visualized the predictions on a chart",
    "The code uses an object-oriented design",
    "I created a helper function for preprocessing",
    "We cleaned up unused imports in the file",
    "The main script calls all the modules",
    "I profiled the code to find bottlenecks",
    "We updated the dependencies to the latest versions",
    "The API documentation describes all routes",
    "I converted the notebook into a script",
    "We validated the model on a test split",
    "The logger writes messages to a file",
    "I separated the config into another module",
    "We added comments to explain tricky parts",
    "The script can be executed from the terminal",
    "I pushed the final version to the remote repository",
    "I added an option to run the script in debug mode",
    "We created a new configuration file for the staging environment",
    "I changed the batch size to improve training stability",
    "We inspected the logs to understand the failing requests",
    "The script automatically cleans temporary files after execution",
    "I moved some logic into a separate helper module",
    "We added a retry mechanism for network timeouts",
    "The model predictions are saved in a results folder",
    "I used a regular expression to parse the text file",
    "We created a simple dashboard to view metrics",
    "The code now supports both CPU and GPU execution",
    "I wrote a function to validate all configuration values",
    "We generated a report at the end of the training run",
    "The script now accepts command line arguments",
    "I updated the README to explain the new features",
    "We separated training and evaluation into different scripts",
    "The program exits gracefully when an error occurs",
    "I added type hints to improve code readability",
    "We used a scheduler to reduce the learning rate over time",
    "The script compresses the output files after processing",
    "I implemented early stopping to prevent overfitting",
    "We created a simple web interface to run predictions",
    "The system sends a notification when training is finished",
    "I cached intermediate results to speed up computations",
    "We defined custom loss functions for the model",
    "The script checks if the input path exists before running",
    "I added progress bars to show the training status",
    "We wrote integration tests for the main workflow",
    "The feature extraction step runs before the classifier",
    "I used a configuration library to manage settings",
    "We added support for multiple output formats",
    "The script now logs warnings and errors separately",
    "I refactored the data preprocessing into reusable functions",
    "We used a seed to reproduce the random split of data",
    "The metrics are printed at the end of each training epoch",
    "I implemented a callback to save the best performing model",
    "We used a grid search to tune some hyperparameters",
    "The code now ignores hidden files in the input directory",
    "I rewrote the loop to use list comprehensions where possible",
    "We used a profiler to measure memory usage during training",
    "The build script installs all required dependencies",
    "I added assertions to check important assumptions in the code",
    "We used comments to document complex parts of the algorithm",
    "The function now returns both predictions and confidence scores",
    "I exported some figures to visualize the distribution of labels",
    "We configured the logger to print timestamps with each message",
    "The script can now resume training from a saved checkpoint",
    "I wrapped the main logic in a try except block for safety",
    "We used a separate virtual environment for this experiment",
    "The repository includes example data for quick testing",
    "I pushed a hotfix to solve the production issue",
    "We reviewed the code style before merging the branch",
    "The model summary shows all layers and parameter counts",
    "I added a command to clear the cache directory",
    "We scheduled the training job to run over the weekend",
    "The function now supports both JSON and CSV inputs",
    "I checked the shape of tensors before passing them to the model",
    "We exported the trained network to an interoperable format",
    "The pipeline processes raw data and produces cleaned output",
    "I logged the random seed values for reproducibility",
    "I implemented a login page with form validation",
    "We encrypted the user passwords before storing them",
    "The mobile app sends data to a cloud server",
    "I reduced the latency by optimizing the database queries",
    "We added a dark mode option to the interface",
    "The script renames images based on their timestamp",
    "I connected the frontend to a cloud API gateway",
    "We created a simple chatbot using a language model",
    "The configuration now supports multiple environments",
    "I added an option to export the results as a CSV file",
    "We tested the endpoint with different user roles",
    "The code checks if the user has permission to access this page",
    "I implemented a search bar with live suggestions",
    "We added a loading spinner during long operations",
    "The service automatically restarts if it crashes",
    "I wrote a small script to merge several text files",
    "We used a message queue to handle background tasks",
    "The dashboard shows real time usage statistics",
    "I added tags to group related experiments together",
    "We used an ORM to interact with the database",
    "The script validates email addresses before sending messages",
    "I configured cross origin resource sharing for the API",
    "We added rate limiting to protect the service from abuse",
    "The function now supports optional keyword arguments",
    "I used a decorator to log function calls",
    "We collected runtime metrics using a monitoring agent",
    "The code checks for missing fields in the request body",
    "I used a context manager to handle file operations safely",
    "We applied data normalization before training the model",
    "The notebook includes explanations for each step of the pipeline",
    "I compressed the dataset into a single archive file",
    "We used a hashing function to anonymize user identifiers",
    "The training script prints intermediate evaluation metrics",
    "I added a simple graphical interface on top of the script",
    "We created a sandbox environment to test risky changes",
    "The scheduler triggers different tasks at fixed intervals",
    "I used a dictionary to map labels to integer IDs",
    "We defined a custom dataset class for loading the images",
    "The program automatically creates missing output folders",
    "I wrote a helper to format timestamps in log messages",
    "We used a callback to stop training when validation loss increased",
    "The module exposes a clean and minimal public API",
    "I implemented a small cache for repeated function results",
    "We stored experiment parameters in a separate JSON file",
    "The script warns the user if disk space is low",
    "I printed a summary of all configuration options at startup",
    "We secured the admin page behind an authentication check",
    "The command line tool accepts several optional flags",
    "I added colorized output to make logs easier to read",
    "We simulated network delays to test the client behaviour",
    "The training pipeline can handle multiple datasets",
    "I implemented a rollback mechanism for failed deployments",
    "We created a template project structure for new services",
    "The function returns a dictionary of computed statistics",
    "I used list slicing to select a subset of elements",
    "We added a separate settings file for production",
    "The code measures how long each step takes to execute",
    "I integrated an external library for drawing charts",
    "We scheduled log rotation to prevent files from growing too large",
    "The script exits with an error code if something goes wrong",
    "I wrote a tiny API client to test the remote service"
]

# Travel-topic corpus: short English sentences about trips, transport and
# accommodation. These sentences receive label 0 when the dataset is assembled
# further down in this file.
travel_sentences = [
    "I booked a hotel near the city center",
    "We are planning a trip next month",
    "The flight leaves early in the morning",
    "I packed my suitcase last night",
    "We walked around the old town",
    "The train ride was quiet and comfortable",
    "I bought a map at the station",
    "We visited a museum in the afternoon",
    "The hotel room has a large window",
    "We tried a local restaurant for dinner",
    "The beach was calm and not crowded",
    "I took a lot of photos on the trip",
    "We followed the guide through the streets",
    "The weather was warm and dry",
    "We used the metro to get around",
    "I checked the schedule at the bus stop",
    "We arrived at the airport on time",
    "The hostel was clean and simple",
    "We visited several towns in one day",
    "I kept my passport in a safe place",
    "We found a small cafe on the corner",
    "The road to the village was narrow",
    "We watched the sunset by the sea",
    "I bought a ticket for the night bus",
    "We stayed in a quiet neighborhood",
    "The guide showed us a famous bridge",
    "We walked along the river in the evening",
    "I used a travel app to find directions",
    "We waited in line at the border",
    "The city center was full of people",
    "We rented a car for the weekend",
    "I checked in at the hotel reception",
    "We stopped at a gas station on the way",
    "The park was close to our hotel",
    "We took a ferry across the lake",
    "I looked at the map in the lobby",
    "We visited a small village in the mountains",
    "The streets were narrow and crowded",
    "We spent the night near the station",
    "I wrote about the trip in my notebook",
    "We booked tickets for the evening train",
    "I reserved a window seat on the plane",
    "We walked through the market in the morning",
    "The hotel lobby was bright and spacious",
    "I asked the receptionist for a city map",
    "We took a day trip to a nearby island",
    "The bus ride through the hills was beautiful",
    "I tried a traditional dessert at a cafe",
    "We joined a small walking tour",
    "The streets were quiet early in the morning",
    "We stayed in a guesthouse run by a local family",
    "I bought a postcard from a street vendor",
    "We crossed a long bridge over the river",
    "The view from the hill was amazing",
    "We spent the afternoon at the seaside",
    "I listened to music during the long flight",
    "We shared a taxi to the city center",
    "The hotel offered a simple breakfast",
    "We looked for souvenirs in a small shop",
    "I used a translation app to talk to locals",
    "We checked the weather before leaving the hotel",
    "The train station was crowded and noisy",
    "We took a lot of photos in the square",
    "I drank coffee while waiting for the bus",
    "We stayed only one night in that town",
    "The hotel window looked over a busy street",
    "We followed the signs to the old castle",
    "I tried a new dish at a local restaurant",
    "We climbed many stairs to reach the viewpoint",
    "The city at night was full of lights",
    "We had to wait at the baggage claim",
    "I charged my phone at the airport lounge",
    "We walked along the coast in the afternoon",
    "The air was fresh near the mountains",
    "We changed trains at the main station",
    "I bought a small gift at the airport shop",
    "We visited a famous square in the city center",
    "The museum ticket included an audio guide",
    "We took a boat tour around the harbor",
    "I checked the departure board several times",
    "We stayed close to a large public park",
    "The flight was delayed for two hours",
    "We sat near the window on the train",
    "I saved all my travel photos in a folder",
    "We used a paper map instead of a phone",
    "The bus stopped at a small roadside cafe",
    "We had breakfast on the hotel terrace",
    "I watched people walking in the main square",
    "We ordered local food at a small restaurant",
    "The taxi dropped us near the town center",
    "We walked slowly through the narrow alleys",
    "I kept my ticket in my wallet",
    "We waited for the ferry at the pier",
    "The hotel room had a view of the sea",
    "We followed the river path to the bridge",
    "I wrote the day’s events in my travel journal",
    "We planned the next day’s route on the map",
    "The city bus took us back to the hotel",
    "We left our luggage at the reception desk",
    "We took a short walk around the hotel after dinner",
    "I checked the local time before setting my alarm",
    "We asked a local person for directions to the station",
    "The bus ticket was cheaper than we expected",
    "We waited under a small shelter during the rain",
    "I bought a bottle of water from a street vendor",
    "We listened to street musicians in the city square",
    "The train windows showed a view of green fields",
    "We crossed a busy intersection near the market",
    "I wrote down the address of the hostel in my notebook",
    "We stopped to rest on a bench in the park",
    "The small shop sold postcards and magnets",
    "We followed the signs to the airport terminal",
    "I checked my passport again while in the queue",
    "We walked through a quiet residential street",
    "The bus driver announced the next stop clearly",
    "We watched the clouds from the airplane window",
    "I used my phone to translate the menu at dinner",
    "We shared a table with another traveler in the cafe",
    "The train station had a large electronic timetable",
    "We took a picture in front of the old monument",
    "I kept my boarding pass in my jacket pocket",
    "We looked for a place to sit near the fountain",
    "The air inside the plane felt dry",
    "We arrived at the platform just before the train",
    "I checked the departure time on my ticket",
    "We walked slowly along the harbor in the evening",
    "The hotel receptionist gave us a simple city map",
    "We passed by a small bakery on a side street",
    "I bought a snack while waiting at the terminal",
    "We looked at the mountains from the bus window",
    "The square was filled with people taking photos",
    "We turned down a narrow alley to reach the hostel",
    "I charged my phone using a socket at the station",
    "We saw a group of tourists following a guide",
    "The luggage carousel moved slowly in the airport",
    "We chose a small table near the window in the cafe",
    "I checked the weather forecast before leaving the room",
    "We stopped to read a sign about the old building",
    "The train passed over several small bridges",
    "We walked back to the hotel after sunset",
    "I wrote the name of the street in my notebook",
    "We listened to the sound of waves by the shore",
    "The bus stopped often to pick up passengers",
    "We waited a few minutes at a red traffic light",
    "I looked for the hotel name on the tall building",
    "We crossed a wide street filled with cars and bikes",
    "The tram was crowded during the early evening",
    "We ordered coffee and watched people outside",
    "I put my ticket in a small travel wallet",
    "We checked the map again at the corner",
    "The path to the viewpoint was steep but short",
    "We took a break near a tree on the hill",
    "I used offline maps to find our hotel",
    "We found a quiet bench near the river",
    "The flight landed later than scheduled",
    "We walked past a row of old houses",
    "I kept my camera in a small backpack",
    "We followed the river until we reached the bridge",
    "The bus station had a small waiting room",
    "We changed some local money at the currency exchange office",
    "I saved the hotel address in my phone contacts",
    "We checked the visa requirements before booking the trip",
    "The bus passed through several small villages on the way",
    "We spent the afternoon sitting by the harbor",
    "I bought a simple guidebook from a small bookstore",
    "We followed a walking route suggested by the tourist office",
    "The hostel kitchen was shared by all guests",
    "We tried a street food stall near the market",
    "I kept all my travel tickets in a small folder",
    "We watched boats moving slowly across the bay",
    "The city map showed several museums and galleries",
    "We took a night walk under the city lights",
    "I checked the train platform number on the display board",
    "We stopped at a viewpoint overlooking the countryside",
    "The narrow roads made the bus ride feel longer",
    "We asked the driver where to get off for the old town",
    "I wrote down a list of places to visit during the day",
    "We walked past a church with a tall tower",
    "The air near the sea felt cool and fresh",
    "We tried a dessert that was popular in that region",
    "I took a photo of the station sign as a memory",
    "We sat in the shade near a stone wall",
    "The small port had only a few fishing boats",
    "We used a paper ticket to enter the museum",
    "I checked the local bus timetable pinned to the wall",
    "We bought two simple sandwiches from a bakery",
    "The hotel corridor was long and quiet",
    "We waited for our luggage near the exit door",
    "I kept my phone in airplane mode during most of the flight",
    "We took a local bus instead of a taxi to save money",
    "The room had a small balcony facing the street",
    "We walked along a path surrounded by trees",
    "I noted the metro line color to remember the route",
    "We changed to another train at the central station",
    "The city map had symbols for parks and monuments",
    "We visited a large square with a fountain in the middle",
    "I tried to read some signs written in the local language",
    "We took a short ferry ride to a nearby island",
    "The hostel common room had sofas and a small TV",
    "We watched people taking photos near the monument",
    "I bought a simple notebook only for travel notes",
    "We crossed a metal bridge over a wide river",
    "The station had a small cafe selling coffee and tea",
    "We stopped for a moment to look at the sunset",
    "I checked the seating number printed on my ticket",
    "We passed a field full of yellow flowers on the train ride",
    "The city bus had a screen showing the next stop",
    "We looked at the menu posted outside the restaurant",
    "I used the hotel WiFi to send messages home",
    "We shared a small umbrella while walking in the rain",
    "The airport signs were clear and easy to follow",
    "We found a quiet street away from the busy center",
    "I bought a fridge magnet with the city name on it",
    "We walked beside a canal with several small boats",
    "The tram stopped right in front of our hotel",
    "We planned our route for the next day over breakfast",
    "I checked the local time zone before setting my watch",
    "We followed a path along the cliffs by the sea",
    "The small village had only one grocery store"
]


# Translation table that deletes every ASCII punctuation character; built once
# so it is not recreated for each sentence.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def proprocess(text):
    """Normalize a sentence: lowercase it and strip ASCII punctuation.

    Strings are immutable, so ``lower()`` and ``translate()`` return new
    objects instead of modifying ``text``; their results are chained and
    returned here (discarding them would return the input unchanged).

    Args:
        text: Raw sentence.

    Returns:
        The lowercased sentence with every character of
        ``string.punctuation`` removed. Non-ASCII punctuation (for example
        the typographic apostrophe ``’``) is left untouched.
    """
    return text.lower().translate(_PUNCTUATION_TABLE)


# Build the dataset: coding sentences are labeled 1, travel sentences 0
data = coding_sentences + travel_sentences
labels = [1] * len(coding_sentences) + [0] * len(travel_sentences)

# Normalize every sentence before tokenizing
data = [proprocess(sentence) for sentence in data]

# Join with a space: without a separator the last word of each sentence is
# glued to the first word of the next one, which creates bogus tokens and
# leaves the real words out of the vocabulary.
all_words = " ".join(data).split()
word_counts = Counter(all_words)
# Ids start at 1 in order of first occurrence; id 0 is reserved for padding
vocab = {word: idx for idx, word in enumerate(word_counts, start=1)}
vocab["<PAD>"] = 0  # special padding token

# Fixed number of token ids per encoded sentence
max_len = 15


def sentence_to_tensor(sentence, vocab, max_len=15):
    """Encode a sentence as a fixed-length tensor of vocabulary ids.

    The sentence is split on whitespace, each token is looked up in
    ``vocab`` (tokens missing from it become 0), and the id list is cut to
    ``max_len`` entries or right-padded with 0 up to ``max_len``.

    Args:
        sentence: Text to encode.
        vocab: Mapping from token to integer id.
        max_len: Number of ids in the returned tensor.

    Returns:
        A 1-D tensor holding ``max_len`` ids.
    """
    pad_id = 0
    token_ids = []
    for token in sentence.split()[:max_len]:
        token_ids.append(vocab.get(token, pad_id))
    padding = [pad_id] * (max_len - len(token_ids))
    return torch.tensor(token_ids + padding)


# Encode every sentence as a fixed-length row of token ids and stack the rows
X = torch.stack(
    [sentence_to_tensor(text, vocab, max_len) for text in data]
)
y = torch.tensor(labels)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# %% Building the Transformer model
class TransformerClass(nn.Module):
    """Transformer-based sentence classifier with sigmoid outputs.

    Token ids are embedded, summed with a learned positional encoding, passed
    through an ``nn.Transformer`` (the same sequence is used as source and
    target), flattened, and fed to a two-layer head ending in a sigmoid.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding; also the Transformer
            ``d_model``. Must be divisible by ``num_heads``.
        num_heads: Number of heads in each multi-head attention layer.
        num_layers: Number of encoder layers and of decoder layers.
        hidden_dim: Feed-forward width inside each Transformer layer and
            width of the hidden classifier layer.
        num_classes: Number of sigmoid outputs.
        max_len: Fixed length of the input sequences; must equal the length
            the sentences were padded/truncated to.
    """

    def __init__(self, vocab_size, embedding_dim, num_heads, num_layers,
                 hidden_dim, num_classes, max_len=15):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Learned positional encoding; the leading 1 broadcasts over the batch
        self.positional_encoding = nn.Parameter(
            torch.randn(1, max_len, embedding_dim))
        self.transformer = nn.Transformer(
            d_model=embedding_dim,
            nhead=num_heads,
            # Set both stacks explicitly; leaving the encoder unset would
            # silently use the library default instead of num_layers
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=hidden_dim,
            # Inputs are (batch, seq, dim). With the default batch_first=False
            # attention would be computed across the batch, mixing sentences
            batch_first=True,
        )
        self.fc = nn.Linear(embedding_dim * max_len, hidden_dim)
        self.out = nn.Linear(hidden_dim, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Compute class probabilities for a batch of encoded sentences.

        Args:
            x: Integer token ids of shape ``(batch, max_len)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with values in [0, 1].
        """
        embedded = self.embedding(x) + self.positional_encoding
        output = self.transformer(embedded, embedded)
        # Flatten all token representations of a sentence into one vector
        output = output.reshape(output.size(0), -1)
        output = torch.relu(self.fc(output))
        output = self.out(output)
        return self.sigmoid(output)
# %% Model training


vocab_size = len(vocab)
embedding_dim = 32
num_heads = 4
num_layers = 4
hidden_dim = 64
num_classes = 1  # one sigmoid output for the binary coding-vs-travel label

model = TransformerClass(vocab_size, embedding_dim,
                         num_heads, num_layers, hidden_dim, num_classes)

# loss and optimizer definition
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# training: every epoch is one gradient step on the whole training split

number_epochs = 100
model.train()
for epoch in range(number_epochs):
    optimizer.zero_grad()
    # squeeze(-1) drops the class dimension so the output lines up with y_train
    output = model(X_train.long()).squeeze(-1)
    loss = criterion(output, y_train.float())
    loss.backward()
    optimizer.step()

    # .item() extracts the Python float so the log shows a plain number
    # instead of the tensor repr with its grad_fn
    print(f"Epoch: {epoch+1}/{number_epochs} Loss: {loss.item():.4f}")


# %% Model evaluation

model.eval()

with torch.no_grad():
    # squeeze(-1) removes only the class dimension, so a split holding a
    # single sample still yields a 1-D tensor (a bare squeeze() would
    # collapse it to a 0-d tensor)
    y_pred = model(X_test.long()).squeeze(-1)
    y_pred = (y_pred > 0.5).float()  # threshold probabilities at 0.5

    y_pred_training = model(X_train.long()).squeeze(-1)
    y_pred_training = (y_pred_training > 0.5).float()

accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")

accuracy_train = accuracy_score(y_train, y_pred_training)
print(f"Train Accuracy: {accuracy_train}")