import torch
import torch.nn as nn
import torch.optim as optim
import random

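# Character-level model: an embedding layer, a vanilla RNN, and a linear head
# that projects each hidden state to logits over the vocabulary.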
class CharRNN(nn.Module):
    def __init__(self, vocab_size, hidden_size=128, num_layers=1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)
        self.num_layers = num_layers
        self.hidden_size = hidden_size

    def forward(self, x, hidden):
        x = self.embedding(x)              # (batch, seq) -> (batch, seq, hidden)
        out, hidden = self.rnn(x, hidden)  # out: (batch, seq, hidden)
        out = self.fc(out)                 # logits: (batch, seq, vocab)
        return out, hidden

    def init_hidden(self, batch_size):
        # Fresh zero state of shape (num_layers, batch, hidden).
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)

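# Map every distinct character in the corpus to an integer index, and back.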
def build_vocab(text):
    chars = sorted(set(text))
    char2idx = {ch: i for i, ch in enumerate(chars)}
    idx2char = {i: ch for i, ch in enumerate(chars)}
    return char2idx, idx2char

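# Slice the corpus into (input, target) index pairs where the target is the
# input shifted one character to the right, e.g. "hello" with seq_length=4
# yields the pair ("hell", "ello").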
def prepare_data(text, char2idx, seq_length):
    data = []
    for i in range(len(text) - seq_length):
        seq = text[i:i + seq_length]
        target = text[i + 1:i + seq_length + 1]
        seq_idx = [char2idx[ch] for ch in seq]
        target_idx = [char2idx[ch] for ch in target]
        data.append((seq_idx, target_idx))
    return data

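# Train with per-sequence updates (batch size 1): cross-entropy over every
# timestep, Adam as the optimizer, and a reshuffle of the windows each epoch.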
def train(text, seq_length=40, epochs=50, lr=0.005):
    char2idx, idx2char = build_vocab(text)
    vocab_size = len(char2idx)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CharRNN(vocab_size).to(device)
    data = prepare_data(text, char2idx, seq_length)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        total_loss = 0
        random.shuffle(data)
        for seq_idx, target_idx in data:
            inputs = torch.tensor([seq_idx], dtype=torch.long).to(device)
            targets = torch.tensor([target_idx], dtype=torch.long).to(device)
            hidden = model.init_hidden(1).to(device)
            outputs, _ = model(inputs, hidden)
            # Flatten (1, seq, vocab) -> (seq, vocab) to match targets of shape (seq,).
            loss = criterion(outputs.view(-1, vocab_size), targets.view(-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}, Loss: {total_loss / len(data):.4f}")
    return model, char2idx, idx2char, device

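# Sample one character at a time. Dividing the logits by the temperature
# before the softmax controls randomness: T < 1 sharpens the distribution,
# T > 1 flattens it.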
def generate(model, char2idx, idx2char, seed, length=200, temperature=1.0, device='cpu'):
    model.eval()
    chars = list(seed)
    hidden = model.init_hidden(1).to(device)
    with torch.no_grad():
        # Warm up the hidden state on the seed so generation continues from it.
        for ch in seed[:-1]:
            input_idx = torch.tensor([[char2idx.get(ch, 0)]], dtype=torch.long).to(device)
            _, hidden = model(input_idx, hidden)
        for _ in range(length):
            input_idx = torch.tensor([[char2idx.get(chars[-1], 0)]], dtype=torch.long).to(device)
            output, hidden = model(input_idx, hidden)
            # Temperature-scaled sampling over the last timestep's logits.
            probs = torch.softmax(output[0, -1] / temperature, dim=0)
            next_idx = torch.multinomial(probs, num_samples=1).item()
            chars.append(idx2char[next_idx])
    return ''.join(chars)

if __name__ == "__main__":
    corpus = (
        "The quick brown fox jumps over the lazy dog. "
        "Machine learning is fun. "
        "PyTorch makes building neural networks easy. "
        "Text generation with RNNs is simple."
    ) * 5
    model, char2idx, idx2char, device = train(corpus, seq_length=40, epochs=50)
    print("\nGenerated text:\n")
    print(generate(model, char2idx, idx2char, seed="The quick", length=200, temperature=0.8, device=device))