LSTM Example

Step-by-Step Walkthrough

1 Imports

Import the required libraries for building and training the LSTM model.

# Core PyTorch modules: tensors, layers, optimizers, and data utilities.
# (Original had all four import statements fused onto one line — invalid Python.)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
Expected Output
# No visible output — libraries loaded successfully. # torch version: 2.x.x

2 Data & Labels

Define the raw text data and corresponding labels for sentiment classification.

# Toy corpus: six one-line movie reviews with binary sentiment labels.
texts = [
    "I love this movie",
    "This film is terrible",
    "Great acting and plot",
    "Worst movie ever",
    "Absolutely wonderful",
    "I hated every minute",
]
labels = [1, 0, 1, 0, 1, 0]  # 1 = positive, 0 = negative

print("Texts:", texts)
print("Labels:", labels)
Expected Output
Texts: ['I love this movie', 'This film is terrible', 'Great acting and plot', 'Worst movie ever', 'Absolutely wonderful', 'I hated every minute'] Labels: [1, 0, 1, 0, 1, 0]

3 Vocabulary

Build a word-to-index mapping so the model can work with numerical input.

# Assign each unique lower-cased word the next free integer id.
# Id 0 is reserved for the <PAD> token used to right-pad short sentences.
word2idx = {"<PAD>": 0}
for sentence in texts:
    for token in sentence.lower().split():
        # setdefault inserts only when the token is new, keeping first-seen order.
        word2idx.setdefault(token, len(word2idx))

vocab_size = len(word2idx)
print("Vocabulary size:", vocab_size)
print("word2idx:", word2idx)
Expected Output
Vocabulary size: 19 word2idx: {'<PAD>': 0, 'i': 1, 'love': 2, 'this': 3, 'movie': 4, 'film': 5, 'is': 6, 'terrible': 7, 'great': 8, 'acting': 9, 'and': 10, 'plot': 11, 'worst': 12, 'ever': 13, 'absolutely': 14, 'wonderful': 15, 'hated': 16, 'every': 17, 'minute': 18} # Note: vocab_size = 19 (18 unique words + PAD token)

4 Dataset

Create a custom PyTorch Dataset that encodes and pads each text sample.

class TextDataset(Dataset):
    """Dataset of padded word-id sequences paired with integer labels.

    Each sentence is lower-cased, whitespace-tokenized, mapped through
    ``word2idx``, right-padded with the PAD id (0), and truncated so every
    sample is exactly ``max_len`` ids long.
    """

    def __init__(self, texts, labels, word2idx, max_len=10):
        # Encode every sentence once up front.
        self.encodings = []
        for sentence in texts:
            ids = [word2idx[token] for token in sentence.lower().split()]
            # Pad generously, then slice: equivalent to pad-to-max_len + truncate.
            self.encodings.append((ids + [0] * max_len)[:max_len])
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = torch.tensor(self.encodings[idx])
        target = torch.tensor(self.labels[idx])
        return sample, target


dataset = TextDataset(texts, labels, word2idx)
print("Dataset size:", len(dataset))
print("Sample 0:", dataset[0])
Expected Output
Dataset size: 6 Sample 0: (tensor([1, 2, 3, 4, 0, 0, 0, 0, 0, 0]), tensor(1)) # "I love this movie" → [1, 2, 3, 4] padded with zeros to length 10 # Label 1 = positive sentiment

5 DataLoader

Wrap the dataset in a DataLoader for batched iteration during training.

# Wrap the dataset for mini-batch iteration; shuffle reorders samples each epoch.
dataset = TextDataset(texts, labels, word2idx)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Inspect one batch (next(iter(...)) pulls exactly the first batch).
xb, yb = next(iter(loader))
print("Batch X shape:", xb.shape)
print("Batch Y shape:", yb.shape)
print("Batch X:\n", xb)
print("Batch Y:", yb)
Expected Output
Batch X shape: torch.Size([2, 10]) Batch Y shape: torch.Size([2]) Batch X: tensor([[ 1, 16, 17, 18, 0, 0, 0, 0, 0, 0], [ 8, 9, 10, 11, 0, 0, 0, 0, 0, 0]]) Batch Y: tensor([0, 1]) # Note: order varies due to shuffle=True

6 LSTM Model

Define the LSTM-based classifier with an embedding layer, LSTM cell, and fully connected output.

class LSTMClassifier(nn.Module):
    """Sentiment classifier: embedding -> single-layer LSTM -> linear head.

    The final hidden state of the LSTM summarizes the sentence and is
    projected to 2 logits (negative / positive).
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        # padding_idx=0 keeps the <PAD> embedding fixed at zeros.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, x):
        embedded = self.embedding(x)          # (batch, seq_len, embed_dim)
        _, (hidden, _) = self.lstm(embedded)  # hidden: (1, batch, hidden_dim)
        return self.fc(hidden.squeeze(0))     # (batch, 2) class logits


model = LSTMClassifier(vocab_size, embed_dim=16, hidden_dim=32)
print(model)
Expected Output
LSTMClassifier( (embedding): Embedding(19, 16, padding_idx=0) (lstm): LSTM(16, 32, batch_first=True) (fc): Linear(in_features=32, out_features=2, bias=True) ) # Embedding: 19 vocab → 16-dim vectors # LSTM: 16 input features → 32 hidden units # FC: 32 hidden → 2 classes (positive/negative)

7 Training Fns

Set up the model instance, loss function, and optimizer.

# Instantiate the model and pair it with a loss function and optimizer.
model = LSTMClassifier(vocab_size, embed_dim=16, hidden_dim=32)
criterion = nn.CrossEntropyLoss()           # expects raw logits + class indices
optimizer = optim.Adam(model.parameters(), lr=0.01)

print("Loss function:", criterion)
print("Optimizer:", optimizer)

# Count every trainable and non-trainable parameter element.
total_params = sum(param.numel() for param in model.parameters())
print("Total parameters:", total_params)
Expected Output
Loss function: CrossEntropyLoss() Optimizer: Adam ( Parameter Group 0 lr: 0.01 betas: (0.9, 0.999) eps: 1e-08 weight_decay: 0 ) Total parameters: 6770 # Breakdown: Embedding(19×16=304) + LSTM(4×(16×32+32×32+32+32)=6400) + FC(32×2+2=66)

8 Training Loop

Train the model for multiple epochs, printing loss each epoch.

# Standard training loop: 30 epochs over the shuffled loader,
# reporting the summed batch loss every 10th epoch.
for epoch in range(30):
    total_loss = 0
    for xb, yb in loader:
        optimizer.zero_grad()        # clear gradients from the previous step
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()              # backpropagate
        optimizer.step()             # update parameters
        total_loss += loss.item()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
Expected Output
Epoch 10, Loss: 0.8534 Epoch 20, Loss: 0.1247 Epoch 30, Loss: 0.0189 # Loss decreases over time → model is learning # Exact values vary due to random initialization and shuffle

9 Predictions

Run predictions on new sentences to test the trained model.

def predict(text):
    """Return "Positive" or "Negative" for a raw input sentence.

    Encodes the sentence exactly as training did: lowercase, map unknown
    words to the PAD id (0), and pad/truncate to a fixed length of 10.
    """
    tokens = text.lower().split()
    ids = [word2idx.get(token, 0) for token in tokens]
    ids = (ids + [0] * 10)[:10]
    batch = torch.tensor([ids])  # add a batch dimension: (1, 10)
    with torch.no_grad():        # inference only — no gradient tracking
        logits = model(batch)
    label = logits.argmax(1).item()
    return "Positive" if label == 1 else "Negative"


print(predict("I love this"))
print(predict("This is terrible"))
Expected Output
Positive Negative # "I love this" → words seen in training → Positive ✓ # "This is terrible" → words seen in training → Negative ✓

10 Context Test

Evaluate the model with more nuanced inputs to test contextual understanding.

# Probe the trained model with sentences that mix seen and unseen words.
test_sentences = [
    "Absolutely wonderful acting",
    "I hated every scene",
    "Great plot and characters",
    "Worst experience ever",
]
for sentence in test_sentences:
    print(f"{sentence:<35} → {predict(sentence)}")
Expected Output
Absolutely wonderful acting → Positive I hated every scene → Negative Great plot and characters → Positive Worst experience ever → Negative # All predictions correct — the model learned word-level sentiment # "characters" is unseen (mapped to PAD=0), but context carries it