LSTM Example

Step-by-Step Walkthrough

1 Imports

Import the required libraries for building and training the LSTM model.

# Core PyTorch modules: tensors, layers, optimizers, and data utilities.
# (Original had all four import statements fused onto one line — invalid Python.)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
Expected Output
# No visible output — libraries loaded successfully. # torch version: 2.x.x

2 Data & Labels

Define the raw text data and corresponding labels for sentiment classification.

# Toy corpus: six one-line movie reviews with binary sentiment labels.
texts = [
    "I love this movie",
    "This film is terrible",
    "Great acting and plot",
    "Worst movie ever",
    "Absolutely wonderful",
    "I hated every minute",
]
labels = [1, 0, 1, 0, 1, 0]  # 1 = positive, 0 = negative

print("Texts:", texts)
print("Labels:", labels)
Expected Output
Texts: ['I love this movie', 'This film is terrible', 'Great acting and plot', 'Worst movie ever', 'Absolutely wonderful', 'I hated every minute'] Labels: [1, 0, 1, 0, 1, 0]

3 Vocabulary

Build a word-to-index mapping so the model can work with numerical input.

# Assign each unique lower-cased word the next free integer id.
# Id 0 is reserved for the <PAD> token used to right-pad short sentences.
word2idx = {"<PAD>": 0}
for sentence in texts:
    for token in sentence.lower().split():
        # setdefault inserts only when the token is new, keeping first-seen order.
        word2idx.setdefault(token, len(word2idx))

vocab_size = len(word2idx)
print("Vocabulary size:", vocab_size)
print("word2idx:", word2idx)
Expected Output
Vocabulary size: 19 word2idx: {'<PAD>': 0, 'i': 1, 'love': 2, 'this': 3, 'movie': 4, 'film': 5, 'is': 6, 'terrible': 7, 'great': 8, 'acting': 9, 'and': 10, 'plot': 11, 'worst': 12, 'ever': 13, 'absolutely': 14, 'wonderful': 15, 'hated': 16, 'every': 17, 'minute': 18} # Note: vocab_size = 19 (18 unique words + PAD token)

4 Dataset

Create a custom PyTorch Dataset that encodes and pads each text sample.

class TextDataset(Dataset):
    """Dataset of padded word-id sequences paired with integer labels.

    Each sentence is lower-cased, whitespace-tokenized, mapped through
    ``word2idx``, right-padded with the PAD id (0), and truncated so every
    sample is exactly ``max_len`` ids long.
    """

    def __init__(self, texts, labels, word2idx, max_len=10):
        # Encode every sentence once up front.
        self.encodings = []
        for sentence in texts:
            ids = [word2idx[token] for token in sentence.lower().split()]
            # Pad generously, then slice: equivalent to pad-to-max_len + truncate.
            self.encodings.append((ids + [0] * max_len)[:max_len])
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = torch.tensor(self.encodings[idx])
        target = torch.tensor(self.labels[idx])
        return sample, target


dataset = TextDataset(texts, labels, word2idx)
print("Dataset size:", len(dataset))
print("Sample 0:", dataset[0])
Expected Output
Dataset size: 6 Sample 0: (tensor([1, 2, 3, 4, 0, 0, 0, 0, 0, 0]), tensor(1)) # "I love this movie" → [1, 2, 3, 4] padded with zeros to length 10 # Label 1 = positive sentiment

5 DataLoader

Wrap the dataset in a DataLoader for batched iteration during training.

# Wrap the dataset for mini-batch iteration; shuffle reorders samples each epoch.
dataset = TextDataset(texts, labels, word2idx)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Inspect one batch (next(iter(...)) pulls exactly the first batch).
xb, yb = next(iter(loader))
print("Batch X shape:", xb.shape)
print("Batch Y shape:", yb.shape)
print("Batch X:\n", xb)
print("Batch Y:", yb)
Expected Output
Batch X shape: torch.Size([2, 10]) Batch Y shape: torch.Size([2]) Batch X: tensor([[ 1, 16, 17, 18, 0, 0, 0, 0, 0, 0], [ 8, 9, 10, 11, 0, 0, 0, 0, 0, 0]]) Batch Y: tensor([0, 1]) # Note: order varies due to shuffle=True

6 LSTM Model

Define the LSTM-based classifier with an embedding layer, LSTM cell, and fully connected output.

class LSTMClassifier(nn.Module):
    """Sentiment classifier: embedding -> single-layer LSTM -> linear head.

    The final hidden state of the LSTM summarizes the sentence and is
    projected to 2 logits (negative / positive).
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        # padding_idx=0 keeps the <PAD> embedding fixed at zeros.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, x):
        embedded = self.embedding(x)          # (batch, seq_len, embed_dim)
        _, (hidden, _) = self.lstm(embedded)  # hidden: (1, batch, hidden_dim)
        return self.fc(hidden.squeeze(0))     # (batch, 2) class logits


model = LSTMClassifier(vocab_size, embed_dim=16, hidden_dim=32)
print(model)
Expected Output
LSTMClassifier( (embedding): Embedding(19, 16, padding_idx=0) (lstm): LSTM(16, 32, batch_first=True) (fc): Linear(in_features=32, out_features=2, bias=True) ) # Embedding: 19 vocab → 16-dim vectors # LSTM: 16 input features → 32 hidden units # FC: 32 hidden → 2 classes (positive/negative)

7 Training Fns

Set up the model instance, loss function, and optimizer.

# Instantiate the model and pair it with a loss function and optimizer.
model = LSTMClassifier(vocab_size, embed_dim=16, hidden_dim=32)
criterion = nn.CrossEntropyLoss()           # expects raw logits + class indices
optimizer = optim.Adam(model.parameters(), lr=0.01)

print("Loss function:", criterion)
print("Optimizer:", optimizer)

# Count every trainable and non-trainable parameter element.
total_params = sum(param.numel() for param in model.parameters())
print("Total parameters:", total_params)
Expected Output
Loss function: CrossEntropyLoss() Optimizer: Adam ( Parameter Group 0 lr: 0.01 betas: (0.9, 0.999) eps: 1e-08 weight_decay: 0 ) Total parameters: 6770 # Breakdown: Embedding(19×16=304) + LSTM(4×(16×32+32×32+32+32)=6400) + FC(32×2+2=66)

8 Training Loop

Train the model for multiple epochs, printing loss each epoch.

# Standard training loop: 30 epochs over the shuffled loader,
# reporting the summed batch loss every 10th epoch.
for epoch in range(30):
    total_loss = 0
    for xb, yb in loader:
        optimizer.zero_grad()        # clear gradients from the previous step
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()              # backpropagate
        optimizer.step()             # update parameters
        total_loss += loss.item()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
Expected Output
Epoch 10, Loss: 0.8534 Epoch 20, Loss: 0.1247 Epoch 30, Loss: 0.0189 # Loss decreases over time → model is learning # Exact values vary due to random initialization and shuffle

9 Predictions

Run predictions on new sentences to test the trained model.

def predict(text):
    """Return "Positive" or "Negative" for a raw input sentence.

    Encodes the sentence exactly as training did: lowercase, map unknown
    words to the PAD id (0), and pad/truncate to a fixed length of 10.
    """
    tokens = text.lower().split()
    ids = [word2idx.get(token, 0) for token in tokens]
    ids = (ids + [0] * 10)[:10]
    batch = torch.tensor([ids])  # add a batch dimension: (1, 10)
    with torch.no_grad():        # inference only — no gradient tracking
        logits = model(batch)
    label = logits.argmax(1).item()
    return "Positive" if label == 1 else "Negative"


print(predict("I love this"))
print(predict("This is terrible"))
Expected Output
Positive Negative # "I love this" → words seen in training → Positive ✓ # "This is terrible" → words seen in training → Negative ✓

10 Context Test

Evaluate the model with more nuanced inputs to test contextual understanding.

# Probe the trained model with sentences that mix seen and unseen words.
test_sentences = [
    "Absolutely wonderful acting",
    "I hated every scene",
    "Great plot and characters",
    "Worst experience ever",
]
for sentence in test_sentences:
    print(f"{sentence:<35} → {predict(sentence)}")
Expected Output
Absolutely wonderful acting → Positive I hated every scene → Negative Great plot and characters → Positive Worst experience ever → Negative # All predictions correct — the model learned word-level sentiment # "characters" is unseen (mapped to PAD=0), but context carries it