import argparse

import numpy as np
import torch
from torch.utils.data import Dataset
from torch.nn import Module, ReLU, Linear
from avalanche.benchmarks.generators import dataset_benchmark, tensors_benchmark
from avalanche.benchmarks.utils import make_classification_dataset
from avalanche.training.supervised import EWC
from avalanche.evaluation.metrics import (
    forgetting_metrics,
    accuracy_metrics,
    loss_metrics,
    bwt_metrics,
)
from avalanche.logging import InteractiveLogger
from avalanche.training.plugins import EvaluationPlugin
# Torch dataset wrapping the numpy arrays; Avalanche's
# make_classification_dataset reads the labels from the `targets` attribute
class dataSet(Dataset):
    def __init__(self, x, y):
        self.x = torch.as_tensor(x)
        self.targets = torch.as_tensor(y)

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index, :], self.targets[index]
# Four-layer MLP; returns raw logits (CrossEntropyLoss applies the softmax)
class MLP(Module):
    def __init__(self, in_num, out_num, hidden_num1, hidden_num2, hidden_num3):
        super().__init__()
        self.fc1 = Linear(in_features=in_num, out_features=hidden_num1)
        self.relu1 = ReLU()
        self.fc2 = Linear(in_features=hidden_num1, out_features=hidden_num2)
        self.relu2 = ReLU()
        self.fc3 = Linear(in_features=hidden_num2, out_features=hidden_num3)
        self.relu3 = ReLU()
        self.fc4 = Linear(in_features=hidden_num3, out_features=out_num)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.relu3(self.fc3(x))
        return self.fc4(x)
def main(args):
    # --- CONFIG
    if args.dataset == "covid":
        path = "covid_dataset/"
    elif args.dataset == "diab":
        path = "diabetes_dataset/"
    in_num = 155       # number of input features
    hidden_num1 = 256  # neurons in the first hidden layer
    hidden_num2 = 128  # neurons in the second hidden layer
    hidden_num3 = 128  # neurons in the third hidden layer
    out_num = 3        # number of classes
    # Load training and test data for both domains
    x_train_1 = np.load(path + 'x_train_1.npy').astype(np.float32)
    x_test_1 = np.load(path + 'x_test_1.npy').astype(np.float32)
    x_train_2 = np.load(path + 'x_train_2.npy').astype(np.float32)
    x_test_2 = np.load(path + 'x_test_2.npy').astype(np.float32)
    # Labels are integer class indices (as expected by CrossEntropyLoss)
    y_train_1 = np.load(path + 'y_train_1.npy').astype(np.int_)
    y_test_1 = np.load(path + 'y_test_1.npy').astype(np.int_)
    y_train_2 = np.load(path + 'y_train_2.npy').astype(np.int_)
    y_test_2 = np.load(path + 'y_test_2.npy').astype(np.int_)
    # Wrap the arrays in torch datasets
    train_1 = dataSet(x_train_1, y_train_1)
    test_1 = dataSet(x_test_1, y_test_1)
    train_2 = dataSet(x_train_2, y_train_2)
    test_2 = dataSet(x_test_2, y_test_2)
    # All experiences share task label 0 (domain-incremental setting)
    train_1 = make_classification_dataset(train_1, task_labels=0)
    test_1 = make_classification_dataset(test_1, task_labels=0)
    train_2 = make_classification_dataset(train_2, task_labels=0)
    test_2 = make_classification_dataset(test_2, task_labels=0)
    # Check whether the selected GPU is available, otherwise use the CPU
    assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0."
    device = torch.device(
        f"cuda:{args.cuda}"
        if torch.cuda.is_available() and args.cuda >= 0
        else "cpu"
    )
    print(f"Using device: {device}")
    # ---------
    # --- SCENARIO CREATION
    # generic_scenario = tensors_benchmark(
    #     train_tensors=[(x_train_1, y_train_1), (x_train_2, y_train_2)],
    #     test_tensors=[(x_test_1, y_test_1), (x_test_2, y_test_2)],
    #     task_labels=[0, 1]
    # )
    generic_scenario = dataset_benchmark([train_1, train_2], [test_1, test_2])
    # ---------
    # MODEL CREATION
    model = MLP(in_num, out_num, hidden_num1, hidden_num2, hidden_num3).to(device)
    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = torch.nn.CrossEntropyLoss()
    # DEFINE THE EVALUATION PLUGIN AND LOGGER
    interactive_logger = InteractiveLogger()
    eval_plugin = EvaluationPlugin(
        accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True),
        loss_metrics(minibatch=False, epoch=True, experience=True, stream=True),
        forgetting_metrics(experience=True, stream=True),
        bwt_metrics(experience=True, stream=True),
        loggers=[interactive_logger],
    )
    # decay_factor is only meaningful for the online mode
    if args.ewc_mode == 'separate':
        args.decay_factor = None
    # Create the EWC strategy
    strategy = EWC(
        model,
        optimizer,
        criterion,
        args.ewc_lambda,
        args.ewc_mode,
        decay_factor=args.decay_factor,
        train_epochs=args.epochs,
        device=device,
        train_mb_size=args.minibatch_size,
        evaluator=eval_plugin,
    )
    # Train on the scenario with the chosen strategy,
    # evaluating on the full test stream after each experience
    print("Starting experiment...")
    results = []
    for experience in generic_scenario.train_stream:
        print("Start training on experience", experience.current_experience)
        strategy.train(experience)
        print("End training on experience", experience.current_experience)
        print("Computing accuracy on the test stream")
        results.append(strategy.eval(generic_scenario.test_stream))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--dataset",
type=str,
choices=["covid", "diab"],
default="covid",
help="Choose between covid and diab.",
)
parser.add_argument(
"--ewc_mode",
type=str,
choices=["separate", "online"],
default="separate",
help="Choose between EWC and online.",
)
parser.add_argument(
"--ewc_lambda",
type=float,
default=0.4,
help="Penalty hyperparameter for EWC",
)
parser.add_argument(
"--decay_factor",
type=float,
default=0.1,
help="Decay factor for importance " "when ewc_mode is online.",
)
parser.add_argument("--optim", type=str, choices=["sgd", "adam"], default="sgd", help="Optimizer.")
parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate.")
parser.add_argument("--momentum", type=float, default=9e-1, help="Momentum.")
parser.add_argument(
"--epochs", type=int, default=300, help="Number of training epochs."
)
parser.add_argument(
"--minibatch_size", type=int, default=128, help="Minibatch size."
)
parser.add_argument(
"--cuda",
type=int,
default=0,
help="Specify GPU id to use. Use CPU if -1.",
)
args = parser.parse_args()
main(args)
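For reference, I run the script as, for example (the filename ewc_domain_incremental.py is a placeholder for my actual file):

python ewc_domain_incremental.py --dataset covid --ewc_mode online --ewc_lambda 0.4 --decay_factor 0.1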
🐛 Describe the bug
I prepare two pairs of training and test PyTorch datasets from two different domains and train on them with the EWC strategy to perform domain-incremental continual learning. However, the evaluation results show that no continual learning takes place: the results are identical to those of naive fine-tuning. The bug occurs with both dataset_benchmark and tensors_benchmark.
🐜 To Reproduce
Due to the confidentiality of my dataset, I cannot share the data here. The full script above is my working code, included for debugging reference.
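As a stand-in for the real data, the sketch below builds the same kind of two-domain scenario from synthetic tensors. The 155 features and 3 classes match my setup, but the values are random, not my actual dataset; it should exercise the same code path as the script above:

import torch
from avalanche.benchmarks.generators import tensors_benchmark

n_train, n_test, in_num, out_num = 1000, 200, 155, 3
# Domain 1: standard normal features; Domain 2: a mean-shifted variant
x_train_1 = torch.randn(n_train, in_num)
y_train_1 = torch.randint(0, out_num, (n_train,))
x_test_1 = torch.randn(n_test, in_num)
y_test_1 = torch.randint(0, out_num, (n_test,))
x_train_2 = torch.randn(n_train, in_num) + 1.0
y_train_2 = torch.randint(0, out_num, (n_train,))
x_test_2 = torch.randn(n_test, in_num) + 1.0
y_test_2 = torch.randint(0, out_num, (n_test,))

generic_scenario = tensors_benchmark(
    train_tensors=[(x_train_1, y_train_1), (x_train_2, y_train_2)],
    test_tensors=[(x_test_1, y_test_1), (x_test_2, y_test_2)],
    task_labels=[0, 0],  # same task label on both experiences: domain-incremental
)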
🐝 Expected behavior
I expect the EWC strategy to perform domain-incremental CL on my self-defined datasets from the two domains when the scenario is built with dataset_benchmark, i.e. to show noticeably less forgetting on the first experience than naive fine-tuning does.
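One sanity check I would suggest (a rough sketch, not part of the script above; it assumes the model, strategy, and generic_scenario objects from the script are in scope): snapshot the model before the second experience and measure how far its parameters drift while training on it. If the EWC penalty were active, a large ewc_lambda should clearly shrink this drift relative to naive fine-tuning:

import copy

def param_drift(before, after):
    # Total L2 distance between two parameter snapshots
    sq = sum(
        (p1.detach() - p0.detach()).pow(2).sum().item()
        for p0, p1 in zip(before.parameters(), after.parameters())
    )
    return sq ** 0.5

snapshot = copy.deepcopy(model)  # taken right after training on experience 0
strategy.train(generic_scenario.train_stream[1])
print("L2 parameter drift on experience 1:", param_drift(snapshot, model))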
🐞 Screenshots
(Three screenshots, omitted here: results from EWC, results from naive fine-tuning, and results from my own CL strategy.)