Solution for KeyError: 0 in CustomDataGenerator class I created
is Given Below:
The error looks like this and occurs after successful run of the model for some part of the first epoch:
Epoch 1/50
937/938 [============================>.] - ETA: 0s - loss: 0.0089 - accuracy: 0.9913
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2897 try:
-> 2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
10 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
-> 2900 raise KeyError(key) from err
2901
2902 if tolerance is not None:
KeyError: 0
My custom data generator looks as follows:
class CustomDataGenerator(ks.utils.Sequence):
def __init__(self, dataframe, x_col, y_col, img_h, img_w, batch_size):
self.dataframe = dataframe
self.x_col = x_col
self.y_col = y_col
self.img_h = img_h
self.img_w = img_w
self.batch_size = batch_size
def __len__(self):
return math.ceil(self.dataframe.shape[0] / self.batch_size)
def __getitem__(self, index):
X = np.empty(shape=(self.batch_size, self.img_w, self.img_h, 3), dtype="float32")
Y = np.empty(shape=(self.batch_size, self.img_w, self.img_h, 1), dtype="float32")
for i in range(self.batch_size):
img_path = self.dataframe[self.x_col][index * self.batch_size + i]
img = cv.imread(img_path)
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
img_np = np.array(img, dtype="float32")
img_np = img_np.reshape(-1, self.img_h, self.img_w, 3)
img_np = img_np / 255.
mask_path = self.dataframe[self.y_col][index * self.batch_size + i]
mask = cv.imread(mask_path, 0)
mask_np = np.array(mask, dtype="float32")
mask_np = mask_np.reshape(-1, self.img_h, self.img_w, 1)
mask_np = mask_np / 255.
X[i, :, :, :] = img_np
Y[i, :, :, :] = mask_np
return X, Y
def on_epoch_end(self):
self.dataframe = self.dataframe.sample(frac=1)
self.dataframe.reset_index(inplace=True, drop=True)
size = 16
train_gen = CustomDataGenerator(dataframe=train_df, x_col="Images", y_col="Masks", img_h=128, img_w=128, batch_size=size)
val_gen = CustomDataGenerator(dataframe=val_df, x_col="Images", y_col="Masks", img_h=128, img_w=128, batch_size=size)
test_gen = CustomDataGenerator(dataframe=test_df, x_col="Images", y_col="Masks", img_h=128, img_w=128, batch_size=size)
The dataframe consists of 2 columns; one containing input images and another containing output masks. The dataset can be found here:
https://www.kaggle.com/hngngn/portrait-segmentation-128×128
The KeyError might have occurred because 0 does not exist in the index.
For integer-location based indexing of a data frame use .iloc
img_path = self.dataframe[self.x_col].iloc[index * self.batch_size + i]
mask_path = self.dataframe[self.y_col].iloc[index * self.batch_size + i]