Skip to content

Commit b800238

Browse files
committed
fix: rework handleExistingDev
Signed-off-by: Tim Serong <tserong@suse.com>
1 parent 68b6729 commit b800238

File tree

1 file changed

+107
-78
lines changed

1 file changed

+107
-78
lines changed

pkg/controller/blockdevice/scanner.go

Lines changed: 107 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -126,88 +126,127 @@ func (s *Scanner) collectAllDevices() []*deviceWithAutoProvision {
126126
return allDevices
127127
}
128128

129-
// TODO: This function needs a bit of re-working:
130-
// - There are two paths here where we re-active an inactive device,
131-
// one is if the device path has changed, the other is if it's a
132-
// multipath device. But, it's possible for a device to go away,
133-
// then come back with the original device path. This case isn't
134-
// handled. We should try to consolidate all three cases if possible.
135-
// - The isDevAlreadyProvisioned() check will always return false,
136-
// because it's checking newBd, but should be checking oldDb (this
137-
// doesn't actually cause any practical problems because the
138-
// subsequent NeedsAutoProvision() check uses oldBd correctly, but
139-
// it means the log messages aren't quite right).
140-
// - I'm not actually sure the whole if/else structure here works
141-
// properly. What if a device was incative, and is now active,
142-
// but _also_ needs to be autoprovisioned? If I'm reading this
143-
// correctly, that case won't be handled until the next scanner
144-
// run (which might not be until the node is rebooted in the worst
145-
// case).
146-
// - No, I'm wrong, it's fine, that'll be picked up in OnBlockDeviceChanged
129+
// handleExistingDev will update an existing BD CR based on the current state
130+
// of an actual block device on the system. It will return true if newBd
131+
// (the detected block device) really does correspond to oldBd (the BD CR),
132+
// and was potentially updated in some way, or false if newBd doesn't
133+
// actually correspond to oldBd.
147134
//
148-
// TODO: This needs to pick up all new status, incl. new UUID and WWN!!!
149-
// No, that's not quite true:
135+
// Note:
150136
// - If the device was inactive before it's OK for its status to change,
151137
// notably the device path. It should also be fine if the buspath
152138
// changes (maybe it got moved somwehere else)
153139
// - Changes of vendor+model+serial would always be unexpected
154140
// - Changes of UUID might be unexpected (unless a UUID got added due
155141
// to a device being provisioned, but that shouldn't come by this
156-
// code path.
142+
// code path).
157143
// - Changes of WWN might be unexepcted, except in that weird case where
158144
// a kernel update changed the WWNs of existing disks.
159-
func (s *Scanner) handleExistingDev(oldBd *diskv1.BlockDevice, newBd *diskv1.BlockDevice, autoProvisioned bool) {
160-
if isDevPathChanged(oldBd, newBd) {
161-
// Dev Path will change when device is temporarily gone and back.
162-
// Node reboot might also cause the device path change (depends on device interrupt).
163-
// According to the above, we should update the device path once it changes.
164-
if oldBd.Status.State == diskv1.BlockDeviceInactive {
165-
logrus.Infof("The inactive block device %s with wwn %s is coming back", newBd.Name, newBd.Status.DeviceStatus.Details.WWN)
166-
oldBd.Status.State = diskv1.BlockDeviceActive
167-
}
168-
logrus.Infof("Try to update the device path of %s, which is changed from %s to %s.", oldBd.Name, oldBd.Status.DeviceStatus.DevPath, newBd.Status.DeviceStatus.DevPath)
169-
oldBd.Status.DeviceStatus.DevPath = newBd.Status.DeviceStatus.DevPath
170-
if _, err := s.Blockdevices.Update(oldBd); err != nil {
171-
logrus.Errorf("Update device %s status error, wake up scanner again: %v", oldBd.Name, err)
172-
s.Cond.Signal()
145+
func (s *Scanner) handleExistingDev(oldBd *diskv1.BlockDevice, newBd *diskv1.BlockDevice, autoProvisioned bool) bool {
146+
oldBdCp := oldBd.DeepCopy()
147+
148+
if oldBd.Status.State == diskv1.BlockDeviceActive {
149+
// The BD is currently active. In this case the dev path shouldn't change, but
150+
// it's possible that some other details may have changed (the UUID if someone
151+
// manually created a filesystem on an unprovisioned device, or the WWN in
152+
// rare cases where a kernel update messes with device naming)
153+
if isDevPathChanged(oldBd, newBd) {
154+
// The dev path for an active device has changed. This means that newBd
155+
// doesn't actually correspond to oldBd. This should never happen in
156+
// normal operation, but can happen if e.g. a multipath BD exists, but
157+
// multipathd isn't started anymore. In this case newBd will point to
158+
// one of the raw devices. The same is true in reverse. Either way,
159+
// we want to skip this and return false so that later the BD can be
160+
// marked inactive or removed.
161+
logrus.WithFields(logrus.Fields{
162+
"name": oldBd.Name,
163+
"device": oldBd.Status.DeviceStatus.DevPath,
164+
"newDevice": newBd.Status.DeviceStatus.DevPath,
165+
}).Warn("new device path detected for active device - skipping update")
166+
return false
173167
}
174-
} else if isDevAlreadyProvisioned(newBd) {
175-
logrus.Debugf("Skip the provisioned device: %s", newBd.Name)
176-
} else if s.NeedsAutoProvision(oldBd, autoProvisioned) {
177-
logrus.Debugf("Enqueue block device %s for auto-provisioning", newBd.Name)
178-
s.Blockdevices.Enqueue(s.Namespace, newBd.Name)
179-
} else if isDMDevice(oldBd.Status.DeviceStatus.DevPath) {
180-
// Other dm devices should be filtered our at the beginning.
181-
// So, we focus multipath device checking here.
182-
// If system doesn't enable multipathd, the block device will become inactive after reboot.
183-
// So, we need to add a checking after starting multipathd.
184-
// Then the block device will become active from inactive.
185-
oldBdCp := oldBd.DeepCopy()
186-
187-
// We've checked it outside, we can skip error
188-
path, _ := filepath.EvalSymlinks(oldBd.Status.DeviceStatus.DevPath)
189-
if _, err := utils.IsMultipathDevice(path); err == nil {
190-
logrus.Infof("The multipath device %s is available, change it to active.", oldBd.Name)
168+
// DevPath isn't changed, but other things might, e.g. UUID if someone manually formatted a disk
169+
oldBdCp.Status.DeviceStatus.Capacity = newBd.Status.DeviceStatus.Capacity
170+
oldBdCp.Status.DeviceStatus.Details = newBd.Status.DeviceStatus.Details
171+
} else {
172+
// The BD is inactive. This can happen if a provisioned device is
173+
// temporarily gone and has come back. It can also happen for provisioned
174+
// multipath devices if the system is rebooted without multipathd enabled,
175+
// and then later multipathd is started again.
176+
if strings.HasPrefix(oldBd.Status.DeviceStatus.DevPath, "/dev/mapper") {
177+
if isDevPathChanged(oldBd, newBd) {
178+
logrus.WithFields(logrus.Fields{
179+
"name": oldBd.Name,
180+
"device": oldBd.Status.DeviceStatus.DevPath,
181+
"newDevice": newBd.Status.DeviceStatus.DevPath,
182+
}).Warn("new device path detected for inactive multipath device - skipping update")
183+
return false
184+
}
185+
path, _ := filepath.EvalSymlinks(oldBd.Status.DeviceStatus.DevPath)
186+
if _, err := utils.IsMultipathDevice(path); err == nil {
187+
logrus.WithFields(logrus.Fields{
188+
"name": oldBd.Name,
189+
}).Info("reactivating multipath device")
190+
oldBdCp.Status.State = diskv1.BlockDeviceActive
191+
// DeviceStatus really shouldn't have changed at this point.
192+
// TODO: should we log if it did?
193+
oldBdCp.Status.DeviceStatus.Capacity = newBd.Status.DeviceStatus.Capacity
194+
oldBdCp.Status.DeviceStatus.Details = newBd.Status.DeviceStatus.Details
195+
}
196+
} else {
197+
logrus.WithFields(logrus.Fields{
198+
"name": oldBd.Name,
199+
"wwn": newBd.Status.DeviceStatus.Details.WWN,
200+
}).Infof("reactivating block device")
201+
if isDevPathChanged(oldBd, newBd) {
202+
logrus.WithFields(logrus.Fields{
203+
"name": oldBd.Name,
204+
"device": newBd.Status.DeviceStatus.DevPath,
205+
}).Info("got new device path")
206+
oldBdCp.Status.DeviceStatus.DevPath = newBd.Status.DeviceStatus.DevPath
207+
}
191208
oldBdCp.Status.State = diskv1.BlockDeviceActive
209+
// This pulls in all status updates -- devpath, wwn, uuid, vendor, model, serial, ...
210+
// TODO: log what's changed?
211+
oldBdCp.Status.DeviceStatus.Capacity = newBd.Status.DeviceStatus.Capacity
212+
oldBdCp.Status.DeviceStatus.Details = newBd.Status.DeviceStatus.Details
192213
}
214+
}
193215

194-
if reflect.DeepEqual(oldBd, oldBdCp) {
195-
logrus.Debugf("Skip updating multipath device %s", oldBd.Name)
196-
return
197-
}
198-
216+
if !reflect.DeepEqual(oldBd, oldBdCp) {
217+
logrus.WithFields(logrus.Fields{
218+
"name": oldBd.Name,
219+
}).Info("updating device")
220+
// TODO: log diff of what's updated?
199221
if _, err := s.Blockdevices.Update(oldBdCp); err != nil {
200-
logrus.Errorf("Update device %s status error, wake up scanner again: %v", oldBdCp.Name, err)
222+
logrus.WithFields(logrus.Fields{
223+
"name": oldBd.Name,
224+
"err": err,
225+
}).Error("error updating device, waking scanner")
201226
s.Cond.Signal()
202227
}
228+
} else if isDevAlreadyProvisioned(oldBd) {
229+
logrus.WithFields(logrus.Fields{
230+
"name": oldBd.Name,
231+
}).Debug("skipping provisioned device")
232+
} else if s.NeedsAutoProvision(oldBd, autoProvisioned) {
233+
logrus.WithFields(logrus.Fields{
234+
"name": oldBd.Name,
235+
}).Debug("enquing device for auto-provisioning")
236+
s.Blockdevices.Enqueue(s.Namespace, oldBd.Name)
203237
} else {
204-
logrus.Debugf("Skip updating device %s", newBd.Name)
238+
logrus.WithFields(logrus.Fields{
239+
"name": oldBd.Name,
240+
}).Debug("device is unchanged (no need to update)")
205241
}
242+
return true
206243
}
207244

208245
func (s *Scanner) deactivateOrDeleteBlockDevices(oldBds map[string]*diskv1.BlockDevice) error {
209246
for _, oldBd := range oldBds {
210247
// It should be fine for devices that aren't actually provisioned to go away
248+
// THIS IS BRILLIANT -- IF IT'S FOUND MP DEVICES AND THEY'RE NOT PROVISIONED
249+
// THEN WHEN THE MP DEVICE ACTIVATES THEY GO AWAY!
211250
if oldBd.Status.ProvisionPhase == diskv1.ProvisionPhaseUnprovisioned {
212251
logrus.Debugf("Delete device %s", oldBd.Name)
213252
if err := s.Blockdevices.Delete(oldBd.Namespace, oldBd.Name, &metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
@@ -355,15 +394,17 @@ func (s *Scanner) scanBlockDevicesOnNode(ctx context.Context) error {
355394
}
356395

357396
if existingBd != nil {
358-
// Pick up the name of the existing block device we found
397+
// Pick up the name of the existing block device we found (not strictly necessary,
398+
// but just in case we try to use newBd.name in handleExistingDev...)
359399
newBd.Name = existingBd.Name
360-
s.handleExistingDev(existingBd, newBd, autoProvisioned)
361-
// only first time to update the cache
362-
if !CacheDiskTags.Initialized() && existingBd.Spec.Tags != nil && len(existingBd.Spec.Tags) > 0 {
363-
CacheDiskTags.UpdateDiskTags(existingBd.Name, existingBd.Spec.Tags)
400+
if s.handleExistingDev(existingBd, newBd, autoProvisioned) {
401+
// only first time to update the cache
402+
if !CacheDiskTags.Initialized() && existingBd.Spec.Tags != nil && len(existingBd.Spec.Tags) > 0 {
403+
CacheDiskTags.UpdateDiskTags(existingBd.Name, existingBd.Spec.Tags)
404+
}
405+
// remove blockdevice from list of existing device names so we can delete missing devices afterward
406+
delete(existingBDsByName, newBd.Name)
364407
}
365-
// remove blockdevice from list of existing device names so we can delete missing devices afterward
366-
delete(existingBDsByName, newBd.Name)
367408
} else {
368409
// New block device, needs a name...
369410
newBd.Name = uuid.NewString()
@@ -522,15 +563,3 @@ func isDevPathChanged(oldBd *diskv1.BlockDevice, newBd *diskv1.BlockDevice) bool
522563
func isDevAlreadyProvisioned(newBd *diskv1.BlockDevice) bool {
523564
return newBd.Status.ProvisionPhase == diskv1.ProvisionPhaseProvisioned
524565
}
525-
526-
func isDMDevice(name string) bool {
527-
// Resolve symlink to check if it points to dm-x
528-
// Resolve /dev/mapper/0QEMU_QEMU_HARDDISK_disk1 -> /dev/dm-0
529-
// Resolve /dev/dm-0 -> /dev/dm-0
530-
path, err := filepath.EvalSymlinks(name)
531-
if err == nil && strings.Contains(path, "dm-") {
532-
return true
533-
}
534-
535-
return false
536-
}

0 commit comments

Comments
 (0)