@@ -126,88 +126,127 @@ func (s *Scanner) collectAllDevices() []*deviceWithAutoProvision {
126126 return allDevices
127127}
128128
129- // TODO: This function needs a bit of re-working:
130- // - There are two paths here where we re-active an inactive device,
131- // one is if the device path has changed, the other is if it's a
132- // multipath device. But, it's possible for a device to go away,
133- // then come back with the original device path. This case isn't
134- // handled. We should try to consolidate all three cases if possible.
135- // - The isDevAlreadyProvisioned() check will always return false,
136- // because it's checking newBd, but should be checking oldDb (this
137- // doesn't actually cause any practical problems because the
138- // subsequent NeedsAutoProvision() check uses oldBd correctly, but
139- // it means the log messages aren't quite right).
140- // - I'm not actually sure the whole if/else structure here works
141- // properly. What if a device was incative, and is now active,
142- // but _also_ needs to be autoprovisioned? If I'm reading this
143- // correctly, that case won't be handled until the next scanner
144- // run (which might not be until the node is rebooted in the worst
145- // case).
146- // - No, I'm wrong, it's fine, that'll be picked up in OnBlockDeviceChanged
129+ // handleExistingDev will update an existing BD CR based on the current state
130+ // of an actual block device on the system. It will return true if newBd
131+ // (the detected block device) really does correspond to oldBd (the BD CR),
132+ // and was potentially updated in some way, or false if newBd doesn't
133+ // actually correspond to oldBd.
147134//
148- // TODO: This needs to pick up all new status, incl. new UUID and WWN!!!
149- // No, that's not quite true:
135+ // Note:
150136// - If the device was inactive before it's OK for its status to change,
151137// notably the device path. It should also be fine if the buspath
152138// changes (maybe it got moved somwehere else)
153139// - Changes of vendor+model+serial would always be unexpected
154140// - Changes of UUID might be unexpected (unless a UUID got added due
155141// to a device being provisioned, but that shouldn't come by this
156- // code path.
142+ // code path) .
157143// - Changes of WWN might be unexepcted, except in that weird case where
158144// a kernel update changed the WWNs of existing disks.
159- func (s * Scanner ) handleExistingDev (oldBd * diskv1.BlockDevice , newBd * diskv1.BlockDevice , autoProvisioned bool ) {
160- if isDevPathChanged (oldBd , newBd ) {
161- // Dev Path will change when device is temporarily gone and back.
162- // Node reboot might also cause the device path change (depends on device interrupt).
163- // According to the above, we should update the device path once it changes.
164- if oldBd .Status .State == diskv1 .BlockDeviceInactive {
165- logrus .Infof ("The inactive block device %s with wwn %s is coming back" , newBd .Name , newBd .Status .DeviceStatus .Details .WWN )
166- oldBd .Status .State = diskv1 .BlockDeviceActive
167- }
168- logrus .Infof ("Try to update the device path of %s, which is changed from %s to %s." , oldBd .Name , oldBd .Status .DeviceStatus .DevPath , newBd .Status .DeviceStatus .DevPath )
169- oldBd .Status .DeviceStatus .DevPath = newBd .Status .DeviceStatus .DevPath
170- if _ , err := s .Blockdevices .Update (oldBd ); err != nil {
171- logrus .Errorf ("Update device %s status error, wake up scanner again: %v" , oldBd .Name , err )
172- s .Cond .Signal ()
145+ func (s * Scanner ) handleExistingDev (oldBd * diskv1.BlockDevice , newBd * diskv1.BlockDevice , autoProvisioned bool ) bool {
146+ oldBdCp := oldBd .DeepCopy ()
147+
148+ if oldBd .Status .State == diskv1 .BlockDeviceActive {
149+ // The BD is currently active. In this case the dev path shouldn't change, but
150+ // it's possible that some other details may have changed (the UUID if someone
151+ // manually created a filesystem on an unprovisioned device, or the WWN in
152+ // rare cases where a kernel update messes with device naming)
153+ if isDevPathChanged (oldBd , newBd ) {
154+ // The dev path for an active device has changed. This means that newBd
155+ // doesn't actually correspond to oldBd. This should never happen in
156+ // normal operation, but can happen if e.g. a multipath BD exists, but
157+ // multipathd isn't started anymore. In this case newBd will point to
158+ // one of the raw devices. The same is true in reverse. Either way,
159+ // we want to skip this and return false so that later the BD can be
160+ // marked inactive or removed.
161+ logrus .WithFields (logrus.Fields {
162+ "name" : oldBd .Name ,
163+ "device" : oldBd .Status .DeviceStatus .DevPath ,
164+ "newDevice" : newBd .Status .DeviceStatus .DevPath ,
165+ }).Warn ("new device path detected for active device - skipping update" )
166+ return false
173167 }
174- } else if isDevAlreadyProvisioned (newBd ) {
175- logrus .Debugf ("Skip the provisioned device: %s" , newBd .Name )
176- } else if s .NeedsAutoProvision (oldBd , autoProvisioned ) {
177- logrus .Debugf ("Enqueue block device %s for auto-provisioning" , newBd .Name )
178- s .Blockdevices .Enqueue (s .Namespace , newBd .Name )
179- } else if isDMDevice (oldBd .Status .DeviceStatus .DevPath ) {
180- // Other dm devices should be filtered our at the beginning.
181- // So, we focus multipath device checking here.
182- // If system doesn't enable multipathd, the block device will become inactive after reboot.
183- // So, we need to add a checking after starting multipathd.
184- // Then the block device will become active from inactive.
185- oldBdCp := oldBd .DeepCopy ()
186-
187- // We've checked it outside, we can skip error
188- path , _ := filepath .EvalSymlinks (oldBd .Status .DeviceStatus .DevPath )
189- if _ , err := utils .IsMultipathDevice (path ); err == nil {
190- logrus .Infof ("The multipath device %s is available, change it to active." , oldBd .Name )
168+ // DevPath isn't changed, but other things might, e.g. UUID if someone manually formatted a disk
169+ oldBdCp .Status .DeviceStatus .Capacity = newBd .Status .DeviceStatus .Capacity
170+ oldBdCp .Status .DeviceStatus .Details = newBd .Status .DeviceStatus .Details
171+ } else {
172+ // The BD is inactive. This can happen if a provisioned device is
173+ // temporarily gone and has come back. It can also happen for provisioned
174+ // multipath devices if the system is rebooted without multipathd enabled,
175+ // and then later multipathd is started again.
176+ if strings .HasPrefix (oldBd .Status .DeviceStatus .DevPath , "/dev/mapper" ) {
177+ if isDevPathChanged (oldBd , newBd ) {
178+ logrus .WithFields (logrus.Fields {
179+ "name" : oldBd .Name ,
180+ "device" : oldBd .Status .DeviceStatus .DevPath ,
181+ "newDevice" : newBd .Status .DeviceStatus .DevPath ,
182+ }).Warn ("new device path detected for inactive multipath device - skipping update" )
183+ return false
184+ }
185+ path , _ := filepath .EvalSymlinks (oldBd .Status .DeviceStatus .DevPath )
186+ if _ , err := utils .IsMultipathDevice (path ); err == nil {
187+ logrus .WithFields (logrus.Fields {
188+ "name" : oldBd .Name ,
189+ }).Info ("reactivating multipath device" )
190+ oldBdCp .Status .State = diskv1 .BlockDeviceActive
191+ // DeviceStatus really shouldn't have changed at this point.
192+ // TODO: should we log if it did?
193+ oldBdCp .Status .DeviceStatus .Capacity = newBd .Status .DeviceStatus .Capacity
194+ oldBdCp .Status .DeviceStatus .Details = newBd .Status .DeviceStatus .Details
195+ }
196+ } else {
197+ logrus .WithFields (logrus.Fields {
198+ "name" : oldBd .Name ,
199+ "wwn" : newBd .Status .DeviceStatus .Details .WWN ,
200+ }).Infof ("reactivating block device" )
201+ if isDevPathChanged (oldBd , newBd ) {
202+ logrus .WithFields (logrus.Fields {
203+ "name" : oldBd .Name ,
204+ "device" : newBd .Status .DeviceStatus .DevPath ,
205+ }).Info ("got new device path" )
206+ oldBdCp .Status .DeviceStatus .DevPath = newBd .Status .DeviceStatus .DevPath
207+ }
191208 oldBdCp .Status .State = diskv1 .BlockDeviceActive
209+ // This pulls in all status updates -- devpath, wwn, uuid, vendor, model, serial, ...
210+ // TODO: log what's changed?
211+ oldBdCp .Status .DeviceStatus .Capacity = newBd .Status .DeviceStatus .Capacity
212+ oldBdCp .Status .DeviceStatus .Details = newBd .Status .DeviceStatus .Details
192213 }
214+ }
193215
194- if reflect .DeepEqual (oldBd , oldBdCp ) {
195- logrus .Debugf ( "Skip updating multipath device %s" , oldBd . Name )
196- return
197- }
198-
216+ if ! reflect .DeepEqual (oldBd , oldBdCp ) {
217+ logrus .WithFields (logrus. Fields {
218+ "name" : oldBd . Name ,
219+ }). Info ( "updating device" )
220+ // TODO: log diff of what's updated?
199221 if _ , err := s .Blockdevices .Update (oldBdCp ); err != nil {
200- logrus .Errorf ("Update device %s status error, wake up scanner again: %v" , oldBdCp .Name , err )
222+ logrus .WithFields (logrus.Fields {
223+ "name" : oldBd .Name ,
224+ "err" : err ,
225+ }).Error ("error updating device, waking scanner" )
201226 s .Cond .Signal ()
202227 }
228+ } else if isDevAlreadyProvisioned (oldBd ) {
229+ logrus .WithFields (logrus.Fields {
230+ "name" : oldBd .Name ,
231+ }).Debug ("skipping provisioned device" )
232+ } else if s .NeedsAutoProvision (oldBd , autoProvisioned ) {
233+ logrus .WithFields (logrus.Fields {
234+ "name" : oldBd .Name ,
235+ }).Debug ("enquing device for auto-provisioning" )
236+ s .Blockdevices .Enqueue (s .Namespace , oldBd .Name )
203237 } else {
204- logrus .Debugf ("Skip updating device %s" , newBd .Name )
238+ logrus .WithFields (logrus.Fields {
239+ "name" : oldBd .Name ,
240+ }).Debug ("device is unchanged (no need to update)" )
205241 }
242+ return true
206243}
207244
208245func (s * Scanner ) deactivateOrDeleteBlockDevices (oldBds map [string ]* diskv1.BlockDevice ) error {
209246 for _ , oldBd := range oldBds {
210247 // It should be fine for devices that aren't actually provisioned to go away
248+ // THIS IS BRILLIANT -- IF IT'S FOUND MP DEVICES AND THEY'RE NOT PROVISIONED
249+ // THEN WHEN THE MP DEVICE ACTIVATES THEY GO AWAY!
211250 if oldBd .Status .ProvisionPhase == diskv1 .ProvisionPhaseUnprovisioned {
212251 logrus .Debugf ("Delete device %s" , oldBd .Name )
213252 if err := s .Blockdevices .Delete (oldBd .Namespace , oldBd .Name , & metav1.DeleteOptions {}); err != nil && ! errors .IsNotFound (err ) {
@@ -355,15 +394,17 @@ func (s *Scanner) scanBlockDevicesOnNode(ctx context.Context) error {
355394 }
356395
357396 if existingBd != nil {
358- // Pick up the name of the existing block device we found
397+ // Pick up the name of the existing block device we found (not strictly necessary,
398+ // but just in case we try to use newBd.name in handleExistingDev...)
359399 newBd .Name = existingBd .Name
360- s .handleExistingDev (existingBd , newBd , autoProvisioned )
361- // only first time to update the cache
362- if ! CacheDiskTags .Initialized () && existingBd .Spec .Tags != nil && len (existingBd .Spec .Tags ) > 0 {
363- CacheDiskTags .UpdateDiskTags (existingBd .Name , existingBd .Spec .Tags )
400+ if s .handleExistingDev (existingBd , newBd , autoProvisioned ) {
401+ // only first time to update the cache
402+ if ! CacheDiskTags .Initialized () && existingBd .Spec .Tags != nil && len (existingBd .Spec .Tags ) > 0 {
403+ CacheDiskTags .UpdateDiskTags (existingBd .Name , existingBd .Spec .Tags )
404+ }
405+ // remove blockdevice from list of existing device names so we can delete missing devices afterward
406+ delete (existingBDsByName , newBd .Name )
364407 }
365- // remove blockdevice from list of existing device names so we can delete missing devices afterward
366- delete (existingBDsByName , newBd .Name )
367408 } else {
368409 // New block device, needs a name...
369410 newBd .Name = uuid .NewString ()
@@ -522,15 +563,3 @@ func isDevPathChanged(oldBd *diskv1.BlockDevice, newBd *diskv1.BlockDevice) bool
522563func isDevAlreadyProvisioned (newBd * diskv1.BlockDevice ) bool {
523564 return newBd .Status .ProvisionPhase == diskv1 .ProvisionPhaseProvisioned
524565}
525-
526- func isDMDevice (name string ) bool {
527- // Resolve symlink to check if it points to dm-x
528- // Resolve /dev/mapper/0QEMU_QEMU_HARDDISK_disk1 -> /dev/dm-0
529- // Resolve /dev/dm-0 -> /dev/dm-0
530- path , err := filepath .EvalSymlinks (name )
531- if err == nil && strings .Contains (path , "dm-" ) {
532- return true
533- }
534-
535- return false
536- }
0 commit comments