Skip to content

Commit ed8d011

Browse files
committed
Merge pull request #10 from ktemkin/fix_pci_flr
Clean up destruction of domains with PCI devices.
2 parents ab638b2 + 2bc5d0a commit ed8d011

File tree

3 files changed

+29
-19
lines changed

3 files changed

+29
-19
lines changed

xenops/device.ml

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,17 +1289,8 @@ let release ~xc ~xs ~hvm pcidevs domid devid =
12891289

12901290
List.iter (fun (dev, resources) ->
12911291
free domid dev resources hvm
1292-
) pcidevs;
1292+
) pcidevs
12931293

1294-
let device = {
1295-
backend = { domid = 0; kind = Pci; devid = devid };
1296-
frontend = { domid = domid; kind = Pci; devid = devid };
1297-
} in
1298-
let backend_path = backend_path_of_device ~xs device in
1299-
let flr = try (xs.Xs.read (backend_path ^ "/flr")) with _ -> "0" in
1300-
if flr = "1" then (
1301-
List.iter (fun (dev, resources) -> do_flr dev) pcidevs;
1302-
)
13031294

13041295
let bind pcidevs =
13051296
let bind_to_pciback device =

xenops/device.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ sig
202202
val bind : dev list -> unit
203203
val plug : xc:Xc.handle -> xs:Xs.xsh -> dev -> Xc.domid -> int -> unit
204204
val unplug : xc:Xc.handle -> xs:Xs.xsh -> dev -> Xc.domid -> int -> unit
205+
val enumerate_devs : xs:Xs.xsh -> (device) -> dev list
206+
val do_flr : dev -> unit
205207

206208
val mmio : dev -> (int64 * int64 * int64) list
207209
val io : dev -> (int64 * int64 * int64) list

xenops/domain.ml

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,23 @@ let hard_shutdown_all_vbds ~xc ~xs ?(extra_debug_paths = []) (devices: device li
137137
with Watch.Timeout _ ->
138138
debug "Timeout waiting for backends to flush";
139139
raise Timeout_backend
140-
141-
let rec destroy_nowait ?(preserve_xs_vm=false) ~xc ~xs domid =
140+
141+
142+
(* Run an FLR on all passed-in devices *)
143+
let do_flr_on_collection ~xs all_pci_devices =
144+
(* For each PCI device in the given collection... *)
145+
List.iter (fun device ->
146+
debug "Requesting FLR on %s via do_flr_on_collection." (string_of_device device);
147+
148+
(* Find all PCI device objects matching the given device model. *)
149+
let devs = Device.PCI.enumerate_devs ~xs device in
150+
151+
(* And perform an FLR on each of those devices. *)
152+
List.iter (fun dev -> Device.PCI.do_flr dev) devs
153+
154+
) all_pci_devices
155+
156+
let rec destroy_nowait ?(preserve_xs_vm=false) ~xc ~xs domid ?(do_flr=true) =
142157
let dom_path = xs.Xs.getdomainpath domid in
143158

144159
let all_devices = list_devices_for ~xs domid in
@@ -148,17 +163,17 @@ let rec destroy_nowait ?(preserve_xs_vm=false) ~xc ~xs domid =
148163
let all_pci_devices = List.filter (fun device -> device.backend.kind = Pci) all_devices in
149164
let all_nonpci_devices = List.filter (fun device -> device.backend.kind <> Pci) all_devices in
150165

151-
(* Now we should kill the domain itself *)
152-
debug "Domain.destroy calling Xc.domain_destroy (domid %d)" domid;
153-
log_exn_continue "Xc.domain_destroy" (Xc.domain_destroy xc) domid;
154-
155166
(* Forcibly shut down every PCI backend. Doing this before shutting down ioemu ensures that the device is back in dom0
156167
* when surfman gets notified about domain death *)
157168
List.iter (fun device ->
158169
try Device.hard_shutdown ~xs device
159170
with exn -> debug "Caught exception %s while destroying device %s" (Printexc.to_string exn) (string_of_device device);
160171
) all_pci_devices;
161172

173+
(* Now we should kill the domain itself *)
174+
debug "Domain.destroy calling Xc.domain_destroy (domid %d)" domid;
175+
log_exn_continue "Xc.domain_destroy" (Xc.domain_destroy xc) domid;
176+
162177
log_exn_continue "Error signaling dm-agents that domain will be destroyed"
163178
(fun () -> Dmagent.stop ~xs domid) ();
164179

@@ -175,15 +190,15 @@ let rec destroy_nowait ?(preserve_xs_vm=false) ~xc ~xs domid =
175190
let reason = Xal.wait_release xal ~timeout:60. stubdomid in
176191
info "stubdom has died, reason: %s" (Xal.string_of_died_reason reason);
177192
(* shoot later *)
178-
destroy_nowait ~xc ~xs stubdomid
193+
destroy_nowait ~xc ~xs stubdomid ~do_flr:false
179194
)
180195
with _ ->
181196
info "stubdom didn't shutdown after 1min";
182197
raise (Stubdom_didnt_shutdown stubdomid)
183198
) else (
184199
debug "stubdom didn't ACK shutdown request";
185200
(* shoot later *)
186-
destroy_nowait ~xc ~xs stubdomid
201+
destroy_nowait ~xc ~xs stubdomid ~do_flr:false
187202
)
188203
) ();
189204

@@ -196,6 +211,8 @@ let rec destroy_nowait ?(preserve_xs_vm=false) ~xc ~xs domid =
196211
with exn -> debug "Caught exception %s while destroying device %s" (Printexc.to_string exn) (string_of_device device);
197212
) all_nonpci_devices;
198213

214+
(* and perform a reset on every PCI backend *)
215+
if do_flr then do_flr_on_collection ~xs all_pci_devices;
199216

200217
(* For each device which has a hotplug entry, perform the cleanup. Even if one
201218
fails, try to clean up the rest anyway. *)
@@ -240,7 +257,7 @@ let rec destroy_nowait ?(preserve_xs_vm=false) ~xc ~xs domid =
240257

241258

242259
let destroy ?(preserve_xs_vm=false) ~xc ~xs domid =
243-
destroy_nowait ~preserve_xs_vm ~xc ~xs domid;
260+
destroy_nowait ~preserve_xs_vm ~xc ~xs domid ~do_flr:true;
244261
(* Block waiting for the dying domain to disappear; the aim is to catch shutdown errors early. *)
245262
let still_exists () =
246263
try

0 commit comments

Comments
 (0)