Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ use nexus_types::internal_api::background::BlueprintRendezvousStatus;
use nexus_types::internal_api::background::EreporterStatus;
use nexus_types::internal_api::background::InstanceReincarnationStatus;
use nexus_types::internal_api::background::InstanceUpdaterStatus;
use nexus_types::internal_api::background::InventoryLoadStatus;
use nexus_types::internal_api::background::LookupRegionPortStatus;
use nexus_types::internal_api::background::ReadOnlyRegionReplacementStartStatus;
use nexus_types::internal_api::background::RegionReplacementDriverStatus;
Expand Down Expand Up @@ -1158,6 +1159,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"inventory_collection" => {
print_task_inventory_collection(details);
}
"inventory_loader" => {
print_task_inventory_load(details);
}
"lookup_region_port" => {
print_task_lookup_region_port(details);
}
Expand Down Expand Up @@ -1971,6 +1975,37 @@ fn print_task_inventory_collection(details: &serde_json::Value) {
};
}

/// Print the status details reported by the `inventory_loader` background
/// task.
///
/// `details` is interpreted as an [`InventoryLoadStatus`].  If it cannot be
/// deserialized as one, a warning is written to stderr and nothing else is
/// printed.
fn print_task_inventory_load(details: &serde_json::Value) {
    let parsed = serde_json::from_value::<InventoryLoadStatus>(details.clone());

    // Bail out early on malformed details so the happy path stays flat.
    let status = match parsed {
        Ok(status) => status,
        Err(err) => {
            eprintln!(
                "warning: failed to interpret task details: {:?}: {:?}",
                err, details
            );
            return;
        }
    };

    match status {
        InventoryLoadStatus::Error(error) => {
            println!(" task did not complete successfully: {error}");
        }
        InventoryLoadStatus::NoCollections => {
            println!(" no collections available to load");
        }
        InventoryLoadStatus::Loaded {
            collection_id,
            time_started,
            time_loaded,
        } => {
            // Render both timestamps up front; each is shown as RFC 3339
            // with millisecond precision.
            let loaded_at =
                humantime::format_rfc3339_millis(time_loaded.into());
            let started_at =
                humantime::format_rfc3339_millis(time_started.into());

            println!(
                " loaded latest inventory collection as of {}:",
                loaded_at
            );
            println!(
                " collection {collection_id}, taken at {}",
                started_at,
            );
        }
    }
}

fn print_task_lookup_region_port(details: &serde_json::Value) {
match serde_json::from_value::<LookupRegionPortStatus>(details.clone()) {
Ok(LookupRegionPortStatus { found_port_ok, errors }) => {
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "inventory_loader"
loads the latest inventory collection from the DB


task: "lookup_region_port"
fill in missing ports for region records

Expand Down Expand Up @@ -328,6 +332,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "inventory_loader"
loads the latest inventory collection from the DB


task: "lookup_region_port"
fill in missing ports for region records

Expand Down Expand Up @@ -527,6 +535,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "inventory_loader"
loads the latest inventory collection from the DB


task: "lookup_region_port"
fill in missing ports for region records

Expand Down
18 changes: 18 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "inventory_loader"
loads the latest inventory collection from the DB


task: "lookup_region_port"
fill in missing ports for region records

Expand Down Expand Up @@ -649,6 +653,13 @@ task: "inventory_collection"
last collection started: <REDACTED_TIMESTAMP>
last collection done: <REDACTED_TIMESTAMP>

task: "inventory_loader"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
loaded latest inventory collection as of <REDACTED_TIMESTAMP>:
collection ..........<REDACTED_UUID>..........., taken at <REDACTED_TIMESTAMP>

task: "lookup_region_port"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down Expand Up @@ -1177,6 +1188,13 @@ task: "inventory_collection"
last collection started: <REDACTED_TIMESTAMP>
last collection done: <REDACTED_TIMESTAMP>

task: "inventory_loader"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
loaded latest inventory collection as of <REDACTED_TIMESTAMP>:
collection ..........<REDACTED_UUID>..........., taken at <REDACTED_TIMESTAMP>

task: "lookup_region_port"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down
37 changes: 26 additions & 11 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -562,13 +562,25 @@ pub struct SwitchPortSettingsManagerConfig {
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct InventoryConfig {
/// period (in seconds) for periodic activations of this background task
/// period (in seconds) for periodic activations of the background task to
/// load the latest inventory collection
///
/// Each activation runs a fast query to check whether there is a new
/// collection, and only follows up with the set of queries required to load
/// its contents if there's been a change. This period should be pretty
/// aggressive to ensure consumers are usually acting on the latest
/// collection.
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs_load: Duration,

/// period (in seconds) for periodic activations of the background task to
/// collect inventory
///
/// Each activation fetches information about all hardware and software in
/// the system and inserts it into the database. This generates a moderate
/// amount of data.
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
pub period_secs_collect: Duration,

/// maximum number of past collections to keep in the database
///
Expand All @@ -580,7 +592,7 @@ pub struct InventoryConfig {
///
/// This is an emergency lever for support / operations. It should never be
/// necessary.
pub disable: bool,
pub disable_collect: bool,
}

#[serde_as]
Expand Down Expand Up @@ -1109,9 +1121,10 @@ mod test {
external_endpoints.period_secs = 9
nat_cleanup.period_secs = 30
bfd_manager.period_secs = 30
inventory.period_secs = 10
inventory.nkeep = 11
inventory.disable = false
inventory.period_secs_load = 10
inventory.period_secs_collect = 11
inventory.nkeep = 12
inventory.disable_collect = false
support_bundle_collector.period_secs = 30
physical_disk_adoption.period_secs = 30
decommissioned_disk_cleaner.period_secs = 30
Expand Down Expand Up @@ -1274,9 +1287,10 @@ mod test {
period_secs: Duration::from_secs(30),
},
inventory: InventoryConfig {
period_secs: Duration::from_secs(10),
nkeep: 11,
disable: false,
period_secs_load: Duration::from_secs(10),
period_secs_collect: Duration::from_secs(11),
nkeep: 12,
disable_collect: false,
},
support_bundle_collector:
SupportBundleCollectorConfig {
Expand Down Expand Up @@ -1448,9 +1462,10 @@ mod test {
external_endpoints.period_secs = 9
nat_cleanup.period_secs = 30
bfd_manager.period_secs = 30
inventory.period_secs = 10
inventory.period_secs_load = 10
inventory.period_secs_collect = 10
inventory.nkeep = 3
inventory.disable = false
inventory.disable_collect = false
support_bundle_collector.period_secs = 30
physical_disk_adoption.period_secs = 30
decommissioned_disk_cleaner.period_secs = 30
Expand Down
1 change: 1 addition & 0 deletions nexus/background-task-interface/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub struct BackgroundTasks {
pub task_nat_cleanup: Activator,
pub task_bfd_manager: Activator,
pub task_inventory_collection: Activator,
pub task_inventory_loader: Activator,
pub task_support_bundle_collector: Activator,
pub task_physical_disk_adoption: Activator,
pub task_decommissioned_disk_cleaner: Activator,
Expand Down
32 changes: 20 additions & 12 deletions nexus/db-queries/src/db/datastore/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2329,16 +2329,18 @@ impl DataStore {
})
}

/// Attempt to read the latest collection.
/// Attempt to get the ID of the latest collection.
///
/// If there aren't any collections, return `Ok(None)`.
pub async fn inventory_get_latest_collection(
pub async fn inventory_get_latest_collection_id(
&self,
opctx: &OpContext,
) -> Result<Option<Collection>, Error> {
) -> Result<Option<CollectionUuid>, Error> {
use nexus_db_schema::schema::inv_collection::dsl;

opctx.authorize(authz::Action::Read, &authz::INVENTORY).await?;
let conn = self.pool_connection_authorized(opctx).await?;
use nexus_db_schema::schema::inv_collection::dsl;

let collection_id = dsl::inv_collection
.select(dsl::id)
.order_by(dsl::time_started.desc())
Expand All @@ -2347,17 +2349,23 @@ impl DataStore {
.optional()
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

let Some(collection_id) = collection_id else {
Ok(collection_id.map(CollectionUuid::from_untyped_uuid))
}

/// Attempt to read the latest collection.
///
/// If there aren't any collections, return `Ok(None)`.
pub async fn inventory_get_latest_collection(
&self,
opctx: &OpContext,
) -> Result<Option<Collection>, Error> {
let Some(collection_id) =
self.inventory_get_latest_collection_id(opctx).await?
else {
return Ok(None);
};

Ok(Some(
self.inventory_collection_read(
opctx,
CollectionUuid::from_untyped_uuid(collection_id),
)
.await?,
))
Ok(Some(self.inventory_collection_read(opctx, collection_id).await?))
}

/// Attempt to read the current collection
Expand Down
7 changes: 5 additions & 2 deletions nexus/examples/config-second.toml
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,16 @@ metrics_producer_gc.period_secs = 60
external_endpoints.period_secs = 60
nat_cleanup.period_secs = 30
bfd_manager.period_secs = 30
# How frequently to check for a new inventory collection (made by any Nexus).
# This is cheap, so we should check frequently.
inventory.period_secs_load = 15
# How frequently to collect hardware/software inventory from the whole system
# (even if we don't have reason to believe anything has changed).
inventory.period_secs = 600
inventory.period_secs_collect = 600
# Maximum number of past collections to keep in the database
inventory.nkeep = 5
# Disable inventory collection altogether (for emergencies)
inventory.disable = false
inventory.disable_collect = false
phantom_disks.period_secs = 30
physical_disk_adoption.period_secs = 30
support_bundle_collector.period_secs = 30
Expand Down
7 changes: 5 additions & 2 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,16 @@ metrics_producer_gc.period_secs = 60
external_endpoints.period_secs = 60
nat_cleanup.period_secs = 30
bfd_manager.period_secs = 30
# How frequently to check for a new inventory collection (made by any Nexus).
# This is cheap, so we should check frequently.
inventory.period_secs_load = 15
# How frequently to collect hardware/software inventory from the whole system
# (even if we don't have reason to believe anything has changed).
inventory.period_secs = 600
inventory.period_secs_collect = 600
# Maximum number of past collections to keep in the database
inventory.nkeep = 5
# Disable inventory collection altogether (for emergencies)
inventory.disable = false
inventory.disable_collect = false
phantom_disks.period_secs = 30
physical_disk_adoption.period_secs = 30
support_bundle_collector.period_secs = 30
Expand Down
Loading
Loading