Files
adcs_api
cbor_protocol
channel_protocol
clyde_3g_eps_api
clyde_3g_eps_service
comms_service
db_test
eps_api
example_rust_c_service
example_rust_service
extern_lib
file_protocol
file_service
iobc_supervisor_service
isis_ants
isis_ants_api
isis_ants_service
isis_imtq_api
isis_iobc_supervisor
kubos_app
kubos_app_service
kubos_build_helper
kubos_file_client
kubos_service
kubos_shell_client
kubos_system
kubos_telemetry_db
large_download
large_upload
local_comms_service
mai400
mai400_api
mai400_service
monitor_service
novatel_oem6_api
novatel_oem6_service
nsl_duplex_d2
nsl_duplex_d2_comms_service
obc_hs
radio_api
rust_i2c
rust_mission_app
rust_uart
scheduler_service
serial_comms_service
shell_protocol
shell_service
telemetry_service
uart_comms_client
udp_client
utils
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
//
// Copyright (C) 2019 Kubos Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License")
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Check the current RAM and disk usage
//
// - If RAM usage is too high (>= RAM_CRITICAL%), forcibly reboot the system
// - If disk usage is too high (>= DISK_CRITICAL%), delete all telemetry database entries older than
//   a day (vs the usual week lifespan), then reboot the system to free temporary storage

use super::*;
use std::process::Command;

// Query sent to the monitor service by `check_mem`. It requests the `available` and `total`
// fields of `memInfo`, which are used to compute the percentage of RAM in use.
const OBC_TELEMETRY: &str = r#"{
    memInfo {
        available,
        total
    }
}"#;

pub fn check_mem() -> Result<(), Error> {
    // Check RAM usage as reported by the monitor service
    let service = ServiceConfig::new("monitor-service")?;

    let result = query(&service, OBC_TELEMETRY, Some(QUERY_TIMEOUT))?;

    let mem = result["memInfo"]["available"].as_u64().unwrap_or_else(|| {
        error!("Failed to fetch available memory");
        0
    });

    let mem_total = result["memInfo"]["total"].as_u64().unwrap_or_else(|| {
        error!("Failed to fetch available memory");
        mem * 100
    });

    // Convert to percentage in use, since that's an easier number to work with
    let ram_in_use = (100 - mem * 100 / mem_total) as u8;

    // Check RAM usage with respect to thresholds
    if ram_in_use < RAM_NOMINAL {
        info!("RAM usage nominal: {}%", ram_in_use);
    } else if ram_in_use < RAM_HIGH {
        info!("RAM usage high, but acceptable: {}%", ram_in_use);
    } else if ram_in_use < RAM_CRITICAL {
        warn!("RAM usage high: {}%", ram_in_use);
    } else {
        error!("RAM usage too high: {}%. Triggering reboot", ram_in_use);
        // Reboot the system not-nicely. If we're at this point, there's probably a rogue process
        // that's hogging all the system resources and not playing nicely with others.
        Command::new("reboot").arg("-f").status()?;
    }

    // Check disk space usage
    // Get the % of the user data partition that's free
    let disk_in_use = if let Ok(output) = Command::new("df").arg("/home").output() {
        let stdout = if output.stderr.is_empty() {
            output.stdout
        } else {
            vec![]
        };

        let mut slices = stdout.rsplit(|&elem| elem == b' ');

        // The last entry is the mount point (/home)
        slices.next();
        // The second to last entry is the percent in use
        let temp = slices.next();
        // Convert it to a useable number
        let percent = temp
            .unwrap_or(&[])
            .iter()
            .filter_map(|&elem| {
                if elem.is_ascii_digit() {
                    Some(elem as char)
                } else {
                    None
                }
            })
            .collect::<String>();

        percent.parse::<u8>().unwrap_or_else(|err| {
            error!("Failed to parse current disk usage info: {:?}", err);
            100
        })
    } else {
        error!("Failed to get current disk usage info");
        100
    };

    // Check disk usage with respect to threshold
    if disk_in_use < DISK_NOMINAL {
        info!("Disk usage nominal: {}%", disk_in_use);
    } else if disk_in_use < DISK_HIGH {
        info!("Disk usage high, but acceptable: {}%", disk_in_use);
    } else if disk_in_use < DISK_CRITICAL {
        warn!("Disk usage high: {}%", disk_in_use);
    } else {
        error!("Disk usage too high: {}%. Triggering cleanup", disk_in_use);
        // Delete everything from the database that's more than the critical age threshold
        clean_db::clean_db(CRITICAL_AGE)?;

        // Reboot the system nicely. This will clean up any temporary storage which is being used
        // (/tmp), freeing up some additional disk space.
        Command::new("reboot").status()?;
    }

    Ok(())
}