//
// Syd: rock-solid application kernel
// src/compat.rs: Compatibility code for different libcs
//
// Copyright (c) 2023, 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
// waitid and WaitStatus are based in part upon nix which is:
//     Copyright (c) nix authors
//     SPDX-License-Identifier: MIT
//
// SPDX-License-Identifier: GPL-3.0

#![allow(non_camel_case_types)]
#![allow(non_upper_case_globals)]

use std::{
    alloc::{alloc, dealloc, Layout},
    cell::RefCell,
    cmp::Ordering,
    ffi::CStr,
    fmt,
    num::NonZeroUsize,
    os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, OwnedFd, RawFd},
    ptr::NonNull,
    sync::LazyLock,
};

use bitflags::bitflags;
use libseccomp::ScmpSyscall;
use memchr::arch::all::is_equal;
use nix::{
    errno::Errno,
    fcntl::{AtFlags, OFlag},
    sys::{
        epoll::EpollOp,
        signal::Signal,
        socket::SockaddrLike,
        stat::Mode,
        time::TimeSpec,
        wait::{Id, WaitPidFlag, WaitStatus as NixWaitStatus},
    },
    unistd::Pid,
    NixPath,
};
use serde::{ser::SerializeMap, Serialize, Serializer};

use crate::{
    config::*, ioctl::Ioctl, lookup::FileType, parsers::sandbox::str2u64, retry::retry_on_eintr,
    XPath,
};

// Pair of `c_ulong` words holding the two halves of a 64-bit value.
//
// Built by `syscall_ll_e!` on the 32-bit targets it selects; the order
// of the two members is chosen by that macro according to endianness.
#[repr(C)]
#[derive(Copy, Clone)]
#[allow(dead_code)]
struct syscall_ll_pair(libc::c_ulong, libc::c_ulong);

// Split a 64-bit value into two 32-bit values ordered by endianness.
//
// This macro mimics musl's __SYSCALL_LL_E macro.
//
// On the 32-bit targets selected by the first `cfg` below the expansion
// is a `syscall_ll_pair`: (low, high) on little-endian and (high, low)
// on big-endian. On every other target the expansion is `$val` itself.
#[allow(unused_macros)]
macro_rules! syscall_ll_e {
    ($val:expr) => {{
        // All 32-bit except x32 and n32.
        #[cfg(all(
            target_pointer_width = "32",
            not(target_arch = "x86_64"),
            not(target_arch = "mips64")
        ))]
        {
            // Widen first so the shift below is well-defined for any input type.
            let val = $val as libc::c_ulonglong;
            let lo = (val & 0xffff_ffff) as libc::c_ulong;
            let hi = (val >> 32) as libc::c_ulong;
            if cfg!(target_endian = "little") {
                syscall_ll_pair(lo, hi)
            } else {
                syscall_ll_pair(hi, lo)
            }
        }
        // Complement of the condition above: pass the value through untouched.
        #[cfg(not(all(
            target_pointer_width = "32",
            not(target_arch = "x86_64"),
            not(target_arch = "mips64")
        )))]
        {
            $val
        }
    }};
}

// AT_EACCESS is not defined in nix for Android, but it is 0x200 on Linux.
// `from_bits_retain` keeps the raw bit even if nix does not know the flag.
pub(crate) const AT_EACCESS: nix::fcntl::AtFlags = nix::fcntl::AtFlags::from_bits_retain(0x200);

// libc's UIO_MAXIOV re-exported as `usize` for use in length comparisons.
pub(crate) const UIO_MAXIOV: usize = libc::UIO_MAXIOV as usize;

// IPC_SET is 1 on Linux.
pub(crate) const IPC_SET: i32 = 1;

/// On Linux, we use the libc definitions for ptrace(2).
/// On Android, these are missing from libc, so we define them.
#[cfg(not(target_os = "android"))]
pub use libc::{NT_PRSTATUS, PTRACE_LISTEN, PTRACE_SEIZE};

// Android fallbacks: hard-coded values for the three names re-exported
// from libc on other targets.
#[cfg(target_os = "android")]
pub const NT_PRSTATUS: libc::c_int = 1;
#[cfg(target_os = "android")]
pub const PTRACE_SEIZE: libc::c_int = 0x4206;
#[cfg(target_os = "android")]
pub const PTRACE_LISTEN: libc::c_int = 0x4208;

// SYS_fstatfs is 44 on aarch64, 100 on arm/x86, and 138 on x86_64.
// These per-architecture values are only used on Android; exactly one
// of the definitions below is compiled in for a given target.
#[cfg(all(target_os = "android", target_arch = "aarch64"))]
pub(crate) const SYS_fstatfs: libc::c_long = 44;
#[cfg(all(target_os = "android", any(target_arch = "arm", target_arch = "x86")))]
pub(crate) const SYS_fstatfs: libc::c_long = 100;
#[cfg(all(target_os = "android", target_arch = "x86_64"))]
pub(crate) const SYS_fstatfs: libc::c_long = 138;
// On Linux, use libc definitions.
#[cfg(not(target_os = "android"))]
pub(crate) use libc::SYS_fstatfs;

// x32 compatibility
// See https://sourceware.org/bugzilla/show_bug.cgi?id=16437
//
// Type used for the nanoseconds member of a timespec: `i64` on x32
// (x86_64 with 32-bit pointers), `c_long` on every other target.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "32"))]
#[expect(non_camel_case_types)]
pub(crate) type timespec_tv_nsec_t = i64;
#[cfg(not(all(target_arch = "x86_64", target_pointer_width = "32")))]
#[expect(non_camel_case_types)]
pub(crate) type timespec_tv_nsec_t = libc::c_long;

// C-compatible layout of the `timespec64` structure.
//
// Both members are 64-bit signed integers regardless of the target.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(C)]
pub(crate) struct TimeSpec64 {
    // Seconds.
    pub(crate) tv_sec: i64,
    // Nanoseconds.
    pub(crate) tv_nsec: i64,
}

// C-compatible layout of the `timespec32` structure.
//
// Both members are 32-bit signed integers regardless of the target.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(C)]
pub(crate) struct TimeSpec32 {
    // Seconds.
    pub(crate) tv_sec: i32,
    // Nanoseconds.
    pub(crate) tv_nsec: i32,
}

/// Rounds `len` up to the next multiple of four bytes.
///
/// Control-message headers and payloads of 32-bit processes are laid
/// out on 4-byte boundaries; this helper computes the padded length.
///
/// # Arguments
///
/// * `len` - The unpadded length in bytes.
///
/// # Returns
///
/// The smallest multiple of 4 that is not less than `len`. The addition
/// saturates, so lengths within 3 of `usize::MAX` yield the largest
/// multiple of 4 representable in `usize` instead of wrapping.
pub(crate) const fn cmsg_align_32(len: usize) -> usize {
    // Low bits that must be clear in a 4-byte aligned value.
    const LOW_BITS: usize = 3;

    let bumped = len.saturating_add(LOW_BITS);
    bumped & !LOW_BITS
}

/// Total buffer space a 32-bit control message occupies, padding included.
///
/// This is the 4-byte aligned sum of the aligned `cmsghdr32` header size
/// and the payload length.
///
/// # Arguments
///
/// * `length` - The length of the data in the control message.
///
/// # Returns
///
/// The number of bytes needed to store the header plus `length` bytes of
/// data, rounded up to a 4-byte boundary. All additions saturate.
pub(crate) const fn cmsg_space_32(length: u32) -> usize {
    let header = cmsg_align_32(size_of::<cmsghdr32>());
    let payload = length as usize;

    cmsg_align_32(payload.saturating_add(header))
}

/// Value to store in `cmsg_len` for a 32-bit control message.
///
/// This is the aligned `cmsghdr32` header size plus the payload length;
/// unlike `cmsg_space_32` the result is not padded after the payload.
///
/// # Arguments
///
/// * `length` - The length of the data part of the control message.
///
/// # Returns
///
/// The aligned header size plus `length`, using saturating addition.
pub(crate) const fn cmsg_len_32(length: u32) -> usize {
    let header = cmsg_align_32(size_of::<cmsghdr32>());

    header.saturating_add(length as usize)
}

/// This structure represents the Linux 32-bit data structure 'struct stat'
///
/// Fields documented as "(32-bit)" are stored in fixed 32-bit integers;
/// the remaining fields use the corresponding libc type aliases of the
/// build target. The padding and reserved members are private.
#[repr(C)]
pub struct stat32 {
    /// Device ID.
    pub st_dev: libc::dev_t,

    /// Inode number (32-bit).
    pub st_ino: u32,

    /// Number of hard links.
    pub st_nlink: libc::nlink_t,

    /// File mode.
    pub st_mode: libc::mode_t,

    /// User ID of owner.
    pub st_uid: libc::uid_t,

    /// Group ID of owner.
    pub st_gid: libc::gid_t,

    /// Padding.
    __pad0: libc::c_int,

    /// Device ID (if special file).
    pub st_rdev: libc::dev_t,

    /// Total size, in bytes (32-bit).
    pub st_size: i32,

    /// Block size for filesystem I/O.
    pub st_blksize: libc::blksize_t,

    /// Number of 512B blocks allocated (32-bit).
    pub st_blocks: i32,

    /// Time of last access (32-bit).
    pub st_atime: i32,

    /// Nanoseconds of last access (32-bit).
    pub st_atime_nsec: i32,

    /// Time of last modification (32-bit).
    pub st_mtime: i32,

    /// Nanoseconds of last modification (32-bit).
    pub st_mtime_nsec: i32,

    /// Time of last status change (32-bit).
    pub st_ctime: i32,

    /// Nanoseconds of last status change (32-bit).
    pub st_ctime_nsec: i32,

    /// Reserved for future use (32-bit).
    __unused: [i32; 3],
}

impl From<libc::stat64> for stat32 {
    #[expect(clippy::cast_possible_truncation)]
    #[expect(clippy::unnecessary_cast)]
    fn from(stat: libc::stat64) -> Self {
        Self {
            st_dev: stat.st_dev as u64,
            st_ino: stat.st_ino as u32,
            st_nlink: stat.st_nlink,
            st_mode: stat.st_mode,
            st_uid: stat.st_uid,
            st_gid: stat.st_gid,
            __pad0: 0,
            st_rdev: stat.st_rdev as u64,
            st_size: stat.st_size as i32,
            #[cfg(target_os = "android")]
            st_blksize: stat.st_blksize as u64,
            #[cfg(not(target_os = "android"))]
            st_blksize: stat.st_blksize,
            st_blocks: stat.st_blocks as i32,
            st_atime: stat.st_atime as i32,
            st_atime_nsec: stat.st_atime_nsec as i32,
            st_mtime: stat.st_mtime as i32,
            st_mtime_nsec: stat.st_mtime_nsec as i32,
            st_ctime: stat.st_ctime as i32,
            st_ctime_nsec: stat.st_ctime_nsec as i32,
            __unused: [0; 3],
        }
    }
}

/// This structure represents the Linux 32-bit data structure 'struct iovec'
///
/// The base pointer and the length are both stored as 32-bit integers.
#[derive(Copy, Clone)]
#[repr(C)]
pub struct iovec32 {
    // Buffer address, kept as an integer rather than a pointer.
    iov_base: u32,
    // Buffer length in bytes.
    iov_len: u32,
}

impl From<iovec32> for libc::iovec {
    fn from(src: iovec32) -> Self {
        libc::iovec {
            iov_base: src.iov_base as *mut _,
            iov_len: src.iov_len as usize,
        }
    }
}

/// This structure represents the Linux 32-bit data structure 'struct mmsghdr'
#[derive(Copy, Clone)]
#[repr(C)]
pub struct mmsghdr32 {
    /// Message header in 32-bit layout.
    pub msg_hdr: msghdr32,
    /// Length field associated with `msg_hdr`.
    pub msg_len: u32,
}

/// This structure represents the Linux native data structure 'struct mmsghdr'
///
/// Its size is asserted at compile time to equal that of `libc::mmsghdr`.
#[derive(Copy, Clone)]
#[repr(C)]
pub struct mmsghdr {
    /// Message header in native layout.
    pub msg_hdr: msghdr,
    /// Length field associated with `msg_hdr`.
    pub msg_len: libc::c_uint,
}

impl From<mmsghdr32> for mmsghdr {
    fn from(src: mmsghdr32) -> Self {
        mmsghdr {
            msg_hdr: msghdr::from(src.msg_hdr),
            msg_len: src.msg_len,
        }
    }
}

impl From<mmsghdr> for mmsghdr32 {
    fn from(src: mmsghdr) -> Self {
        mmsghdr32 {
            msg_hdr: msghdr32::from(src.msg_hdr),
            msg_len: src.msg_len,
        }
    }
}

// Compile-time guard for the transmutes between `libc::mmsghdr` and
// `compat::mmsghdr` below: the build fails if the two sizes differ.
// Only the size is checked here, not field order or alignment.
const _: () = {
    assert!(
        size_of::<libc::mmsghdr>() == size_of::<mmsghdr>(),
        "Size mismatch between libc::mmsghdr and compat::mmsghdr"
    );
};

/// Reinterprets a `libc::mmsghdr` as the local `mmsghdr` type.
impl From<libc::mmsghdr> for mmsghdr {
    fn from(msg: libc::mmsghdr) -> Self {
        // SAFETY: The sizes of both types are asserted equal at compile
        // time. The transmute additionally assumes that both types share
        // the same field layout.
        unsafe { std::mem::transmute::<libc::mmsghdr, Self>(msg) }
    }
}

impl From<libc::mmsghdr> for mmsghdr32 {
    fn from(msg: libc::mmsghdr) -> Self {
        mmsghdr::from(msg).into()
    }
}

/// Reinterprets the local `mmsghdr` type as a `libc::mmsghdr`.
impl From<mmsghdr> for libc::mmsghdr {
    fn from(msg: mmsghdr) -> Self {
        // SAFETY: The sizes of both types are asserted equal at compile
        // time. The transmute additionally assumes that both types share
        // the same field layout.
        unsafe { std::mem::transmute::<mmsghdr, Self>(msg) }
    }
}

/// Converts a 32-bit `mmsghdr32` into `libc::mmsghdr` by way of the
/// local native `mmsghdr` type.
impl From<mmsghdr32> for libc::mmsghdr {
    fn from(msg: mmsghdr32) -> Self {
        let native = mmsghdr::from(msg);
        Self::from(native)
    }
}

/// This union represents the Linux data structure 'struct mmsghdr'
///
/// It overlays the 32-bit and the native layout in the same storage.
/// Nothing in the union records which member is active; reading a member
/// is `unsafe` and the caller must know which layout was written.
#[repr(C)]
pub union mmsghdr_union {
    // 32-bit mmsghdr32
    pub m32: mmsghdr32,
    // Native mmsghdr
    pub m64: mmsghdr,
}

/// This structure represents the Linux 32-bit data structure 'struct cmsghdr'
///
/// `cmsg_len` is a 32-bit integer here, unlike the native `cmsghdr`
/// where it is `size_t`.
#[repr(C)]
pub struct cmsghdr32 {
    /// Length of the control message (32-bit).
    pub cmsg_len: u32,
    /// Originating protocol level.
    pub cmsg_level: i32,
    /// Protocol-specific message type.
    pub cmsg_type: i32,
}

/// This structure represents the Linux native data structure 'struct cmsghdr'
#[repr(C)]
pub struct cmsghdr {
    /// Length of the control message.
    pub cmsg_len: libc::size_t,
    /// Originating protocol level.
    pub cmsg_level: libc::c_int,
    /// Protocol-specific message type.
    pub cmsg_type: libc::c_int,
}

impl From<cmsghdr32> for cmsghdr {
    fn from(src: cmsghdr32) -> Self {
        cmsghdr {
            cmsg_len: src.cmsg_len as libc::size_t,
            cmsg_level: src.cmsg_level,
            cmsg_type: src.cmsg_type,
        }
    }
}

/// This structure represents the Linux 32-bit data structure 'struct msghdr'
///
/// Every pointer and size member is stored as a plain `u32`, so the
/// structure has the same layout regardless of the build target.
#[derive(Copy, Clone)]
#[repr(C)]
pub struct msghdr32 {
    // In 32-bit systems, pointers are 32-bit.
    pub msg_name: u32,       // Use u32 to represent a 32-bit pointer.
    pub msg_namelen: u32,    // socklen_t is typically 32-bit.
    pub msg_iov: u32,        // Use u32 to represent a 32-bit pointer to iovec.
    pub msg_iovlen: u32,     // size_t is 32-bit on 32-bit systems.
    pub msg_control: u32,    // Use u32 to represent a 32-bit pointer.
    pub msg_controllen: u32, // size_t is 32-bit on 32-bit systems.
    pub msg_flags: i32,      // c_int remains the same (32-bit).
}

/// This structure represents the Linux native data structure 'struct msghdr'
///
/// Its size is asserted at compile time to equal that of `libc::msghdr`.
#[derive(Copy, Clone)]
#[repr(C)]
pub struct msghdr {
    /// Pointer to the socket address, stored untyped.
    pub msg_name: *mut libc::c_void,
    /// Length of the socket address.
    pub msg_namelen: libc::socklen_t,
    /// Pointer to the I/O vector array.
    pub msg_iov: *mut libc::iovec,
    /// Number of entries in `msg_iov`.
    pub msg_iovlen: libc::size_t,
    /// Pointer to the control message buffer.
    pub msg_control: *mut libc::c_void,
    /// Length of the control message buffer.
    pub msg_controllen: libc::size_t,
    /// Message flags.
    pub msg_flags: libc::c_int,
}

impl From<msghdr32> for msghdr {
    fn from(msg: msghdr32) -> Self {
        msghdr {
            msg_name: msg.msg_name as *mut libc::c_void,
            msg_namelen: msg.msg_namelen as libc::socklen_t,
            msg_iov: msg.msg_iov as *mut libc::iovec,
            msg_iovlen: msg.msg_iovlen as libc::size_t,
            msg_control: msg.msg_control as *mut libc::c_void,
            msg_controllen: msg.msg_controllen as libc::size_t,
            msg_flags: msg.msg_flags as libc::c_int,
        }
    }
}

#[expect(clippy::unnecessary_cast)]
#[expect(clippy::cast_possible_truncation)]
impl From<msghdr> for msghdr32 {
    fn from(msg: msghdr) -> Self {
        msghdr32 {
            msg_name: msg.msg_name as u32,
            msg_namelen: msg.msg_namelen as u32,
            msg_iov: msg.msg_iov as u32,
            msg_iovlen: msg.msg_iovlen as u32,
            msg_control: msg.msg_control as u32,
            msg_controllen: msg.msg_controllen as u32,
            msg_flags: msg.msg_flags as i32,
        }
    }
}

// Compile-time guard for the transmute between `libc::msghdr` and
// `compat::msghdr` below: the build fails if the two sizes differ.
// Only the size is checked here, not field order or alignment.
const _: () = {
    assert!(
        size_of::<libc::msghdr>() == size_of::<msghdr>(),
        "Size mismatch between libc::msghdr and compat::msghdr"
    );
};

/// Reinterprets a `libc::msghdr` as the local `msghdr` type.
impl From<libc::msghdr> for msghdr {
    fn from(msg: libc::msghdr) -> Self {
        // SAFETY: The sizes of both types are asserted equal at compile
        // time. The transmute additionally assumes that both types share
        // the same field layout.
        unsafe { std::mem::transmute::<libc::msghdr, Self>(msg) }
    }
}

impl From<libc::msghdr> for msghdr32 {
    fn from(msg: libc::msghdr) -> Self {
        msghdr::from(msg).into()
    }
}

/// Rust equivalent of the Linux kernel's struct xattr_args:
///
/// Passed by pointer, together with its size, to the `getxattrat` and
/// `setxattrat` wrappers in this module.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct XattrArgs {
    /// 64-bit pointer to user buffer.
    pub value: u64,

    /// Size of the buffer.
    pub size: u32,

    /// XATTR_ flags (e.g., XATTR_CREATE or XATTR_REPLACE). Should be 0
    /// for getxattrat(2).
    pub flags: u32,
}

// getxattrat(2) may be missing from the running kernel and from libc
// (no libc::SYS_getxattrat), so the syscall number is resolved by name
// through libseccomp on first use. A failed lookup is stored as 0.
static SYS_GETXATTRAT: LazyLock<libc::c_long> =
    LazyLock::new(|| match ScmpSyscall::from_name("getxattrat") {
        Ok(syscall) => libc::c_long::from(i32::from(syscall)),
        Err(_) => 0,
    });

/// Wrapper for getxattrat(2), which is new in Linux>=6.13.
///
/// Passes `args` and its size to the kernel and returns the syscall's
/// non-negative result converted to `usize`.
///
/// # Errors
///
/// Returns `Errno::ENOSYS` without entering the kernel when the syscall
/// number could not be resolved to a positive value. Errors from path
/// conversion and from the syscall itself are returned as-is.
///
/// # Safety
///
/// This function dereferences `name` argument which is a raw pointer.
/// It is safe for this pointer to be NULL but otherwise it must point
/// to valid memory and the caller must ensure that.
pub unsafe fn getxattrat<Fd: AsFd, P: ?Sized + NixPath>(
    dirfd: Fd,
    path: &P,
    name: *const libc::c_char,
    args: &mut XattrArgs,
    flags: AtFlags,
) -> Result<usize, Errno> {
    // Anything that is not a positive number means "not available".
    let sysno = match *SYS_GETXATTRAT {
        nr if nr > 0 => nr,
        _ => return Err(Errno::ENOSYS),
    };

    path.with_nix_path(|c_path| {
        // SAFETY: In libc we trust.
        let ret = unsafe {
            libc::syscall(
                sysno,
                dirfd.as_fd().as_raw_fd(),
                c_path.as_ptr(),
                flags.bits(),
                name,
                args as *mut XattrArgs,
                size_of::<XattrArgs>(),
            )
        };

        #[expect(clippy::cast_possible_truncation)]
        #[expect(clippy::cast_sign_loss)]
        Errno::result(ret).map(|len| len as usize)
    })?
}

// setxattrat(2) may be missing from the running kernel and from libc
// (no libc::SYS_setxattrat), so the syscall number is resolved by name
// through libseccomp on first use. A failed lookup is stored as 0.
static SYS_SETXATTRAT: LazyLock<libc::c_long> =
    LazyLock::new(|| match ScmpSyscall::from_name("setxattrat") {
        Ok(syscall) => libc::c_long::from(i32::from(syscall)),
        Err(_) => 0,
    });

/// Wrapper for setxattrat(2), which is new in Linux>=6.13.
///
/// Passes `args` and its size to the kernel; the syscall's success value
/// is discarded.
///
/// # Errors
///
/// Returns `Errno::ENOSYS` without entering the kernel when the syscall
/// number could not be resolved to a positive value. Errors from path
/// conversion and from the syscall itself are returned as-is.
///
/// # Safety
///
/// This function dereferences `name` argument which is a raw pointer.
/// It is safe for this pointer to be NULL but otherwise it must point
/// to valid memory and the caller must ensure that.
pub unsafe fn setxattrat<Fd: AsFd, P: ?Sized + NixPath>(
    dirfd: Fd,
    path: &P,
    name: *const libc::c_char,
    args: &XattrArgs,
    flags: AtFlags,
) -> Result<(), Errno> {
    // Anything that is not a positive number means "not available".
    let sysno = match *SYS_SETXATTRAT {
        nr if nr > 0 => nr,
        _ => return Err(Errno::ENOSYS),
    };

    path.with_nix_path(|c_path| {
        // SAFETY: In libc we trust.
        let ret = unsafe {
            libc::syscall(
                sysno,
                dirfd.as_fd().as_raw_fd(),
                c_path.as_ptr(),
                flags.bits(),
                name,
                args as *const XattrArgs,
                size_of::<XattrArgs>(),
            )
        };

        Errno::result(ret).map(|_| ())
    })?
}

// listxattrat(2) may be missing from the running kernel and from libc
// (no libc::SYS_listxattrat), so the syscall number is resolved by name
// through libseccomp on first use. A failed lookup is stored as 0.
static SYS_LISTXATTRAT: LazyLock<libc::c_long> =
    LazyLock::new(|| match ScmpSyscall::from_name("listxattrat") {
        Ok(syscall) => libc::c_long::from(i32::from(syscall)),
        Err(_) => 0,
    });

/// Wrapper for listxattrat(2), which is new in Linux>=6.13.
///
/// Passes the `addr`/`size` buffer to the kernel and returns the
/// syscall's non-negative result converted to `usize`.
///
/// # Errors
///
/// Returns `Errno::ENOSYS` without entering the kernel when the syscall
/// number could not be resolved to a positive value. Errors from path
/// conversion and from the syscall itself are returned as-is.
///
/// # Safety
///
/// This function dereferences `addr` argument which is a raw pointer.
/// It is safe for this pointer to be NULL but otherwise it must point
/// to valid memory and the caller must ensure that.
pub unsafe fn listxattrat<Fd: AsFd, P: ?Sized + NixPath>(
    dirfd: Fd,
    path: &P,
    flags: AtFlags,
    addr: *mut libc::c_char,
    size: usize,
) -> Result<usize, Errno> {
    // Anything that is not a positive number means "not available".
    let sysno = match *SYS_LISTXATTRAT {
        nr if nr > 0 => nr,
        _ => return Err(Errno::ENOSYS),
    };

    path.with_nix_path(|c_path| {
        // SAFETY: In libc we trust.
        let ret = unsafe {
            libc::syscall(
                sysno,
                dirfd.as_fd().as_raw_fd(),
                c_path.as_ptr(),
                flags.bits(),
                addr,
                size,
            )
        };

        #[expect(clippy::cast_possible_truncation)]
        #[expect(clippy::cast_sign_loss)]
        Errno::result(ret).map(|len| len as usize)
    })?
}

// removexattrat(2) may be missing from the running kernel and from libc
// (no libc::SYS_removexattrat), so the syscall number is resolved by
// name through libseccomp on first use. A failed lookup is stored as 0.
static SYS_REMOVEXATTRAT: LazyLock<libc::c_long> =
    LazyLock::new(|| match ScmpSyscall::from_name("removexattrat") {
        Ok(syscall) => libc::c_long::from(i32::from(syscall)),
        Err(_) => 0,
    });

/// Wrapper for removexattrat(2), new in Linux>=6.13.
///
/// The syscall's success value is discarded.
///
/// # Errors
///
/// Returns `Errno::ENOSYS` without entering the kernel when the syscall
/// number could not be resolved to a positive value. Errors from path
/// conversion and from the syscall itself are returned as-is.
///
/// # Safety
///
/// This function dereferences `name` argument which is a raw pointer.
/// It is safe for this pointer to be NULL but otherwise it must point
/// to valid memory and the caller must ensure that.
pub unsafe fn removexattrat<Fd: AsFd, P: ?Sized + NixPath>(
    dirfd: Fd,
    path: &P,
    name: *const libc::c_char,
    flags: AtFlags,
) -> Result<(), Errno> {
    // Anything that is not a positive number means "not available".
    let sysno = match *SYS_REMOVEXATTRAT {
        nr if nr > 0 => nr,
        _ => return Err(Errno::ENOSYS),
    };

    path.with_nix_path(|c_path| {
        // SAFETY: In libc we trust.
        let ret = unsafe {
            libc::syscall(
                sysno,
                dirfd.as_fd().as_raw_fd(),
                c_path.as_ptr(),
                flags.bits(),
                name,
            )
        };

        Errno::result(ret).map(|_| ())
    })?
}

/// This structure represents the Linux data structure `struct statx_timestamp`
///
/// Equality and ordering look at `tv_sec` and `tv_nsec` only; the
/// padding member never takes part in comparisons.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
pub struct FileStatxTimestamp {
    pub tv_sec: i64,
    pub tv_nsec: u32,
    pub(crate) __statx_timestamp_pad1: [i32; 1],
}

impl PartialEq for FileStatxTimestamp {
    fn eq(&self, rhs: &Self) -> bool {
        // Compare as a (seconds, nanoseconds) pair, skipping the padding.
        (self.tv_sec, self.tv_nsec) == (rhs.tv_sec, rhs.tv_nsec)
    }
}

impl Eq for FileStatxTimestamp {}

impl PartialOrd for FileStatxTimestamp {
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        // The order is total, so defer to `Ord`.
        Some(Ord::cmp(self, rhs))
    }
}

impl Ord for FileStatxTimestamp {
    fn cmp(&self, rhs: &Self) -> Ordering {
        // Seconds decide first; nanoseconds only break ties.
        self.tv_sec
            .cmp(&rhs.tv_sec)
            .then_with(|| self.tv_nsec.cmp(&rhs.tv_nsec))
    }
}

impl Serialize for FileStatxTimestamp {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut map = serializer.serialize_map(Some(2))?;

        map.serialize_entry("sec", &self.tv_sec)?;
        map.serialize_entry("nsec", &self.tv_nsec)?;

        map.end()
    }
}

/// This structure represents the Linux data structure `struct statx`
///
/// Only a subset of the fields is public; the private ones exist to keep
/// the `repr(C)` layout complete.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct FileStatx {
    pub stx_mask: u32,   // What results were written [uncond]
    stx_blksize: u32,    // Preferred general I/O size [uncond]
    stx_attributes: u64, // Flags conveying information about the file [uncond]

    pub stx_nlink: u32, // Number of hard links
    pub stx_uid: u32,   // User ID of owner
    pub stx_gid: u32,   // Group ID of owner
    pub stx_mode: u16,  // File mode
    __statx_pad1: [u16; 1],

    pub stx_ino: u64,         // Inode number
    pub stx_size: u64,        // File size
    stx_blocks: u64,          // Number of 512-byte blocks allocated
    stx_attributes_mask: u64, // Mask to show what's supported in stx_attributes

    pub stx_atime: FileStatxTimestamp, // Last access time
    stx_btime: FileStatxTimestamp,     // File creation time
    pub stx_ctime: FileStatxTimestamp, // Last attribute change time
    pub stx_mtime: FileStatxTimestamp, // Last data modification time

    pub stx_rdev_major: u32, // Device ID of special file [if bdev/cdev]
    pub stx_rdev_minor: u32,

    // Note, these are not public on purpose
    // as they return inconsistent values on filesystems
    // such as btrfs and overlayfs. `stx_mnt_id` should
    // be used instead.
    pub(crate) stx_dev_major: u32, // ID of device containing file [uncond]
    pub(crate) stx_dev_minor: u32,

    pub stx_mnt_id: u64,       // Mount ID
    stx_dio_mem_align: u32,    // Memory buffer alignment for direct I/O
    stx_dio_offset_align: u32, // File offset alignment for direct I/O

    stx_subvol: u64, // Subvolume identifier

    stx_atomic_write_unit_min: u32, // Min atomic write unit in bytes
    stx_atomic_write_unit_max: u32, // Max atomic write unit in bytes
    stx_atomic_write_segments_max: u32, // Max atomic write segment count

    stx_dio_read_offset_align: u32, // File offset alignment for direct I/O reads

    stx_atomic_write_unit_max_opt: u32, // Optimised max atomic write unit in bytes
    __statx_spare2: [u32; 1],

    __statx_spare3: [u64; 8], // Spare space for future expansion
}

impl FileStatx {
    /// Returns `stx_mode` with the file type bits (`S_IFMT`) cleared.
    pub(crate) fn file_mode(&self) -> libc::mode_t {
        let mode: libc::mode_t = self.stx_mode.into();
        mode & !libc::S_IFMT
    }

    /// Returns the `FileType` derived from the full `stx_mode` value.
    pub(crate) fn file_type(&self) -> FileType {
        let mode = libc::mode_t::from(self.stx_mode);
        FileType::from(mode)
    }
}

impl Serialize for FileStatx {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut map = serializer.serialize_map(Some(15))?;

        map.serialize_entry("mask", &self.stx_mask)?;
        map.serialize_entry("nlink", &self.stx_nlink)?;
        map.serialize_entry("uid", &self.stx_uid)?;
        map.serialize_entry("gid", &self.stx_gid)?;
        map.serialize_entry("mode", &self.stx_mode)?;
        map.serialize_entry("file_mode", &self.file_mode())?;
        map.serialize_entry("file_type", &self.file_type())?;
        map.serialize_entry("ino", &self.stx_ino)?;
        map.serialize_entry("size", &self.stx_size)?;
        map.serialize_entry("atime", &self.stx_atime)?;
        map.serialize_entry("ctime", &self.stx_ctime)?;
        map.serialize_entry("mtime", &self.stx_mtime)?;
        map.serialize_entry("rdev_major", &self.stx_rdev_major)?;
        map.serialize_entry("rdev_minor", &self.stx_rdev_minor)?;
        map.serialize_entry("mnt_id", &self.stx_mnt_id)?;

        map.end()
    }
}

/// An iterator over directory entries obtained via the `getdents64`
/// system call.
///
/// This iterator yields `DirEntry` instances by reading from a
/// directory file descriptor.
///
/// # Safety
///
/// This struct uses unsafe code to interact with the `getdents64`
/// system call and to parse the resulting buffer into `dirent64`
/// structures. It manages the allocation and deallocation of the buffer
/// used for reading directory entries.
#[derive(Clone)]
pub struct DirIter {
    buffer: NonNull<u8>,
    bufsiz: usize,
    memsiz: usize,
    offset: usize,
}

const DIRENT_ALIGN: usize = align_of::<libc::dirent64>();

impl DirIter {
    /// Creates a new `DirIter` with an allocated buffer of `bufsiz` bytes.
    ///
    /// This buffer is allocated once and reused across directory reads.
    ///
    /// # Parameters
    ///
    /// - `bufsiz`: Size of the buffer in bytes, must be non-zero. The
    ///   buffer is allocated with the alignment of `dirent64`.
    ///
    /// # Errors
    ///
    /// - `Err(Errno::EINVAL)` for a zero size or an invalid layout.
    /// - `Err(Errno::ENOMEM)` for allocation failure.
    pub fn new(bufsiz: usize) -> Result<Self, Errno> {
        // Requesting a zero-sized allocation from the global allocator is
        // undefined behaviour, and an empty buffer could never hold an
        // entry anyway.
        if bufsiz == 0 {
            return Err(Errno::EINVAL);
        }

        // Create layout with proper alignment.
        let layout = Layout::from_size_align(bufsiz, DIRENT_ALIGN).or(Err(Errno::EINVAL))?;

        // SAFETY: `layout` has a non-zero size and a valid alignment.
        let buffer = unsafe { alloc(layout) };
        let buffer = NonNull::new(buffer).ok_or(Errno::ENOMEM)?;

        Ok(Self {
            buffer,
            memsiz: bufsiz,
            // Nothing has been read yet.
            bufsiz: 0,
            offset: 0,
        })
    }

    /// Read directory entries from the given file descriptor into the
    /// existing buffer.
    ///
    /// Reuses the buffer without additional allocations. Uses the
    /// smaller of provided size or buffer size. Iteration restarts at
    /// the beginning of the buffer on every call.
    ///
    /// # Errors
    ///
    /// Returns `Errno` if syscall fails or EOF reached.
    /// EOF is indicated by `Errno::ECANCELED`.
    pub fn readdir<Fd: AsFd>(&mut self, fd: Fd, read_bufsiz: usize) -> Result<&mut Self, Errno> {
        self.offset = 0;
        // Never let the kernel write past the end of the allocation.
        let bufsiz = read_bufsiz.min(self.memsiz);

        let retsiz = sys_getdents64(fd, self.buffer.as_ptr().cast(), bufsiz)?;
        if retsiz == 0 {
            return Err(Errno::ECANCELED); // EOF or empty directory
        }

        self.bufsiz = retsiz;
        Ok(self)
    }
}

/// Yields the entries stored in the buffer by the last `readdir` call.
///
/// The iterator is implemented on `&mut DirIter`, so each yielded
/// `DirEntry` borrows its bytes from the iterator's buffer.
impl<'a> Iterator for &'a mut DirIter {
    type Item = DirEntry<'a>;

    #[expect(clippy::arithmetic_side_effects)]
    fn next(&mut self) -> Option<Self::Item> {
        // All bytes returned by the last read have been consumed.
        if self.offset >= self.bufsiz {
            return None;
        }

        // SAFETY: Parse the next dirent safely by borrowing from the buffer.
        // NOTE(review): `d_reclen` and the NUL terminator of `d_name` are
        // taken from the buffer as-is; no check against `bufsiz` is made
        // here, so this relies on the buffer holding well-formed records.
        unsafe {
            #[expect(clippy::cast_ptr_alignment)]
            let dirent_ptr = self
                .buffer
                .as_ptr()
                .add(self.offset)
                .cast::<libc::dirent64>();
            // Total size of this record, including the name and padding.
            let d_reclen = (*dirent_ptr).d_reclen as usize;

            // Calculate the name length safely.
            let namelen = libc::strlen((*dirent_ptr).d_name.as_ptr());

            // Borrow the bytes of the `dirent64` structure from the buffer.
            let dirent = std::slice::from_raw_parts(dirent_ptr.cast::<u8>(), d_reclen);

            // Advance to the start of the next record.
            self.offset += d_reclen;
            Some(DirEntry { dirent, namelen })
        }
    }
}

/// Releases the buffer allocated in `DirIter::new`.
impl Drop for DirIter {
    fn drop(&mut self) {
        let raw = self.buffer.as_ptr();

        // Rebuild the layout from the recorded allocation size.
        #[expect(clippy::disallowed_methods)]
        let layout = Layout::from_size_align(self.memsiz, DIRENT_ALIGN).unwrap();

        // SAFETY: Deallocate buffer.
        unsafe { dealloc(raw, layout) };
    }
}

/// Formats the bookkeeping fields; the raw buffer pointer is omitted.
impl fmt::Debug for DirIter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            bufsiz,
            memsiz,
            offset,
            ..
        } = self;

        let mut out = f.debug_struct("DirIter");
        out.field("bufsiz", bufsiz);
        out.field("memsiz", memsiz);
        out.field("offset", offset);
        out.finish()
    }
}

/// This struct represents a directory entry.
///
/// It borrows the raw bytes of one `dirent64` record and caches the
/// length of the entry name.
#[derive(Clone)]
pub struct DirEntry<'a> {
    // The `dirent64` structure.
    dirent: &'a [u8],

    // Size of the file name, in bytes.
    namelen: usize,
}

/// Formats the entry as a tuple of its name and its file type.
impl fmt::Debug for DirEntry<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = self.as_xpath();
        let kind = self.file_type();

        f.debug_tuple("DirEntry").field(&name).field(&kind).finish()
    }
}

impl DirEntry<'_> {
    /// Return an `XPath` representing the name of the directory entry.
    pub fn as_xpath(&self) -> &XPath {
        let name = self.name_bytes();
        XPath::from_bytes(name)
    }

    /// Returns the raw bytes of the `dirent64` structure.
    pub fn as_bytes(&self) -> &[u8] {
        self.dirent
    }

    /// Returns true if this is the `.` (dot) entry.
    ///
    /// The entry must be reported as a directory and be named `.`.
    pub fn is_dot(&self) -> bool {
        if !self.is_dir() {
            return false;
        }
        self.as_xpath().is_equal(b".")
    }

    /// Returns true if this is the `..` (dotdot) entry.
    ///
    /// The entry must be reported as a directory and be named `..`.
    pub fn is_dotdot(&self) -> bool {
        if !self.is_dir() {
            return false;
        }
        self.as_xpath().is_equal(b"..")
    }

    /// Returns true if this is a directory entry.
    pub fn is_dir(&self) -> bool {
        let kind = self.file_type();
        kind.is_dir()
    }

    /// Returns true if this is a regular file entry.
    pub fn is_file(&self) -> bool {
        let kind = self.file_type();
        kind.is_file()
    }

    /// Returns true if this is a symbolic link entry.
    pub fn is_symlink(&self) -> bool {
        let kind = self.file_type();
        kind.is_symlink()
    }

    /// Returns true if this is a block device entry.
    pub fn is_block_device(&self) -> bool {
        let kind = self.file_type();
        kind.is_block_device()
    }

    /// Returns true if this is a character device entry.
    pub fn is_char_device(&self) -> bool {
        let kind = self.file_type();
        kind.is_char_device()
    }

    /// Returns true if this is a FIFO entry.
    pub fn is_fifo(&self) -> bool {
        let kind = self.file_type();
        kind.is_fifo()
    }

    /// Returns true if this is a socket entry.
    pub fn is_socket(&self) -> bool {
        let kind = self.file_type();
        kind.is_socket()
    }

    /// Returns true if this is an unknown entry.
    pub fn is_unknown(&self) -> bool {
        let kind = self.file_type();
        kind.is_unknown()
    }

    /// Returns the file type of the directory entry.
    ///
    /// The type is converted from the record's `d_type` member, which
    /// holds one of the `DT_*` constants defined in `dirent.h`.
    ///
    /// # Safety
    ///
    /// This function assumes that `self.dirent` points to a valid `dirent64` structure,
    /// and that the `d_type` field is accessible without causing undefined behavior.
    pub fn file_type(&self) -> FileType {
        // SAFETY: We trust self.dirent points to a valid `dirent64` structure.
        let d_type = unsafe { (*self.dirent64()).d_type };

        FileType::from(d_type)
    }

    /// Return the inode of this `DirEntry`.
    pub fn ino(&self) -> u64 {
        let record = self.dirent64();

        // SAFETY: We trust self.dirent points to a valid `dirent64` structure.
        unsafe { (*record).d_ino }
    }

    /// Return the size of this `DirEntry`.
    ///
    /// This is the record length (`d_reclen`), not the size of the file.
    pub fn size(&self) -> usize {
        // SAFETY: We trust self.dirent points to a valid `dirent64` structure.
        let reclen = unsafe { (*self.dirent64()).d_reclen };

        usize::from(reclen)
    }

    /// Return a byte slice of the entry name.
    ///
    /// The slice is `namelen` bytes long and excludes the NUL terminator.
    pub fn name_bytes(&self) -> &[u8] {
        let record = self.dirent64();

        // SAFETY: We trust self.dirent points to a valid `dirent64` structure.
        unsafe {
            let name = (*record).d_name.as_ptr().cast::<u8>();
            std::slice::from_raw_parts(name, self.namelen)
        }
    }

    // Reinterprets the start of the borrowed bytes as a `dirent64` record.
    //
    // SAFETY: We trust self.dirent points to a valid `dirent64` structure.
    #[allow(clippy::cast_ptr_alignment)]
    fn dirent64(&self) -> *const libc::dirent64 {
        self.dirent.as_ptr().cast::<libc::dirent64>()
    }
}

/// Retrieve directory entries from an open directory file descriptor.
///
/// The entries are produced by a `DirIter` kept in thread-local storage.
/// It is allocated on first use with `DIRENT_BUF_SIZE` and reused by
/// every later call on the same thread, so no per-call allocation
/// takes place.
///
/// # Parameters
///
/// - `fd`: The open directory file descriptor.
/// - `bufsiz`: The maximum number of bytes to read into the buffer.
///   If greater than internal buffer size, internal size is used.
///
/// # Returns
///
/// - `Ok`: An iterator over directory entries (`DirEntry`).
/// - `Err`: `Errno` if syscall or allocation errors occur.
///
/// # Safety
///
/// Internally calls the unsafe _getdents64_(2) syscall.
/// The provided file descriptor must be a valid, open directory descriptor.
pub fn getdents64<Fd: AsFd>(
    fd: Fd,
    bufsiz: usize,
) -> Result<impl Iterator<Item = DirEntry<'static>>, Errno> {
    thread_local! {
        static DIR_ITER: RefCell<Option<DirIter>> = const { RefCell::new(None) };
    }

    let dir_iter = DIR_ITER.with(|slot| -> Result<&'static mut DirIter, Errno> {
        let mut guard = slot.borrow_mut();

        // First use on this thread: allocate the iterator with the
        // default buffer size.
        if guard.is_none() {
            *guard = Some(DirIter::new(DIRENT_BUF_SIZE)?);
        }

        // SAFETY:
        // 1. The slot was filled above, so it is `Some`.
        // 2. 'static is fine here because the buffer is thread-local
        //    and lives at least as long as the thread.
        Ok(unsafe {
            std::mem::transmute::<&mut DirIter, &'static mut DirIter>(
                guard.as_mut().unwrap_unchecked(),
            )
        })
    })?;

    // Fill the pre-allocated buffer with the next batch of entries.
    // Returns ECANCELED on EOF or empty directory.
    dir_iter.readdir(fd, bufsiz)?;

    // Hand the (refilled) iterator back to the caller.
    Ok(dir_iter)
}

/// Wrapper for the `getdents64` syscall.
///
/// Passes `buf` and `bytes` straight to the kernel and returns the
/// syscall's non-negative result as `usize`; failures are reported as `Errno`.
#[expect(clippy::cast_possible_truncation)]
#[expect(clippy::cast_sign_loss)]
fn sys_getdents64<Fd: AsFd>(fd: Fd, buf: *mut libc::c_void, bytes: usize) -> Result<usize, Errno> {
    let raw_fd = fd.as_fd().as_raw_fd();

    // SAFETY: In kernel, we trust.
    let ret = unsafe { libc::syscall(libc::SYS_getdents64, raw_fd, buf, bytes) };

    let nread = Errno::result(ret)?;
    Ok(nread as usize)
}

/// WaitStatus with support for signals that nix' Signal type don't support.
///
/// Signals are carried as plain `i32` values instead of nix' `Signal` enum.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum WaitStatus {
    /// The process exited; holds the pid and the exit status.
    Exited(Pid, i32),
    /// The process was terminated by a signal; holds the pid, the signal
    /// number and whether a core dump was produced.
    Signaled(Pid, i32, bool),
    /// The process was stopped; holds the pid and the stop signal number.
    Stopped(Pid, i32),
    /// The process was stopped with additional event information;
    /// holds the pid, the stop signal number and the event value.
    PtraceEvent(Pid, i32, libc::c_int),
    /// The process was stopped at a syscall stop (`SIGTRAP | 0x80`).
    PtraceSyscall(Pid),
    /// The process was continued.
    Continued(Pid),
    /// No state change was reported (`waitid` saw `si_pid == 0`).
    StillAlive,
}

/// Convert nix' wait status into ours, flattening `Signal` values to `i32`.
impl From<NixWaitStatus> for WaitStatus {
    fn from(nix_status: NixWaitStatus) -> Self {
        match nix_status {
            // Variants without a signal payload map one-to-one.
            NixWaitStatus::StillAlive => Self::StillAlive,
            NixWaitStatus::Continued(pid) => Self::Continued(pid),
            NixWaitStatus::PtraceSyscall(pid) => Self::PtraceSyscall(pid),
            NixWaitStatus::Exited(pid, code) => Self::Exited(pid, code),
            // Variants carrying a `Signal` store its numeric value instead.
            NixWaitStatus::Stopped(pid, sig) => Self::Stopped(pid, sig as i32),
            NixWaitStatus::Signaled(pid, sig, core) => Self::Signaled(pid, sig as i32, core),
            NixWaitStatus::PtraceEvent(pid, sig, event) => {
                Self::PtraceEvent(pid, sig as i32, event)
            }
        }
    }
}

// The helpers below decode a raw wait status word.
// Most are direct wrappers around the matching libc macro.

// True if the status reports a normal exit (WIFEXITED).
fn exited(status: i32) -> bool {
    libc::WIFEXITED(status)
}

// Exit status of the process (WEXITSTATUS).
fn exit_status(status: i32) -> i32 {
    libc::WEXITSTATUS(status)
}

// True if the status reports termination by a signal (WIFSIGNALED).
fn signaled(status: i32) -> bool {
    libc::WIFSIGNALED(status)
}

// Number of the terminating signal (WTERMSIG).
fn term_signal(status: i32) -> i32 {
    libc::WTERMSIG(status)
}

// True if a core dump was produced (WCOREDUMP).
fn dumped_core(status: i32) -> bool {
    libc::WCOREDUMP(status)
}

// True if the status reports a stopped process (WIFSTOPPED).
fn stopped(status: i32) -> bool {
    libc::WIFSTOPPED(status)
}

// Number of the stop signal (WSTOPSIG).
fn stop_signal(status: i32) -> i32 {
    libc::WSTOPSIG(status)
}

// True if the stop signal is `SIGTRAP | 0x80`, i.e. a syscall stop.
fn syscall_stop(status: i32) -> bool {
    // From ptrace(2), setting PTRACE_O_TRACESYSGOOD has the effect
    // of delivering SIGTRAP | 0x80 as the signal number for syscall
    // stops. This allows easily distinguishing syscall stops from
    // genuine SIGTRAP signals.
    // Note: `|` binds tighter than `==`, so this compares against
    // the combined value `SIGTRAP | 0x80`.
    libc::WSTOPSIG(status) == libc::SIGTRAP | 0x80
}

// Extra event information stored above the low 16 bits of the status.
fn stop_additional(status: i32) -> libc::c_int {
    (status >> 16) as libc::c_int
}

// True if the status reports a continued process (WIFCONTINUED).
fn continued(status: i32) -> bool {
    libc::WIFCONTINUED(status)
}

impl WaitStatus {
    /// Decode a raw wait status word for `pid` into a `WaitStatus`.
    ///
    /// # Panics
    ///
    /// Panics if `status` is neither an exit, a signal termination,
    /// a stop, nor a continue notification.
    pub(crate) fn from_raw(pid: Pid, status: i32) -> WaitStatus {
        if exited(status) {
            return WaitStatus::Exited(pid, exit_status(status));
        }

        if signaled(status) {
            return WaitStatus::Signaled(pid, term_signal(status), dumped_core(status));
        }

        if !stopped(status) {
            // The only remaining possibility is a continue notification.
            assert!(continued(status));
            return WaitStatus::Continued(pid);
        }

        // Stopped: syscall stops take precedence, then the additional
        // event bits decide between a plain stop and a ptrace event.
        if syscall_stop(status) {
            return WaitStatus::PtraceSyscall(pid);
        }

        match stop_additional(status) {
            0 => WaitStatus::Stopped(pid, stop_signal(status)),
            event => WaitStatus::PtraceEvent(pid, stop_signal(status), event),
        }
    }
}

/// Wrapper for the `waitid` syscall
/// This is identical to nix' waitid except we use our custom WaitStatus.
///
/// Returns `WaitStatus::StillAlive` when the reported `si_pid` is zero and
/// `EINVAL` when `si_code` is none of the handled `CLD_*` values.
///
/// # Panics
///
/// Panics if a child is reported but `si_signo` is not `SIGCHLD`.
pub fn waitid(id: Id, flags: WaitPidFlag) -> Result<WaitStatus, Errno> {
    #[expect(clippy::cast_sign_loss)]
    let (idtype, idval) = match id {
        Id::All => (libc::P_ALL, 0),
        Id::Pid(pid) => (libc::P_PID, pid.as_raw() as libc::id_t),
        Id::PGid(pgid) => (libc::P_PGID, pgid.as_raw() as libc::id_t),
        Id::PIDFd(pidfd) => (libc::P_PIDFD, pidfd.as_raw_fd() as libc::id_t),
        _ => unreachable!(),
    };

    // Memory is zeroed rather than uninitialized, as not all platforms
    // initialize the memory in the StillAlive case
    // SAFETY: In libc, we trust.
    let mut info: libc::siginfo_t = unsafe { std::mem::zeroed() };

    // SAFETY: In libc, we trust.
    let ret = unsafe { libc::waitid(idtype, idval, &raw mut info, flags.bits()) };
    Errno::result(ret)?;

    // SAFETY: In libc, we trust.
    let child = unsafe { info.si_pid() };
    if child == 0 {
        return Ok(WaitStatus::StillAlive);
    }

    assert_eq!(info.si_signo, libc::SIGCHLD);

    let pid = Pid::from_raw(child);
    // SAFETY: In libc, we trust.
    let status = unsafe { info.si_status() };

    Ok(match info.si_code {
        libc::CLD_EXITED => WaitStatus::Exited(pid, status),
        libc::CLD_KILLED => WaitStatus::Signaled(pid, status, false),
        libc::CLD_DUMPED => WaitStatus::Signaled(pid, status, true),
        libc::CLD_STOPPED => WaitStatus::Stopped(pid, status),
        libc::CLD_CONTINUED => WaitStatus::Continued(pid),
        // Syscall stop: the status is exactly SIGTRAP | 0x80.
        libc::CLD_TRAPPED if status == libc::SIGTRAP | 0x80 => WaitStatus::PtraceSyscall(pid),
        // Otherwise the low byte is the signal and the rest is the event.
        libc::CLD_TRAPPED => {
            WaitStatus::PtraceEvent(pid, status & 0xff, (status >> 8) as libc::c_int)
        }
        _ => return Err(Errno::EINVAL),
    })
}

/// Create a pipe with pipe2(2) and return its ends as raw descriptors
/// in `(read, write)` order.
pub(crate) fn pipe2_raw(flags: OFlag) -> Result<(RawFd, RawFd), Errno> {
    let mut pair = std::mem::MaybeUninit::<[RawFd; 2]>::uninit();

    // SAFETY: We use this when nix' version which returns an OwnedFd
    // does not work for our purposes e.g. in mini-threads spawned
    // by network syscall handlers.
    Errno::result(unsafe { libc::pipe2(pair.as_mut_ptr().cast(), flags.bits()) })?;

    // SAFETY: pipe2 returns a valid array of fds.
    let pair = unsafe { pair.assume_init() };

    Ok((pair[0], pair[1]))
}

// sigwaitinfo(2) is not exported by bionic on Android.
// Implement it using sigtimedwait(2) with a NULL timeout.
//
// The return value of sigtimedwait(2) is passed through unchanged.
//
// Safety: `set` and `info` are handed to libc as-is; the caller must
// supply pointers that sigtimedwait(2) accepts.
pub(crate) unsafe fn sigwaitinfo(
    set: *const libc::sigset_t,
    info: *mut libc::siginfo_t,
) -> libc::c_int {
    libc::sigtimedwait(set, info, std::ptr::null())
}

// timer_create(2) is not exported by bionic on Android.
//
// Issues the raw syscall. When it returns 0, the timer id written by
// the kernel is stored through `timerid`; on any other result `timerid`
// is left untouched. The syscall result is returned narrowed to `c_int`.
//
// Safety: `timerid` is written through on success and must be valid for
// writes; `sevp` is passed to the kernel unchanged.
#[expect(clippy::cast_possible_truncation)]
#[expect(clippy::cast_sign_loss)]
pub(crate) unsafe fn timer_create(
    clockid: libc::clockid_t,
    sevp: *mut libc::sigevent,
    timerid: *mut libc::timer_t,
) -> libc::c_int {
    // The raw syscall reports the id as a C int.
    let mut kernel_timer_id: libc::c_int = 0;
    let res = libc::syscall(libc::SYS_timer_create, clockid, sevp, &mut kernel_timer_id);
    if res == 0 {
        // Widen the integer id into the `timer_t` representation used by callers.
        *timerid = kernel_timer_id as usize as libc::timer_t;
    }
    res as libc::c_int
}

// timer_settime(2) is not exported by bionic on Android.
//
// Issues the raw syscall with the arguments unchanged and returns its
// result narrowed to `c_int`.
//
// Safety: `new_value` and `old_value` are passed to the kernel as-is;
// the caller must supply pointers the syscall accepts.
#[expect(clippy::cast_possible_truncation)]
pub(crate) unsafe fn timer_settime(
    timerid: libc::timer_t,
    flags: libc::c_int,
    new_value: *const libc::itimerspec,
    old_value: *mut libc::itimerspec,
) -> libc::c_int {
    libc::syscall(
        libc::SYS_timer_settime,
        timerid,
        flags,
        new_value,
        old_value,
    ) as libc::c_int
}

// timer_delete(2) is not exported by bionic on Android.
//
// Issues the raw syscall for `timerid` and returns its result narrowed
// to `c_int`.
#[expect(clippy::cast_possible_truncation)]
pub(crate) unsafe fn timer_delete(timerid: libc::timer_t) -> libc::c_int {
    libc::syscall(libc::SYS_timer_delete, timerid) as libc::c_int
}

// Socket family constants narrowed to `sa_family_t`, so they can be
// compared directly against a `sockaddr`'s `sa_family` field.

/// `AF_UNSPEC` as `sa_family_t`.
#[expect(clippy::cast_possible_truncation)]
pub(crate) const PF_UNSPEC: libc::sa_family_t = libc::AF_UNSPEC as libc::sa_family_t;
/// `AF_UNIX` as `sa_family_t`.
#[expect(clippy::cast_possible_truncation)]
pub(crate) const PF_UNIX: libc::sa_family_t = libc::AF_UNIX as libc::sa_family_t;
/// `AF_INET` as `sa_family_t`.
#[expect(clippy::cast_possible_truncation)]
pub(crate) const PF_INET: libc::sa_family_t = libc::AF_INET as libc::sa_family_t;
/// `AF_INET6` as `sa_family_t`.
#[expect(clippy::cast_possible_truncation)]
pub(crate) const PF_INET6: libc::sa_family_t = libc::AF_INET6 as libc::sa_family_t;
/// `AF_ALG` as `sa_family_t`.
#[expect(clippy::cast_possible_truncation)]
pub(crate) const PF_ALG: libc::sa_family_t = libc::AF_ALG as libc::sa_family_t;
/// `AF_NETLINK` as `sa_family_t`.
#[expect(clippy::cast_possible_truncation)]
pub(crate) const PF_NETLINK: libc::sa_family_t = libc::AF_NETLINK as libc::sa_family_t;
/* From <bits/socket.h>, expect this to be updated regularly. */
/// Hard-coded upper bound for socket family numbers.
pub(crate) const PF_MAX: libc::sa_family_t = 46;

/// nix' SockAddrLike.family() function does not support AF_ALG.
/// This is a workaround that accesses the underlying sockaddr directly.
///
/// Returns the raw `sa_family` value of `addr`.
pub(crate) fn addr_family<T: SockaddrLike>(addr: &T) -> libc::sa_family_t {
    let sockaddr = addr.as_ptr();

    // SAFETY: This is safe as long as addr.as_ptr() returns a valid pointer to a sockaddr.
    let family = unsafe { (*sockaddr).sa_family };

    family
}

// Field-selection bits for the `mask` argument of statx(2).

/// Want/got stx_mode & S_IFMT.
pub const STATX_TYPE: libc::c_uint = 0x00000001;
/// Want/got stx_mode & ~S_IFMT.
pub const STATX_MODE: libc::c_uint = 0x00000002;
/// Want/got stx_nlink.
pub const STATX_NLINK: libc::c_uint = 0x00000004;
/// Want/got stx_uid.
pub const STATX_UID: libc::c_uint = 0x00000008;
/// Want/got stx_gid.
pub const STATX_GID: libc::c_uint = 0x00000010;
/// Want/got stx_atime.
pub const STATX_ATIME: libc::c_uint = 0x00000020;
/// Want/got stx_mtime.
pub const STATX_MTIME: libc::c_uint = 0x00000040;
/// Want/got stx_ctime.
pub const STATX_CTIME: libc::c_uint = 0x00000080;
/// Want/got stx_ino.
pub const STATX_INO: libc::c_uint = 0x00000100;
/// Want/got stx_size.
pub const STATX_SIZE: libc::c_uint = 0x00000200;
/// Want/got stx_blocks.
pub const STATX_BLOCKS: libc::c_uint = 0x00000400;
/// Want all the basic stat information.
///
/// This is the union of all bits from `STATX_TYPE` through `STATX_BLOCKS`.
pub const STATX_BASIC_STATS: libc::c_uint = 0x000007ff;
/// Want/got stx_btime.
pub const STATX_BTIME: libc::c_uint = 0x00000800;
/// Want/got stx_mnt_id.
pub const STATX_MNT_ID: libc::c_uint = 0x00001000;
/// Want/got direct I/O alignment info.
pub const STATX_DIOALIGN: libc::c_uint = 0x00002000;
/// Want/got extended stx_mount_id, requires Linux>=6.8.
pub const STATX_MNT_ID_UNIQUE: libc::c_uint = 0x00004000;
/// Want/got stx_subvol.
pub const STATX_SUBVOL: libc::c_uint = 0x00008000;
/// Want/got atomic_write_* fields.
pub const STATX_WRITE_ATOMIC: libc::c_uint = 0x00010000;
/// Want/got dio read alignment info.
pub const STATX_DIO_READ_ALIGN: libc::c_uint = 0x00020000;

// Synchronisation hints for the `flags` argument of statx(2).

/// Do what stat(2) does, default.
pub const AT_STATX_SYNC_AS_STAT: libc::c_int = 0x0000;

/// Sync changes with the remote filesystem.
pub const AT_STATX_FORCE_SYNC: libc::c_int = 0x2000;

/// Do not sync with remote filesystem.
pub const AT_STATX_DONT_SYNC: libc::c_int = 0x4000;

/// Safe statx(2) wrapper.
///
/// This function sets the flag AT_STATX_DONT_SYNC if AT_STATX_FORCE_SYNC is not set.
///
/// `dirfd`, `pathname`, the adjusted `flags` and `mask` are forwarded to
/// the raw syscall; the filled-in `FileStatx` is returned on success.
pub fn statx<Fd: AsFd, P: ?Sized + NixPath>(
    dirfd: Fd,
    pathname: &P,
    mut flags: libc::c_int,
    mask: libc::c_uint,
) -> Result<FileStatx, Errno> {
    // Force DONT_SYNC if FORCE_SYNC is not set.
    if flags & AT_STATX_FORCE_SYNC == 0 {
        flags |= AT_STATX_DONT_SYNC;
    }

    let raw_dirfd = dirfd.as_fd().as_raw_fd();
    let mut stx = std::mem::MaybeUninit::zeroed();

    let ret = pathname.with_nix_path(|path| {
        // SAFETY: Neither nix nor libc has a wrapper for statx.
        unsafe {
            libc::syscall(
                libc::SYS_statx,
                raw_dirfd,
                path.as_ptr(),
                flags,
                mask,
                stx.as_mut_ptr(),
            )
        }
    })?;
    Errno::result(ret)?;

    // SAFETY: statx returned success.
    Ok(unsafe { stx.assume_init() })
}

/// Safe statx(2) wrapper to use with a FD only.
///
/// This function always sets the flag AT_STATX_DONT_SYNC.
///
/// The descriptor itself is queried by passing an empty path together
/// with `AT_EMPTY_PATH`.
pub fn fstatx<Fd: AsFd>(fd: Fd, mask: libc::c_uint) -> Result<FileStatx, Errno> {
    let raw_fd = fd.as_fd().as_raw_fd();
    let flags = libc::AT_EMPTY_PATH | AT_STATX_DONT_SYNC;
    let mut stx = std::mem::MaybeUninit::zeroed();

    // SAFETY: Neither nix nor libc has a wrapper for statx.
    let ret = unsafe {
        libc::syscall(
            libc::SYS_statx,
            raw_fd,
            c"".as_ptr(),
            flags,
            mask,
            stx.as_mut_ptr(),
        )
    };
    Errno::result(ret)?;

    // SAFETY: statx returned success.
    Ok(unsafe { stx.assume_init() })
}

/// Wrapper for struct stat64.
pub(crate) use libc::stat64 as FileStat64;

/// By-value conversion; forwards to the `From<&FileStatx>` implementation.
impl From<FileStatx> for FileStat64 {
    fn from(stx: FileStatx) -> FileStat64 {
        FileStat64::from(&stx)
    }
}

/// Build a `stat64` structure from statx(2) output.
///
/// Fields not assigned below keep their zero value.
impl From<&FileStatx> for FileStat64 {
    #[expect(clippy::as_underscore)]
    #[expect(clippy::cast_lossless)]
    #[expect(clippy::cast_possible_wrap)]
    fn from(stx: &FileStatx) -> FileStat64 {
        // SAFETY: FileStat64 is a POD struct.
        let mut st: FileStat64 = unsafe { std::mem::zeroed() };

        // Identity, ownership and size information.
        // `as _` lets each cast follow the field width of the target's stat64.
        st.st_ino = stx.stx_ino;
        st.st_nlink = stx.stx_nlink.into();
        st.st_mode = stx.stx_mode.into();
        st.st_uid = stx.stx_uid;
        st.st_gid = stx.stx_gid;
        st.st_size = stx.stx_size as i64;
        st.st_blksize = stx.stx_blksize as _;
        st.st_blocks = stx.stx_blocks as i64;
        // Access, modification and status-change timestamps.
        st.st_atime = stx.stx_atime.tv_sec as _;
        st.st_atime_nsec = stx.stx_atime.tv_nsec as _;
        st.st_mtime = stx.stx_mtime.tv_sec as _;
        st.st_mtime_nsec = stx.stx_mtime.tv_nsec as _;
        st.st_ctime = stx.stx_ctime.tv_sec as _;
        st.st_ctime_nsec = stx.stx_ctime.tv_nsec as _;

        // statx reports device numbers as separate major/minor parts;
        // stat64 wants them combined.
        st.st_dev = makedev(stx.stx_dev_major.into(), stx.stx_dev_minor.into());
        st.st_rdev = makedev(stx.stx_rdev_major.into(), stx.stx_rdev_minor.into());

        st
    }
}

pub(crate) fn fstatat64<Fd: AsFd, P: ?Sized + NixPath>(
    dirfd: Fd,
    pathname: &P,
    flags: libc::c_int,
) -> Result<FileStat64, Errno> {
    const FLAGS: libc::c_int =
        libc::AT_EMPTY_PATH | libc::AT_NO_AUTOMOUNT | libc::AT_SYMLINK_NOFOLLOW;
    if flags & !FLAGS != 0 {
        return Err(Errno::EINVAL);
    }
    Ok(statx(dirfd, pathname, flags, STATX_BASIC_STATS)?.into())
}

// Integer type used for the filesystem magic (`f_type` of `statfs64`).
// The alias is selected per target OS / architecture / libc, and
// exactly one of the definitions below is active for a given build.
#[cfg(target_os = "freebsd")]
pub(crate) type fs_type_t = u32;
#[cfg(target_os = "android")]
pub(crate) type fs_type_t = libc::c_ulong;
#[cfg(all(target_os = "linux", target_arch = "s390x", not(target_env = "musl")))]
pub(crate) type fs_type_t = libc::c_uint;
#[cfg(all(target_os = "linux", target_env = "musl"))]
pub(crate) type fs_type_t = libc::c_ulong;
#[cfg(all(target_os = "linux", target_env = "ohos"))]
pub(crate) type fs_type_t = libc::c_ulong;
#[cfg(all(target_os = "linux", target_env = "uclibc"))]
pub(crate) type fs_type_t = libc::c_int;
// Fallback for every other Linux target.
#[cfg(all(
    target_os = "linux",
    not(any(
        target_arch = "s390x",
        target_env = "musl",
        target_env = "ohos",
        target_env = "uclibc"
    ))
))]
pub(crate) type fs_type_t = libc::__fsword_t;

/// Filesystem type
///
/// Thin wrapper around the filesystem magic number.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct FsType(fs_type_t);

impl FsType {
    /// Fetches the filesystem type of the given file descriptor.
    ///
    /// The underlying `fstatfs64` call is retried on `EINTR`.
    pub fn get<Fd: AsFd>(fd: Fd) -> Result<Self, Errno> {
        let stfs = retry_on_eintr(|| fstatfs64(&fd))?;
        Ok(Self(stfs.0.f_type))
    }

    /// Resolves a filesystem name to a list of `FsType`s.
    ///
    /// `name` is first tried as an integer magic value; only if that
    /// fails is it looked up in `FS_MAGIC`, where several entries may
    /// match. `EINVAL` is returned when nothing matches.
    pub fn from_name(name: &str) -> Result<Vec<Self>, Errno> {
        // Numeric form: a single magic value.
        if let Ok(magic) = str2u64(name.as_bytes()) {
            #[expect(clippy::cast_possible_wrap)]
            return Ok(vec![Self(magic as fs_type_t)]);
        }

        // Symbolic form: collect every table entry with this name.
        let mut matches: Vec<Self> = Vec::new();
        for (fs_name, fs_type) in FS_MAGIC {
            if is_equal(name.as_bytes(), fs_name.as_bytes()) {
                matches.push(fs_type.into());
            }
        }

        if matches.is_empty() {
            Err(Errno::EINVAL)
        } else {
            Ok(matches)
        }
    }

    // WORKAROUND:
    // Check if the file resides on a btrfs|overlayfs.
    // Overlayfs does not report device IDs correctly on
    // fstat, which is a known bug:
    // https://github.com/moby/moby/issues/43512
    // Btrfs has the same issue:
    // https://www.reddit.com/r/btrfs/comments/1clgd8u/different_dev_id_reported_by_statx_and/
    // Assume true on errors for safety.
    pub(crate) fn has_broken_devid(self) -> bool {
        self.is_btrfs() || self.is_overlayfs()
    }

    /// Check if file resides on a hugetlbfs.
    pub fn is_huge_file(self) -> bool {
        let Self(magic) = self;
        magic == HUGETLBFS_MAGIC
    }

    /// Check if file resides on a procfs.
    pub fn is_proc(self) -> bool {
        let Self(magic) = self;
        magic == PROC_SUPER_MAGIC
    }

    /// Check if file resides on an overlayfs.
    pub fn is_overlayfs(self) -> bool {
        let Self(magic) = self;
        magic == OVERLAYFS_SUPER_MAGIC
    }

    /// Check if file resides on a btrfs.
    pub fn is_btrfs(self) -> bool {
        let Self(magic) = self;
        magic == BTRFS_SUPER_MAGIC
    }

    /// Check if file resides on a zfs.
    pub fn is_zfs(self) -> bool {
        let Self(magic) = self;
        magic == ZFS_SUPER_MAGIC
    }
}

impl fmt::Display for FsType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let magic = self.0;

        if let Some((name, _)) = FS_MAGIC.iter().find(|(_, m)| *m == magic) {
            f.write_str(name)
        } else {
            write!(f, "{magic:#x}")
        }
    }
}

/// Serializes as the name from `FS_MAGIC` when the magic is listed
/// there, otherwise as the numeric magic value.
impl Serialize for FsType {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Self(magic) = *self;
        let known = FS_MAGIC.iter().find(|(_, m)| *m == magic);

        match known {
            Some((name, _)) => serializer.serialize_str(name),
            None => {
                #[expect(clippy::cast_sign_loss)]
                serializer.serialize_u64(magic as u64)
            }
        }
    }
}

/// Wrap a raw filesystem magic value.
impl From<fs_type_t> for FsType {
    fn from(fs_type: fs_type_t) -> Self {
        FsType(fs_type)
    }
}

/// Unwrap to the raw filesystem magic value.
impl From<FsType> for fs_type_t {
    fn from(fs_type: FsType) -> Self {
        fs_type.0
    }
}

/// Wrap a borrowed raw filesystem magic value (copied).
impl From<&fs_type_t> for FsType {
    fn from(fs_type: &fs_type_t) -> Self {
        FsType(*fs_type)
    }
}

/// Unwrap a borrowed `FsType` to its raw magic value.
impl From<&FsType> for fs_type_t {
    fn from(fs_type: &FsType) -> Self {
        fs_type.0
    }
}

// Filesystem magic numbers used by the `FsType::is_*` checks, converted
// to the target's `fs_type_t`.
const BTRFS_SUPER_MAGIC: fs_type_t = libc::BTRFS_SUPER_MAGIC as fs_type_t;
const HUGETLBFS_MAGIC: fs_type_t = libc::HUGETLBFS_MAGIC as fs_type_t;
const OVERLAYFS_SUPER_MAGIC: fs_type_t = libc::OVERLAYFS_SUPER_MAGIC as fs_type_t;
const PROC_SUPER_MAGIC: fs_type_t = libc::PROC_SUPER_MAGIC as fs_type_t;
// Not taken from libc; spelled out as a literal here.
const ZFS_SUPER_MAGIC: fs_type_t = 0x2fc12fc1i64 as fs_type_t;

/// Wrapper for struct statfs64
pub struct Statfs64(libc::statfs64);

impl Statfs64 {
    /// Returns filesystem type.
    ///
    /// This is the `f_type` field wrapped in `FsType`.
    pub fn fs_type(&self) -> FsType {
        let Self(stfs) = self;
        FsType(stfs.f_type)
    }
}

/// Safe wrapper for fstatfs64
///
/// Returns the filled-in structure wrapped in `Statfs64`, or the
/// `Errno` reported by libc.
pub(crate) fn fstatfs64<Fd: AsFd>(fd: Fd) -> Result<Statfs64, Errno> {
    let raw_fd = fd.as_fd().as_raw_fd();
    let mut stfs = std::mem::MaybeUninit::<libc::statfs64>::uninit();

    // SAFETY: nix does not have a wrapper for fstatfs64.
    let ret = unsafe { libc::fstatfs64(raw_fd, stfs.as_mut_ptr()) };
    Errno::result(ret)?;

    // SAFETY: fstatfs64 returned success.
    Ok(Statfs64(unsafe { stfs.assume_init() }))
}

/// Safe wrapper for epoll_ctl with detailed error handling.
///
/// - `Some(event)`: registers `fd` with `EPOLL_CTL_ADD`; `EEXIST` is
///   treated as success.
/// - `None`: removes `fd` with `EPOLL_CTL_DEL`; `ENOENT` is treated as
///   success.
///
/// Every other error is returned to the caller.
pub fn epoll_ctl_safe<E: AsFd>(
    epoll: &E,
    fd: RawFd,
    event: Option<libc::epoll_event>,
) -> Result<(), Errno> {
    let epfd = epoll.as_fd().as_raw_fd();

    let (ret, benign) = match event {
        Some(mut ev) => {
            // SAFETY: nix deprecated epoll_ctl and Epoll requires an OwnedFd...
            let ret = unsafe {
                libc::epoll_ctl(epfd, EpollOp::EpollCtlAdd as libc::c_int, fd, &raw mut ev)
            };
            // Ignore EEXIST for EPOLL_CTL_ADD.
            (ret, Errno::EEXIST)
        }
        None => {
            // SAFETY: nix deprecated epoll_ctl and Epoll requires an OwnedFd...
            let ret = unsafe {
                libc::epoll_ctl(
                    epfd,
                    EpollOp::EpollCtlDel as libc::c_int,
                    fd,
                    std::ptr::null_mut(),
                )
            };
            // Ignore ENOENT for EPOLL_CTL_DEL.
            (ret, Errno::ENOENT)
        }
    };

    match Errno::result(ret) {
        Err(errno) if errno != benign => Err(errno),
        _ => Ok(()),
    }
}

/// Safe wrapper for epoll_ctl_mod with detailed error handling.
///
/// Applies `EPOLL_CTL_MOD` with `event` to `fd`; no error is ignored.
pub fn epoll_ctl_mod_safe<E: AsFd>(
    epoll: &E,
    fd: RawFd,
    mut event: libc::epoll_event,
) -> Result<(), Errno> {
    let epfd = epoll.as_fd().as_raw_fd();
    let op = EpollOp::EpollCtlMod as libc::c_int;

    // SAFETY: In libc we trust.
    let ret = unsafe { libc::epoll_ctl(epfd, op, fd, &raw mut event) };

    Errno::result(ret)?;
    Ok(())
}

// ioctl(2) request used by `epoll_set_params` to set epoll parameters.
// NOTE(review): the numeric encoding looks like the generic ioctl
// layout; confirm it for targets with a different _IOC encoding.
const EPIOCSPARAMS: u64 = 0x40088a01;
// ioctl(2) request used by `epoll_get_params` to read epoll parameters.
const EPIOCGPARAMS: u64 = 0x80088a02;

/// Epoll parameters
///
/// Mirrors the kernel's `struct epoll_params`, which is exactly 8 bytes:
/// a 32-bit field, a 16-bit field and two 8-bit fields. The
/// `EPIOCSPARAMS`/`EPIOCGPARAMS` request numbers encode that size, so
/// the field widths here must match byte for byte.
#[repr(C)]
pub struct EpollParams {
    /// Number of usecs to busy poll
    pub busy_poll_usecs: u32,
    /// Max packets per poll
    pub busy_poll_budget: u16,
    /// Boolean preference (0 or 1)
    ///
    /// A single byte, as in the kernel structure. A wider field would
    /// overlap the padding byte and push `pad` past the end of the
    /// kernel's structure.
    pub prefer_busy_poll: u8,
    // pad the struct to a multiple of 64bits
    // must be zero.
    pad: u8,
}

impl EpollParams {
    /// Create a new EpollParams structure.
    ///
    /// `prefer_busy_poll` is stored as `1` for `true` and `0` for `false`;
    /// the padding byte is always zero.
    pub fn new(busy_poll_usecs: u32, busy_poll_budget: u16, prefer_busy_poll: bool) -> Self {
        Self {
            busy_poll_usecs,
            busy_poll_budget,
            prefer_busy_poll: u8::from(prefer_busy_poll),
            pad: 0,
        }
    }
}

/// Serializes as a three-entry map; `prefer_busy_poll` is emitted as a
/// boolean (non-zero means `true`) and the padding is omitted.
impl Serialize for EpollParams {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Non-zero means the preference is enabled.
        let prefer = self.prefer_busy_poll != 0;

        // We expect 3 fields.
        let mut map = serializer.serialize_map(Some(3))?;
        map.serialize_entry("busy_poll_usecs", &self.busy_poll_usecs)?;
        map.serialize_entry("busy_poll_budget", &self.busy_poll_budget)?;
        map.serialize_entry("prefer_busy_poll", &prefer)?;
        map.end()
    }
}

/// Set epoll parameters for the given epoll file descriptor.
/// Requires Linux>=6.9.
///
/// Issues the `EPIOCSPARAMS` ioctl with a pointer to `params` and
/// returns the `Errno` reported by the kernel on failure.
pub fn epoll_set_params<Fd: AsFd>(fd: Fd, params: &EpollParams) -> Result<(), Errno> {
    // Variadic syscall(2) arguments are consumed one machine word at a
    // time. Passing the request as `u64` would occupy two words on
    // 32-bit ABIs and shift the pointer argument out of place, so pin it
    // to `c_ulong`. The request value fits in 32 bits, hence the cast
    // cannot lose information.
    #[allow(clippy::cast_possible_truncation)]
    let request = EPIOCSPARAMS as libc::c_ulong;
    let argp: *const EpollParams = params;

    // SAFETY: `argp` comes from a live reference and is only read
    // by the kernel for the duration of the call.
    Errno::result(unsafe {
        libc::syscall(libc::SYS_ioctl, fd.as_fd().as_raw_fd(), request, argp)
    })
    .map(drop)
}

/// Get epoll parameters for the given epoll file descriptor.
/// Requires Linux>=6.9.
///
/// Issues the `EPIOCGPARAMS` ioctl and returns the structure filled in
/// by the kernel, or the `Errno` it reported.
pub fn epoll_get_params<Fd: AsFd>(fd: Fd) -> Result<EpollParams, Errno> {
    // Start from zeroed memory so that every byte of the structure is
    // initialized even if the kernel writes fewer bytes than
    // `size_of::<EpollParams>()`.
    let mut params = std::mem::MaybeUninit::<EpollParams>::zeroed();

    // Variadic syscall(2) arguments are consumed one machine word at a
    // time. Passing the request as `u64` would occupy two words on
    // 32-bit ABIs and shift the pointer argument out of place, so pin it
    // to `c_ulong`. The request value fits in 32 bits, hence the cast
    // cannot lose information.
    #[allow(clippy::cast_possible_truncation)]
    let request = EPIOCGPARAMS as libc::c_ulong;

    // SAFETY: `params` is a valid, writable buffer for the ioctl output.
    Errno::result(unsafe {
        libc::syscall(
            libc::SYS_ioctl,
            fd.as_fd().as_raw_fd(),
            request,
            params.as_mut_ptr(),
        )
    })?;

    // SAFETY: The buffer was zero-initialized and all fields are plain
    // integers, so every bit pattern is a valid `EpollParams`.
    Ok(unsafe { params.assume_init() })
}

/// Uses getsockopt SO_DOMAIN to get the domain of the given socket.
///
/// Returns the raw domain value written by getsockopt(2).
pub fn getsockdomain<Fd: AsFd>(fd: Fd) -> Result<libc::c_int, Errno> {
    let mut domain: libc::c_int = 0;
    #[expect(clippy::cast_possible_truncation)]
    let mut optlen = size_of::<libc::c_int>() as libc::socklen_t;

    // SAFETY: In libc we trust.
    let ret = unsafe {
        libc::getsockopt(
            fd.as_fd().as_raw_fd(),
            libc::SOL_SOCKET,
            libc::SO_DOMAIN,
            (&raw mut domain).cast(),
            &raw mut optlen,
        )
    };
    Errno::result(ret)?;

    Ok(domain)
}

/// Unique identifiers for Linux Security Modules.
///
/// Numeric ids without a dedicated variant are preserved in `Unknown`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LsmId {
    Undef,
    Capability,
    Selinux,
    Smack,
    Tomoyo,
    AppArmor,
    Yama,
    LoadPin,
    SafeSetID,
    Lockdown,
    Bpf,
    Landlock,
    Ima,
    Evm,
    Ipe,
    Unknown(u64),
}

/// Map a numeric LSM id onto its variant.
impl From<u64> for LsmId {
    fn from(id: u64) -> Self {
        match id {
            0 => Self::Undef,
            100 => Self::Capability,
            101 => Self::Selinux,
            102 => Self::Smack,
            103 => Self::Tomoyo,
            104 => Self::AppArmor,
            105 => Self::Yama,
            106 => Self::LoadPin,
            107 => Self::SafeSetID,
            108 => Self::Lockdown,
            109 => Self::Bpf,
            110 => Self::Landlock,
            111 => Self::Ima,
            112 => Self::Evm,
            113 => Self::Ipe,
            unrecognised => Self::Unknown(unrecognised),
        }
    }
}

/// Print the lowercase module name, or `unknown(<id>)` for unmapped ids.
impl fmt::Display for LsmId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            Self::Undef => "undef",
            Self::Capability => "capability",
            Self::Selinux => "selinux",
            Self::Smack => "smack",
            Self::Tomoyo => "tomoyo",
            Self::AppArmor => "apparmor",
            Self::Yama => "yama",
            Self::LoadPin => "loadpin",
            Self::SafeSetID => "safesetid",
            Self::Lockdown => "lockdown",
            Self::Bpf => "bpf",
            Self::Landlock => "landlock",
            Self::Ima => "ima",
            Self::Evm => "evm",
            Self::Ipe => "ipe",
            // The only variant that needs formatting.
            Self::Unknown(id) => return write!(f, "unknown({id})"),
        };
        f.write_str(name)
    }
}

/// LazyLock-initialized system call number for `lsm_list_modules`.
///
/// Holds `0` when the name cannot be resolved.
pub static SYS_LSM_LIST_MODULES: LazyLock<libc::c_long> = LazyLock::new(|| {
    match ScmpSyscall::from_name("lsm_list_modules") {
        Ok(syscall) => libc::c_long::from(i32::from(syscall)),
        Err(_) => 0,
    }
});

/// Safe wrapper around the `lsm_list_modules` syscall. Requires Linux >= 6.10.
///
/// The syscall is issued twice: once with an empty buffer to learn the
/// required size, then with a buffer of that size to fetch the ids.
///
/// # Errors
///
/// - `ENOSYS` if the syscall number could not be resolved.
/// - `ENOENT` if no modules are reported.
/// - `EINVAL` if the size probe unexpectedly reports entries.
/// - Any other `Errno` returned by the kernel.
pub fn lsm_list_modules() -> Result<Vec<LsmId>, Errno> {
    let sysno = *SYS_LSM_LIST_MODULES;
    if sysno == 0 {
        return Err(Errno::ENOSYS);
    }

    // Step 1: probe for the required buffer size.
    let mut size: u32 = 0;
    // SAFETY: Zero-length buffer & valid pointer to `size`.
    let probe = unsafe {
        libc::syscall(
            sysno,
            std::ptr::null_mut::<u64>(),
            std::ptr::addr_of_mut!(size),
            0u32,
        )
    };
    match Errno::result(probe) {
        Err(Errno::E2BIG) => {} // size is filled!
        Err(errno) => return Err(errno),
        Ok(0) => return Err(Errno::ENOENT),
        Ok(_) => return Err(Errno::EINVAL),
    }

    if size == 0 {
        // No modules loaded.
        return Err(Errno::ENOENT);
    }

    // Step 2: fetch the ids into a buffer of 64-bit slots.
    let mut ids = vec![0u64; (size / 8) as usize];
    // SAFETY: Properly allocated buffer & valid pointer to `size`.
    let ret = unsafe {
        libc::syscall(
            sysno,
            ids.as_mut_ptr(),
            std::ptr::addr_of_mut!(size),
            0u32,
        )
    };
    #[expect(clippy::cast_possible_truncation)]
    #[expect(clippy::cast_sign_loss)]
    let count = Errno::result(ret)? as usize;

    if count == 0 {
        // No modules loaded.
        return Err(Errno::ENOENT);
    }

    // Convert at most `count` raw ids into typed values.
    Ok(ids.into_iter().take(count).map(LsmId::from).collect())
}

// nix does not define RenameFlags for musl.
// The flags are therefore declared here with literal values.
bitflags! {
    /// Flags for use with `renameat2`.
    ///
    /// Backed by a `u32`; each flag occupies a distinct bit.
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct RenameFlags: u32 {
        /// Don't overwrite `new_path` of the rename;
        /// return an error if `new_path` already exists.
        const RENAME_NOREPLACE = 1;

        /// Atomically exchange `old_path` and `new_path`.
        /// Both paths must exist.
        const RENAME_EXCHANGE  = 2;

        /// Create a "whiteout" at the source of the rename
        /// (for overlay/union filesystems).
        const RENAME_WHITEOUT  = 4;
    }
}

bitflags! {
    /// Flags used with `send`, `recv`, etc.
    ///
    /// Backed by an `i32`. Two names are aliases that share a bit with
    /// another flag: `MSG_TRYHARD` (= `MSG_DONTROUTE`) and
    /// `MSG_NOTIFICATION` (= `MSG_MORE`).
    // Keep in sync with <linux/socket.h>!
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct MsgFlags: i32 {
        /// Process out-of-band data.
        const MSG_OOB = 0x01;
        /// Peek at incoming messages.
        const MSG_PEEK = 0x02;
        /// Don't use local routing.
        const MSG_DONTROUTE = 0x04;
        /// DECnet uses a different name for MSG_DONTROUTE.
        const MSG_TRYHARD = Self::MSG_DONTROUTE.bits();
        /// Control data lost before delivery.
        const MSG_CTRUNC = 0x08;
        /// Supply or ask second address.
        const MSG_PROXY = 0x10;
        /// Truncated message.
        const MSG_TRUNC = 0x20;
        /// Nonblocking IO.
        const MSG_DONTWAIT = 0x40;
        /// End of record.
        const MSG_EOR = 0x80;
        /// Wait for a full request.
        const MSG_WAITALL = 0x100;
        /// End of connection.
        const MSG_FIN = 0x200;
        /// Synchronous operation.
        const MSG_SYN = 0x400;
        /// Confirm path validity.
        const MSG_CONFIRM = 0x800;
        /// Reset connection.
        const MSG_RST = 0x1000;
        /// Fetch message from error queue.
        const MSG_ERRQUEUE = 0x2000;
        /// Do not generate SIGPIPE.
        const MSG_NOSIGNAL = 0x4000;
        /// Sender will send more data.
        const MSG_MORE = 0x8000;
        /// Wait for at least one packet to return.
        const MSG_WAITFORONE = 0x10000;
        /// More messages coming.
        const MSG_BATCH = 0x40000;
        /// Receive devmem skbs as cmsg.
        const MSG_SOCK_DEVMEM = 0x2000000;
        /// Use user data in kernel path.
        const MSG_ZEROCOPY = 0x4000000;
        /// Send data in TCP SYN.
        const MSG_FASTOPEN = 0x20000000;
        /// Set close-on-exec for received file descriptors.
        const MSG_CMSG_CLOEXEC = 0x40000000;
        /// Special flag for notifications.
        /// Shares its bit with `MSG_MORE`.
        const MSG_NOTIFICATION = Self::MSG_MORE.bits();
    }
}

#[expect(clippy::disallowed_types)]
use nix::sys::socket::MsgFlags as NixMsgFlags;

/// Convert our `MsgFlags` into nix' type.
///
/// The raw bits are handed to `from_bits_retain` unchanged.
#[expect(clippy::disallowed_types)]
impl From<MsgFlags> for NixMsgFlags {
    fn from(msgflags: MsgFlags) -> Self {
        Self::from_bits_retain(msgflags.bits())
    }
}

/// Convert nix' `MsgFlags` into our type.
///
/// The raw bits are handed to `from_bits_retain` unchanged.
#[expect(clippy::disallowed_types)]
impl From<NixMsgFlags> for MsgFlags {
    fn from(msgflags: NixMsgFlags) -> Self {
        Self::from_bits_retain(msgflags.bits())
    }
}

bitflags! {
    /// Flags used with inotify_add_watch(2).
    ///
    /// The same bit space is used for the watch mask passed in and for
    /// the event mask reported back.
    // Keep in sync with <linux/inotify.h>!
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct AddWatchFlags: u32 {
        // The following are legal, implemented events that user-space can watch for.
        /// File was accessed.
        const IN_ACCESS = 0x00000001;
        /// File was modified.
        const IN_MODIFY = 0x00000002;
        /// Metadata changed.
        const IN_ATTRIB = 0x00000004;
        /// Writable file was closed.
        const IN_CLOSE_WRITE = 0x00000008;
        /// Unwritable file was closed.
        const IN_CLOSE_NOWRITE = 0x00000010;
        /// File was opened.
        const IN_OPEN = 0x00000020;
        /// File was moved from X.
        const IN_MOVED_FROM = 0x00000040;
        /// File was moved to Y.
        const IN_MOVED_TO = 0x00000080;
        /// Subfile was created.
        const IN_CREATE = 0x00000100;
        /// Subfile was deleted.
        const IN_DELETE = 0x00000200;
        /// Self was deleted.
        const IN_DELETE_SELF = 0x00000400;
        /// Self was moved.
        const IN_MOVE_SELF = 0x00000800;

        // The following are legal events. They are sent as needed to any watch.
        /// Backing fs was unmounted.
        const IN_UNMOUNT = 0x00002000;
        /// Event queue overflowed.
        const IN_Q_OVERFLOW = 0x00004000;
        /// File was ignored.
        const IN_IGNORED = 0x00008000;

        // Helper events.
        /// Helper event: Close (either `IN_CLOSE_WRITE` or `IN_CLOSE_NOWRITE`).
        const IN_CLOSE = Self::IN_CLOSE_WRITE.bits() | Self::IN_CLOSE_NOWRITE.bits();
        /// Helper event: Moves (either `IN_MOVED_FROM` or `IN_MOVED_TO`).
        const IN_MOVE = Self::IN_MOVED_FROM.bits() | Self::IN_MOVED_TO.bits();

        // Special flags.
        /// Only watch the path if it is a directory.
        const IN_ONLYDIR = 0x01000000;
        /// Don't follow a symbolic link.
        const IN_DONT_FOLLOW = 0x02000000;
        /// Exclude events on unlinked objects.
        const IN_EXCL_UNLINK = 0x04000000;
        /// Only create watches.
        const IN_MASK_CREATE = 0x10000000;
        /// Add to the mask of an already existing watch.
        const IN_MASK_ADD = 0x20000000;
        /// Event occurred against dir.
        const IN_ISDIR = 0x40000000;
        /// Only send event once.
        const IN_ONESHOT = 0x80000000;

        /// All of the events.
        ///
        /// The list is built by hand so that flags can be added in the
        /// future without breaking backward compatibility: apps will get
        /// only the events that they originally wanted. Be sure to add new
        /// events here!
        const IN_ALL_EVENTS =
            Self::IN_ACCESS.bits() |
            Self::IN_MODIFY.bits() |
            Self::IN_ATTRIB.bits() |
            Self::IN_CLOSE_WRITE.bits() |
            Self::IN_CLOSE_NOWRITE.bits() |
            Self::IN_OPEN.bits() |
            Self::IN_MOVED_FROM.bits() |
            Self::IN_MOVED_TO.bits() |
            Self::IN_DELETE.bits() |
            Self::IN_CREATE.bits() |
            Self::IN_DELETE_SELF.bits() |
            Self::IN_MOVE_SELF.bits();
    }
}

#[expect(clippy::disallowed_types)]
use nix::sys::inotify::AddWatchFlags as NixAddWatchFlags;

#[expect(clippy::disallowed_types)]
impl From<AddWatchFlags> for NixAddWatchFlags {
    fn from(addwatchflags: AddWatchFlags) -> Self {
        Self::from_bits_retain(addwatchflags.bits())
    }
}

#[expect(clippy::disallowed_types)]
impl From<NixAddWatchFlags> for AddWatchFlags {
    fn from(addwatchflags: NixAddWatchFlags) -> Self {
        Self::from_bits_retain(addwatchflags.bits())
    }
}

bitflags! {
    /// Flags for memfd_create(2).
    ///
    /// Every value is taken from the corresponding `libc` constant.
    // nix' MFdFlags does not include MFD_{EXEC,NOEXEC_SEAL} yet!
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct MFdFlags: libc::c_uint {
        /// Set the close-on-exec flag on the new file descriptor.
        const MFD_CLOEXEC = libc::MFD_CLOEXEC;

        /// Allow sealing via _fcntl_(2).
        const MFD_ALLOW_SEALING = libc::MFD_ALLOW_SEALING;

        /// Disallow exec (Linux >= 6.3).
        const MFD_NOEXEC_SEAL = libc::MFD_NOEXEC_SEAL;

        /// Allow exec (Linux >= 6.3).
        const MFD_EXEC = libc::MFD_EXEC;

        /// Use hugetlbfs.
        const MFD_HUGETLB = libc::MFD_HUGETLB;

        /// Huge page size: 64KB.
        const MFD_HUGE_64KB = libc::MFD_HUGE_64KB;

        /// Huge page size: 512KB.
        const MFD_HUGE_512KB = libc::MFD_HUGE_512KB;

        /// Huge page size: 1MB.
        const MFD_HUGE_1MB = libc::MFD_HUGE_1MB;

        /// Huge page size: 2MB.
        const MFD_HUGE_2MB = libc::MFD_HUGE_2MB;

        /// Huge page size: 8MB.
        const MFD_HUGE_8MB = libc::MFD_HUGE_8MB;

        /// Huge page size: 16MB.
        const MFD_HUGE_16MB = libc::MFD_HUGE_16MB;

        /// Huge page size: 32MB.
        const MFD_HUGE_32MB = libc::MFD_HUGE_32MB;

        /// Huge page size: 256MB.
        const MFD_HUGE_256MB = libc::MFD_HUGE_256MB;

        /// Huge page size: 512MB.
        const MFD_HUGE_512MB = libc::MFD_HUGE_512MB;

        /// Huge page size: 1GB.
        const MFD_HUGE_1GB = libc::MFD_HUGE_1GB;

        /// Huge page size: 2GB.
        const MFD_HUGE_2GB = libc::MFD_HUGE_2GB;

        /// Huge page size: 16GB.
        const MFD_HUGE_16GB = libc::MFD_HUGE_16GB;
    }
}

#[expect(clippy::disallowed_types)]
use nix::sys::memfd::MFdFlags as NixMFdFlags;

#[expect(clippy::disallowed_types)]
impl From<MFdFlags> for NixMFdFlags {
    fn from(mfdflags: MFdFlags) -> Self {
        Self::from_bits_retain(mfdflags.bits())
    }
}

#[expect(clippy::disallowed_types)]
impl From<NixMFdFlags> for MFdFlags {
    fn from(mfdflags: NixMFdFlags) -> Self {
        Self::from_bits_retain(mfdflags.bits())
    }
}

//
// openat2(2) wrapper, nix' does not support Android yet.
//

bitflags! {
    /// Flags for openat2(2), stored in the `resolve` field of [`OpenHow`].
    // Keep in sync with <linux/openat2.h>!
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct ResolveFlag: u64 {
        /// Do not permit the path resolution to succeed if any component of
        /// the resolution is not a descendant of the directory indicated by
        /// dirfd. This causes absolute symbolic links (and absolute values of
        /// pathname) to be rejected.
        const RESOLVE_BENEATH = 0x08;

        /// Treat the directory referred to by dirfd as the root directory
        /// while resolving pathname.
        const RESOLVE_IN_ROOT = 0x10;

        /// Disallow all magic-link resolution during path resolution. Magic
        /// links are symbolic link-like objects that are most notably found
        /// in proc(5); examples include `/proc/[pid]/exe` and `/proc/[pid]/fd/*`.
        ///
        /// See symlink(7) for more details.
        const RESOLVE_NO_MAGICLINKS = 0x02;

        /// Disallow resolution of symbolic links during path resolution. This
        /// option implies RESOLVE_NO_MAGICLINKS.
        const RESOLVE_NO_SYMLINKS = 0x04;

        /// Disallow traversal of mount points during path resolution (including
        /// all bind mounts).
        const RESOLVE_NO_XDEV = 0x01;

        /// Only complete if resolution can be completed through cached lookup.
        /// May return EAGAIN if that's not possible.
        const RESOLVE_CACHED = 0x20;
    }
}

/// This structure represents `struct open_how`.
///
/// It is `#[repr(C)]` and consists of three `u64` fields; `openat2()`
/// passes a pointer to it together with its size to the openat2(2)
/// system call.
#[derive(Copy, Clone, Default)]
#[non_exhaustive]
#[repr(C)]
pub struct OpenHow {
    /// `open_how` flags: the open flags, set via [`OpenHow::flags`].
    pub flags: u64,
    /// `open_how` mode: the file creation mode, set via [`OpenHow::mode`].
    pub mode: u64,
    /// `open_how` resolve flags, set via [`OpenHow::resolve`].
    pub resolve: u64,
}

impl OpenHow {
    /// Returns an `open_how` with every field set to zero.
    pub fn new() -> Self {
        Default::default()
    }

    /// Replaces the open flags wholesale with `flags`; any previously
    /// set open flags are discarded.
    pub fn flags(self, flags: OFlag) -> Self {
        #[expect(clippy::cast_sign_loss)]
        let bits = flags.bits() as u64;
        Self {
            flags: bits,
            ..self
        }
    }

    /// Replaces the creation mode wholesale with `mode`; any previously
    /// set mode is discarded.
    pub fn mode(self, mode: Mode) -> Self {
        let bits = mode.bits().into();
        Self { mode: bits, ..self }
    }

    /// Replaces the resolve flags wholesale with `resolve`; any
    /// previously set resolve flags are discarded.
    ///
    /// See [ResolveFlag] for more detail.
    pub fn resolve(self, resolve: ResolveFlag) -> Self {
        Self {
            resolve: resolve.bits(),
            ..self
        }
    }
}

// Note openat2(2) may not be available,
// and libc::SYS_openat2 may not be defined.
// Therefore we query the number using libseccomp.
static SYS_OPENAT2: LazyLock<Option<libc::c_long>> = LazyLock::new(|| {
    // Resolve the number by name once; `None` if libseccomp cannot resolve it.
    match ScmpSyscall::from_name("openat2") {
        Ok(syscall) => Some(libc::c_long::from(i32::from(syscall))),
        Err(_) => None,
    }
});

/// Open or create a file for reading, writing or executing.
///
/// `openat2` extends [`openat`] by letting the caller control how the
/// path is resolved, via the `resolve` field of `how`.
///
/// # Errors
///
/// Returns `ENOSYS` if the system call number could not be resolved,
/// the error of the path conversion if `path` cannot be turned into a
/// C string, or the errno reported by the system call.
///
/// # See also
///
/// [openat2](https://man7.org/linux/man-pages/man2/openat2.2.html)
pub fn openat2<P: ?Sized + NixPath, Fd: AsFd>(
    dirfd: Fd,
    path: &P,
    mut how: OpenHow,
) -> Result<OwnedFd, Errno> {
    let Some(sys_openat2) = *SYS_OPENAT2 else {
        return Err(Errno::ENOSYS);
    };

    // SAFETY: In libc we trust.
    let ret = path.with_nix_path(|cstr| unsafe {
        libc::syscall(
            sys_openat2,
            dirfd.as_fd().as_raw_fd(),
            cstr.as_ptr(),
            &raw mut how,
            std::mem::size_of::<OpenHow>(),
        )
    })?;

    #[expect(clippy::cast_possible_truncation)]
    let fd = ret as RawFd;
    Errno::result(fd)?;

    // SAFETY:
    //
    // `openat2(2)` should return a valid owned fd on success
    let fd = unsafe { OwnedFd::from_raw_fd(fd) };
    Ok(fd)
}

bitflags! {
    /// Mask for defining which events shall be listened with [`Fanotify::mark()`]
    /// and for querying notifications.
    // Keep in sync with <linux/fanotify.h>!
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct MaskFlags: u64 {
        /// File was accessed.
        const FAN_ACCESS = 0x00000001;
        /// File was modified.
        const FAN_MODIFY = 0x00000002;
        /// Metadata changed.
        const FAN_ATTRIB = 0x00000004;
        /// Writable file closed.
        const FAN_CLOSE_WRITE = 0x00000008;
        /// Unwritable file closed.
        const FAN_CLOSE_NOWRITE = 0x00000010;
        /// File was opened.
        const FAN_OPEN = 0x00000020;
        /// File was moved from X.
        const FAN_MOVED_FROM = 0x00000040;
        /// File was moved to Y.
        const FAN_MOVED_TO = 0x00000080;
        /// Subfile was created.
        const FAN_CREATE = 0x00000100;
        /// Subfile was deleted.
        const FAN_DELETE = 0x00000200;
        /// Self was deleted.
        const FAN_DELETE_SELF = 0x00000400;
        /// Self was moved.
        const FAN_MOVE_SELF = 0x00000800;
        /// File was opened for exec.
        const FAN_OPEN_EXEC = 0x00001000;

        /// Event queue overflowed.
        const FAN_Q_OVERFLOW = 0x00004000;
        /// Filesystem error.
        const FAN_FS_ERROR = 0x00008000;

        /// File open in perm check.
        const FAN_OPEN_PERM = 0x00010000;
        /// File accessed in perm check.
        const FAN_ACCESS_PERM = 0x00020000;
        /// File open/exec in perm check.
        const FAN_OPEN_EXEC_PERM = 0x00040000;
        // const FAN_DIR_MODIFY = 0x00080000; /* Deprecated (reserved) */

        /// Pre-content access hook.
        const FAN_PRE_ACCESS = 0x00100000;
        /// Mount was attached.
        const FAN_MNT_ATTACH = 0x01000000;
        /// Mount was detached.
        const FAN_MNT_DETACH = 0x02000000;

        /// Interested in child events.
        const FAN_EVENT_ON_CHILD = 0x08000000;

        /// File was renamed.
        const FAN_RENAME = 0x10000000;

        /// Event occurred against dir.
        const FAN_ONDIR = 0x40000000;

        /// Helper: close (either `FAN_CLOSE_WRITE` or `FAN_CLOSE_NOWRITE`).
        const FAN_CLOSE = Self::FAN_CLOSE_WRITE.bits() | Self::FAN_CLOSE_NOWRITE.bits();
        /// Helper: moves (either `FAN_MOVED_FROM` or `FAN_MOVED_TO`).
        const FAN_MOVE = Self::FAN_MOVED_FROM.bits() | Self::FAN_MOVED_TO.bits();
    }
}

bitflags! {
    /// Configuration options for [`Fanotify::mark()`].
    // Keep in sync with <linux/fanotify.h>!
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct MarkFlags: libc::c_uint {
        /// Add the events to the marks.
        const FAN_MARK_ADD = 0x00000001;
        /// Remove the events from the marks.
        const FAN_MARK_REMOVE = 0x00000002;
        /// Don't follow symlinks, mark them.
        const FAN_MARK_DONT_FOLLOW = 0x00000004;
        /// Raise an error if the filesystem object to be marked is not a directory.
        const FAN_MARK_ONLYDIR = 0x00000008;
        /// Events are added to or removed from the ignore mask.
        const FAN_MARK_IGNORED_MASK = 0x00000020;
        /// Ignore mask shall survive modify events.
        const FAN_MARK_IGNORED_SURV_MODIFY = 0x00000040;
        /// Remove all marks.
        const FAN_MARK_FLUSH = 0x00000080;
        /// Do not pin inode object in the inode cache. Since Linux 5.19.
        const FAN_MARK_EVICTABLE = 0x00000200;
        /// Events are added to or removed from the ignore mask. Since Linux 6.0.
        const FAN_MARK_IGNORE = 0x00000400;

        /// Default flag: mark the inode (note that this is the zero value,
        /// so it is contained in every flag set).
        const FAN_MARK_INODE = 0x00000000;
        /// Mark the mount specified by pathname.
        const FAN_MARK_MOUNT = 0x00000010;
        /// Mark the filesystem specified by pathname. Since Linux 4.20.
        const FAN_MARK_FILESYSTEM = 0x00000100;
        /// Mark the mount namespace specified by pathname.
        ///
        /// Numerically this is `FAN_MARK_MOUNT | FAN_MARK_FILESYSTEM`.
        const FAN_MARK_MNTNS = 0x00000110;

        /// Combination of `FAN_MARK_IGNORE` and `FAN_MARK_IGNORED_SURV_MODIFY`.
        const FAN_MARK_IGNORE_SURV = Self::FAN_MARK_IGNORE.bits() | Self::FAN_MARK_IGNORED_SURV_MODIFY.bits();
    }
}

/// A fanotify group. This is also a file descriptor that can feed to other
/// interfaces consuming file descriptors.
///
/// The wrapper owns the descriptor: it is closed when the `Fanotify`
/// value is dropped, unless ownership is moved out via
/// `OwnedFd::from(fanotify)`.
#[derive(Debug)]
pub struct Fanotify {
    // The owned fanotify file descriptor.
    fd: OwnedFd,
}

// Note fanotify_mark(2) may not be available,
// and libc::SYS_fanotify_mark may not be defined.
// Therefore we query the number using libseccomp.
static SYS_FANOTIFY_MARK: LazyLock<Option<libc::c_long>> = LazyLock::new(|| {
    // Resolve the number by name once; `None` if libseccomp cannot resolve it.
    match ScmpSyscall::from_name("fanotify_mark") {
        Ok(syscall) => Some(libc::c_long::from(i32::from(syscall))),
        Err(_) => None,
    }
});

impl Fanotify {
    /// Add, remove, or modify an fanotify mark on a filesystem object.
    ///
    /// `flags` selects the operation and the kind of object, `mask` the
    /// events; the object is named by `dirfd` plus the optional `path`
    /// (a NULL pathname is passed to the kernel when `path` is `None`).
    ///
    /// # Errors
    ///
    /// Returns `ENOSYS` if the system call number could not be resolved,
    /// the path conversion error if `path` cannot be turned into a C
    /// string, or the errno reported by the system call.
    ///
    /// For more information, see [fanotify_mark(2)](https://man7.org/linux/man-pages/man2/fanotify_mark.2.html).
    pub fn mark<Fd: AsFd, P: ?Sized + NixPath>(
        &self,
        flags: MarkFlags,
        mask: MaskFlags,
        dirfd: Fd,
        path: Option<&P>,
    ) -> Result<(), Errno> {
        let Some(sys_fanotify_mark) = *SYS_FANOTIFY_MARK else {
            return Err(Errno::ENOSYS);
        };

        // SAFETY: In libc we trust.
        let ret = with_opt_nix_path(path, |pathname| unsafe {
            libc::syscall(
                sys_fanotify_mark,
                self.fd.as_raw_fd(),
                flags.bits(),
                syscall_ll_e!(mask.bits()),
                dirfd.as_fd().as_raw_fd(),
                pathname,
            )
        })?;

        Errno::result(ret)?;
        Ok(())
    }
}

impl FromRawFd for Fanotify {
    unsafe fn from_raw_fd(fd: RawFd) -> Self {
        Fanotify {
            // SAFETY: This function is unsafe, caller is trusted.
            fd: unsafe { OwnedFd::from_raw_fd(fd) },
        }
    }
}

impl AsFd for Fanotify {
    /// Borrows the underlying fanotify file descriptor.
    fn as_fd(&self) -> BorrowedFd<'_> {
        AsFd::as_fd(&self.fd)
    }
}

impl AsRawFd for Fanotify {
    /// Returns the raw number of the underlying fanotify file descriptor.
    fn as_raw_fd(&self) -> RawFd {
        AsRawFd::as_raw_fd(&self.fd)
    }
}

impl From<Fanotify> for OwnedFd {
    fn from(value: Fanotify) -> Self {
        value.fd
    }
}

impl Fanotify {
    /// Constructs a `Fanotify` wrapping an existing `OwnedFd`.
    ///
    /// # Safety
    ///
    /// `OwnedFd` is a valid `Fanotify`.
    pub unsafe fn from_owned_fd(fd: OwnedFd) -> Self {
        Self { fd }
    }
}

// Runs `f` with a C string pointer for `path`, or with a NULL pointer
// when no path is given.
//
// The pointer handed to `f` is only valid for the duration of the call.
// Fails only if converting `path` to a C string fails.
pub(crate) fn with_opt_nix_path<P, T, F>(path: Option<&P>, f: F) -> Result<T, Errno>
where
    P: ?Sized + NixPath,
    F: FnOnce(*const libc::c_char) -> T,
{
    let Some(path) = path else {
        return Ok(f(std::ptr::null()));
    };
    path.with_nix_path(|cstr| f(cstr.as_ptr()))
}

bitflags! {
    /// Mode argument flags for fallocate determining operation performed on a given range.
    // Keep in sync with <linux/falloc.h>!
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct FallocateFlags: libc::c_int {
        /// File size is not changed.
        ///
        /// offset + len can be greater than file size.
        const FALLOC_FL_KEEP_SIZE = 0x01;
        /// Deallocates space by creating a hole.
        ///
        /// Must be ORed with FALLOC_FL_KEEP_SIZE. Byte range starts at offset and continues for len bytes.
        const FALLOC_FL_PUNCH_HOLE = 0x02;
        /// Removes byte range from a file without leaving a hole.
        ///
        /// Byte range to collapse starts at offset and continues for len bytes.
        const FALLOC_FL_COLLAPSE_RANGE = 0x8;
        /// Zeroes space in specified byte range.
        ///
        /// Byte range starts at offset and continues for len bytes.
        const FALLOC_FL_ZERO_RANGE = 0x10;
        /// Increases file space by inserting a hole within the file size.
        ///
        /// Does not overwrite existing data. Hole starts at offset and continues for len bytes.
        const FALLOC_FL_INSERT_RANGE = 0x20;
        /// Shared file data extents are made private to the file.
        ///
        /// Guarantees that a subsequent write will not fail due to lack of space.
        const FALLOC_FL_UNSHARE_RANGE = 0x40;
        /// Zeroes a specified file range in such a way that subsequent writes to that
        /// range do not require further changes to the file mapping metadata.
        ///
        /// This flag cannot be specified in conjunction with the FALLOC_FL_KEEP_SIZE.
        const FALLOC_FL_WRITE_ZEROES = 0x80;
    }
}

bitflags! {
    /// Flags used and returned by [`get()`](fn.get.html) and
    /// [`set()`](fn.set.html).
    ///
    /// These are the flag bits of the process execution domain, see
    /// personality(2).
    // NOTE(review): the `get()`/`set()` links above are inherited wording;
    // confirm that such functions exist in this crate.
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct Persona: u64 {
        /// Provide the legacy virtual address space layout.
        const ADDR_COMPAT_LAYOUT = 0x0200000;
        /// Disable address-space-layout randomization.
        const ADDR_NO_RANDOMIZE = 0x0040000;
        /// Limit the address space to 32 bits.
        const ADDR_LIMIT_32BIT = 0x0800000;
        /// Use `0xc0000000` as the offset at which to search a virtual memory
        /// chunk on [`mmap(2)`], otherwise use `0xffffe000`.
        ///
        /// [`mmap(2)`]: https://man7.org/linux/man-pages/man2/mmap.2.html
        const ADDR_LIMIT_3GB = 0x8000000;
        /// User-space function pointers to signal handlers point to descriptors.
        const FDPIC_FUNCPTRS = 0x0080000;
        /// Map page 0 as read-only.
        const MMAP_PAGE_ZERO = 0x0100000;
        /// `PROT_READ` implies `PROT_EXEC` for [`mmap(2)`].
        ///
        /// [`mmap(2)`]: https://man7.org/linux/man-pages/man2/mmap.2.html
        const READ_IMPLIES_EXEC = 0x0400000;
        /// No effects.
        const SHORT_INODE = 0x1000000;
        /// [`select(2)`], [`pselect(2)`], and [`ppoll(2)`] do not modify the
        /// returned timeout argument when interrupted by a signal handler.
        ///
        /// [`select(2)`]: https://man7.org/linux/man-pages/man2/select.2.html
        /// [`pselect(2)`]: https://man7.org/linux/man-pages/man2/pselect.2.html
        /// [`ppoll(2)`]: https://man7.org/linux/man-pages/man2/ppoll.2.html
        const STICKY_TIMEOUTS = 0x4000000;
        /// Have [`uname(2)`] report a 2.6.40+ version number rather than a 3.x
        /// version number.
        ///
        /// [`uname(2)`]: https://man7.org/linux/man-pages/man2/uname.2.html
        const UNAME26 = 0x0020000;
        /// No effects.
        const WHOLE_SECONDS = 0x2000000;
    }
}

// Combine a major and a minor number into a device number.
//
// Bit layout of the result:
//   major bits 12..=31 -> bits 44..=63
//   minor bits  8..=31 -> bits 20..=43
//   major bits  0..=11 -> bits  8..=19
//   minor bits  0..=7  -> bits  0..=7
const fn makedev(major: u64, minor: u64) -> libc::dev_t {
    let major_hi = (major & 0xffff_f000) << 32;
    let major_lo = (major & 0x0000_0fff) << 8;
    let minor_hi = (minor & 0xffff_ff00) << 12;
    let minor_lo = minor & 0x0000_00ff;

    major_hi | major_lo | minor_hi | minor_lo
}

// Mirrors `struct seccomp_notif` from <linux/seccomp.h>.
//
// `#[repr(C)]` keeps the field order and layout as declared here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub(crate) struct seccomp_notif {
    // Notification identifier.
    pub(crate) id: u64,
    // Process/thread ID associated with the notification.
    pub(crate) pid: u32,
    // Notification flags.
    pub(crate) flags: u32,
    // System call data, see `seccomp_data`.
    pub(crate) data: seccomp_data,
}

// Mirrors `struct seccomp_notif_resp` from <linux/seccomp.h>.
//
// `#[repr(C)]` keeps the field order and layout as declared here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub(crate) struct seccomp_notif_resp {
    // Identifier of the notification being answered.
    pub(crate) id: u64,
    // Return value field of the response.
    pub(crate) val: i64,
    // Error field of the response.
    pub(crate) error: i32,
    // Response flags.
    pub(crate) flags: u32,
}

// Mirrors `struct seccomp_notif_addfd` from <linux/seccomp.h>.
//
// `#[repr(C)]` keeps the field order and layout as declared here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub(crate) struct seccomp_notif_addfd {
    // Identifier of the notification this request belongs to.
    pub(crate) id: u64,
    // Request flags.
    pub(crate) flags: u32,
    // Source file descriptor number.
    pub(crate) srcfd: u32,
    // Requested file descriptor number in the target.
    pub(crate) newfd: u32,
    // Flags for the new file descriptor.
    pub(crate) newfd_flags: u32,
}

// Mirrors `struct seccomp_data` from <linux/seccomp.h>.
//
// `#[repr(C)]` keeps the field order and layout as declared here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub(crate) struct seccomp_data {
    // System call number.
    pub(crate) nr: libc::c_int,
    // Architecture identifier.
    pub(crate) arch: u32,
    // Instruction pointer at the time of the system call.
    pub(crate) instruction_pointer: u64,
    // The six system call arguments, each widened to 64 bits.
    pub(crate) args: [u64; 6],
}

/// Query the parent-death signal of the calling process.
///
/// Returns `Ok(None)` if no parent-death signal is set (the kernel
/// reports `0`), `Ok(Some(signal))` otherwise. Fails with the errno of
/// prctl(2), or with the conversion error if the reported number is not
/// a known `Signal`.
pub fn get_pdeathsig() -> Result<Option<Signal>, Errno> {
    // Out parameter filled in by prctl(2).
    let mut signo: libc::c_int = 0;

    // SAFETY: In libc we trust.
    let res = unsafe { libc::prctl(libc::PR_GET_PDEATHSIG, &mut signo, 0, 0, 0) };
    Errno::result(res)?;

    if signo == 0 {
        Ok(None)
    } else {
        Signal::try_from(signo).map(Some)
    }
}

/// Set the parent-death signal of the calling process, i.e. the signal
/// the calling process receives when its parent dies.
///
/// Passing `None` clears the setting (signal number `0` is handed to
/// prctl(2)).
pub fn set_pdeathsig<T: Into<Option<Signal>>>(signal: T) -> Result<(), Errno> {
    let signo = signal.into().map_or(0, |s| s as libc::c_int);

    // SAFETY: In libc we trust.
    let res = unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, signo, 0, 0, 0) };
    Errno::result(res)?;
    Ok(())
}

/// Set the dumpable attribute which determines if core dumps are created for this process.
///
/// Issues `prctl(PR_SET_DUMPABLE)` with `attribute` converted to `1` or `0`.
pub fn set_dumpable(attribute: bool) -> Result<(), Errno> {
    prctl_set_bool(libc::PR_SET_DUMPABLE, attribute)
}

/// Set the "child subreaper" attribute for this process.
///
/// Issues `prctl(PR_SET_CHILD_SUBREAPER)` with `attribute` converted to `1` or `0`.
pub fn set_child_subreaper(attribute: bool) -> Result<(), Errno> {
    prctl_set_bool(libc::PR_SET_CHILD_SUBREAPER, attribute)
}

/// Set the name of the calling thread via `prctl(PR_SET_NAME)`.
///
/// Strings longer than 15 bytes will be truncated.
pub fn set_name(name: &CStr) -> Result<(), Errno> {
    let name_ptr = name.as_ptr();

    // SAFETY: In libc we trust.
    let res = unsafe { libc::prctl(libc::PR_SET_NAME, name_ptr, 0, 0, 0) };
    Errno::result(res)?;
    Ok(())
}

/// Set the calling thread's "no new privs" attribute. Once set this option cannot be unset.
///
/// Issues `prctl(PR_SET_NO_NEW_PRIVS)` with the value `1`.
pub fn set_no_new_privs() -> Result<(), Errno> {
    prctl_set_bool(libc::PR_SET_NO_NEW_PRIVS, true) // Cannot be unset
}

/// Get the "no new privs" attribute for the calling thread.
///
/// Returns `true` if `prctl(PR_GET_NO_NEW_PRIVS)` reports a non-zero value.
pub fn get_no_new_privs() -> Result<bool, Errno> {
    prctl_get_bool(libc::PR_GET_NO_NEW_PRIVS)
}

// Issue prctl(2) for a boolean `option`: `status` is passed as 1 or 0
// in the second argument, the remaining arguments are zero.
fn prctl_set_bool(option: libc::c_int, status: bool) -> Result<(), Errno> {
    let value = libc::c_ulong::from(status);

    // SAFETY: In libc we trust.
    let res = unsafe { libc::prctl(option, value, 0, 0, 0) };
    Errno::result(res)?;
    Ok(())
}

// Issue prctl(2) for a boolean `option` whose state is reported in the
// return value: any non-zero result maps to `true`.
fn prctl_get_bool(option: libc::c_int) -> Result<bool, Errno> {
    // SAFETY: In libc we trust.
    let res = unsafe { libc::prctl(option, 0, 0, 0, 0) };
    let state = Errno::result(res)?;
    Ok(state != 0)
}

/// Safe wrapper for dup3(2).
///
/// Duplicates `oldfd` onto `newfd` with the given `flags`. The call is
/// issued through `retry_on_eintr`, and the resulting descriptor is
/// returned as an `OwnedFd`.
pub fn dup3(oldfd: RawFd, newfd: RawFd, flags: libc::c_int) -> Result<OwnedFd, Errno> {
    // We use SYS_dup3 because Android does not define dup3(2).
    #[expect(clippy::cast_possible_truncation)]
    retry_on_eintr(|| {
        // SAFETY: In libc we trust.
        let ret = unsafe { libc::syscall(libc::SYS_dup3, oldfd, newfd, flags) };
        let fd = Errno::result(ret)?;

        // SAFETY: dup3(2) returns a valid FD on success.
        Ok(unsafe { OwnedFd::from_raw_fd(fd as RawFd) })
    })
}

/// Name (or, with `None`, un-name) the anonymous memory range starting
/// at `_addr` and spanning `_length` bytes, using
/// `prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME)`.
///
/// No-op in release mode: without `debug_assertions` this returns
/// `Ok(())` without issuing any system call.
pub fn set_vma_anon_name(
    _addr: NonNull<libc::c_void>,
    _length: NonZeroUsize,
    _name: Option<&CStr>,
) -> Result<(), Errno> {
    #[cfg(not(debug_assertions))]
    {
        return Ok(());
    }

    #[cfg(debug_assertions)]
    {
        // A NULL name pointer is passed when no name is given.
        let name_ptr = _name.map_or(std::ptr::null(), |name| name.as_ptr());

        // SAFETY: In libc we trust.
        let res = unsafe {
            libc::prctl(
                libc::PR_SET_VMA,
                libc::PR_SET_VMA_ANON_NAME,
                _addr.as_ptr(),
                _length,
                name_ptr,
            )
        };
        Errno::result(res)?;
        Ok(())
    }
}

/// An enumeration allowing the definition of the expiration time of an alarm,
/// recurring or not.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Expiration {
    /// Alarm will trigger once after the time given in `TimeSpec`.
    OneShot(TimeSpec),
    /// Alarm will trigger after a specified delay and then every interval of
    /// time.
    // NOTE(review): presumably the first field is the initial delay and the
    // second the interval; confirm against the code consuming this enum.
    IntervalDelayed(TimeSpec, TimeSpec),
    /// Alarm will trigger every specified interval of time.
    Interval(TimeSpec),
}

bitflags! {
    /// Flags that are used for arming the timer.
    ///
    /// Both values are taken from the corresponding `libc` constants.
    #[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    #[repr(transparent)]
    pub struct TimerSetTimeFlags: libc::c_int {
        /// Flag TFD_TIMER_ABSTIME, see timerfd_settime(2).
        const TFD_TIMER_ABSTIME = libc::TFD_TIMER_ABSTIME;
        /// Flag TFD_TIMER_CANCEL_ON_SET, see timerfd_settime(2).
        const TFD_TIMER_CANCEL_ON_SET = libc::TFD_TIMER_CANCEL_ON_SET;
    }
}

/*
 * Constants from <linux/limits.h> not defined by libc yet.
 */

// Maximum number of characters in an extended attribute name.
pub(crate) const XATTR_NAME_MAX: usize = 255;
// Maximum size of an extended attribute value (64k).
pub(crate) const XATTR_SIZE_MAX: usize = 1 << 16;
// Maximum size of an extended attribute name list (64k).
pub(crate) const XATTR_LIST_MAX: usize = 1 << 16;

// mmap(2) flag MAP_FIXED_NOREPLACE.
// NOTE(review): a single value is used for every architecture here;
// confirm this holds for all supported targets.
pub(crate) const MAP_FIXED_NOREPLACE: libc::c_int = 0x100000;

// shmat(2) flag SHM_EXEC (octal).
pub(crate) const SHM_EXEC: libc::c_int = 0o100000;

// TIOCEXCL ioctl(2) request number, see ioctl_tty(2).
//
// The value differs per architecture: one value for the default case,
// one for the mips family and one for sparc.
#[cfg(not(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6",
    target_arch = "sparc"
)))]
pub(crate) const TIOCEXCL: Ioctl = 0x540C;
#[cfg(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6"
))]
pub(crate) const TIOCEXCL: Ioctl = 0x740d;
#[cfg(target_arch = "sparc")]
pub(crate) const TIOCEXCL: Ioctl = 0x2000740d;

// TIOCNXCL ioctl(2) request number, see ioctl_tty(2).
//
// The value differs per architecture: one value for the default case,
// one for the mips family and one for sparc.
#[cfg(not(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6",
    target_arch = "sparc"
)))]
pub(crate) const TIOCNXCL: Ioctl = 0x540D;
#[cfg(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6"
))]
pub(crate) const TIOCNXCL: Ioctl = 0x740e;
#[cfg(target_arch = "sparc")]
pub(crate) const TIOCNXCL: Ioctl = 0x2000740e;

// TIOCGEXCL ioctl(2) request number, see ioctl_tty(2).
//
// The mips family, powerpc and sparc share one value, every other
// architecture uses the default one; the two differ only in the top
// bits of the request number.
#[cfg(not(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "sparc"
)))]
pub(crate) const TIOCGEXCL: Ioctl = 0x80045440;
#[cfg(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "sparc"
))]
pub(crate) const TIOCGEXCL: Ioctl = 0x40045440;

// TIOCGPTPEER ioctl(2) request number, see ioctl_tty(2).
//
// The mips family and powerpc happen to share the same value but are
// kept as separate cfg arms; sparc has its own value.
#[cfg(not(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6",
    target_arch = "powerpc",
    target_arch = "powerpc64",
    target_arch = "sparc"
)))]
pub(crate) const TIOCGPTPEER: Ioctl = 0x5441;
#[cfg(any(
    target_arch = "mips",
    target_arch = "mips64",
    target_arch = "mips32r6",
    target_arch = "mips64r6"
))]
pub(crate) const TIOCGPTPEER: Ioctl = 0x20005441;
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
pub(crate) const TIOCGPTPEER: Ioctl = 0x20005441;
#[cfg(target_arch = "sparc")]
pub(crate) const TIOCGPTPEER: Ioctl = 0x20007489;
