Add kernel API specification for the socket() system call, documenting
all aspects of socket creation.

Signed-off-by: Sasha Levin <[email protected]>
---
 net/socket.c | 489 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 489 insertions(+)

diff --git a/net/socket.c b/net/socket.c
index 9a0e720f08598..fa42497d72af2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -78,6 +78,7 @@
 #include <linux/pseudo_fs.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/syscall_api_spec.h>
 #include <linux/compat.h>
 #include <linux/kmod.h>
 #include <linux/audit.h>
@@ -89,6 +90,7 @@
 #include <linux/nospec.h>
 #include <linux/indirect_call_wrapper.h>
 #include <linux/io_uring/net.h>
+#include <linux/un.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -1692,6 +1694,493 @@ int __sys_socket(int family, int type, int protocol)
        return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
 }
 
+DEFINE_KERNEL_API_SPEC(sys_socket)
+       KAPI_DESCRIPTION("Create an endpoint for communication")
+       KAPI_LONG_DESC("Creates an endpoint for communication and returns a 
file descriptor "
+                      "that refers to that endpoint. The file descriptor 
returned by a successful "
+                      "call will be the lowest-numbered file descriptor not 
currently open for "
+                      "the process. The socket has the indicated type, which 
specifies the "
+                      "communication semantics. The socket() system call is 
the foundation of "
+                      "all network programming in Linux, providing access to 
various network "
+                      "protocols and communication mechanisms.")
+       KAPI_CONTEXT(KAPI_CTX_PROCESS | KAPI_CTX_SLEEPABLE)
+
+       KAPI_PARAM(0, "family", "int", "Protocol/address family (domain)")
+               KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+               KAPI_PARAM_TYPE(KAPI_TYPE_INT)
+               KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_RANGE)
+               KAPI_PARAM_RANGE(0, 45)  /* AF_UNSPEC to AF_MCTP */
+               KAPI_PARAM_CONSTRAINT("Common families: AF_UNIX (1), AF_INET 
(2), AF_INET6 (10), "
+                                     "AF_NETLINK (16), AF_PACKET (17). Others: 
AF_BLUETOOTH (31), AF_CAN (29), "
+                                     "AF_TIPC (30), AF_VSOCK (40), AF_XDP 
(44). Range: 0-45 (AF_MCTP). "
+                                     "PF_* are aliases. Negative or >= 46 
returns EAFNOSUPPORT.")
+       KAPI_PARAM_END
+
+       KAPI_PARAM(1, "type", "int", "Socket type with optional flags")
+               KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+               KAPI_PARAM_TYPE(KAPI_TYPE_INT)
+               KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_MASK)
+               KAPI_PARAM_VALID_MASK(SOCK_TYPE_MASK | SOCK_CLOEXEC | 
SOCK_NONBLOCK)
+               KAPI_PARAM_CONSTRAINT("Types: SOCK_STREAM (1), SOCK_DGRAM (2), 
SOCK_RAW (3), "
+                                     "SOCK_RDM (4), SOCK_SEQPACKET (5), 
SOCK_DCCP (6), SOCK_PACKET (10-obsolete). "
+                                     "Flags (since 2.6.27): SOCK_NONBLOCK, 
SOCK_CLOEXEC. Range: 0-10.")
+       KAPI_PARAM_END
+
+       KAPI_PARAM(2, "protocol", "int", "Protocol within the family")
+               KAPI_PARAM_FLAGS(KAPI_PARAM_IN)
+               KAPI_PARAM_TYPE(KAPI_TYPE_INT)
+               KAPI_PARAM_CONSTRAINT_TYPE(KAPI_CONSTRAINT_NONE)
+               KAPI_PARAM_CONSTRAINT("Usually 0 to select the default protocol 
for the given family and type. "
+                                     "For AF_INET/AF_INET6: IPPROTO_TCP (6), 
IPPROTO_UDP (17), IPPROTO_ICMP (1), "
+                                     "IPPROTO_RAW (255), etc. Must be >= 0 and 
< IPPROTO_MAX. "
+                                     "For AF_UNIX: only 0 or PF_UNIX (1) 
accepted. "
+                                     "For AF_PACKET: network byte order 
Ethernet protocol (e.g., ETH_P_IP). "
+                                     "For AF_NETLINK: NETLINK_ROUTE, 
NETLINK_AUDIT, etc. (0-31). "
+                                     "Protocol value passed through 
update_socket_protocol() BPF hook which may modify it.")
+       KAPI_PARAM_END
+
+       KAPI_RETURN("long", "File descriptor on success; negative error code on 
failure. "
+                   "On success, returns the lowest available file descriptor. "
+                   "The descriptor is automatically placed in the process's 
file descriptor table. "
+                   "If SOCK_CLOEXEC is set, FD_CLOEXEC is set on the 
descriptor. "
+                   "If SOCK_NONBLOCK is set, O_NONBLOCK is set on the file.")
+               KAPI_RETURN_TYPE(KAPI_TYPE_FD)
+               KAPI_RETURN_CHECK_TYPE(KAPI_RETURN_ERROR_CHECK)
+               KAPI_RETURN_SUCCESS(0)
+       KAPI_RETURN_END
+
+       /* Core error codes from __sock_create() and __sys_socket() */
+       KAPI_ERROR(0, -EAFNOSUPPORT, "EAFNOSUPPORT", "Address family not 
supported",
+                  "The implementation does not support the specified address 
family. "
+                  "Returned when: family < 0 || family >= NPROTO (46); "
+                  "protocol family not registered in net_families[]; "
+                  "protocol family module cannot be loaded; "
+                  "try_module_get() fails on protocol family owner.")
+       KAPI_ERROR(1, -EINVAL, "EINVAL", "Invalid argument",
+                  "Invalid argument specified. Returned when: "
+                  "type < 0 || type >= SOCK_MAX (11); "
+                  "invalid flags in type ((type & ~SOCK_TYPE_MASK) & 
~(SOCK_CLOEXEC | SOCK_NONBLOCK)); "
+                  "other protocol-specific validation failures.")
+       KAPI_ERROR(2, -ENFILE, "ENFILE", "File table overflow",
+                  "The system-wide limit on the total number of open files has 
been reached. "
+                  "Returned when sock_alloc() fails due to new_inode_pseudo() 
failure.")
+       KAPI_ERROR(3, -EMFILE, "EMFILE", "Too many open files",
+                  "The per-process limit on the number of open file 
descriptors has been reached. "
+                  "Returned when sock_map_fd() cannot allocate a new file 
descriptor.")
+       KAPI_ERROR(4, -ENOMEM, "ENOMEM", "Out of memory",
+                  "Insufficient kernel memory available. Can occur in: "
+                  "sk_alloc() when allocating sock structure; "
+                  "protocol-specific init functions; "
+                  "security_sk_alloc() in LSM hooks; "
+                  "various kmalloc()/kmem_cache_alloc() calls.")
+       KAPI_ERROR(5, -ENOBUFS, "ENOBUFS", "No buffer space available",
+                  "Insufficient resources to create socket. Similar to ENOMEM 
but used by "
+                  "some protocol families (e.g., AF_PACKET) to indicate 
resource exhaustion.")
+       KAPI_ERROR(6, -EPROTONOSUPPORT, "EPROTONOSUPPORT", "Protocol not 
supported",
+                  "The protocol is not supported within this domain. Returned 
when: "
+                  "AF_UNIX: protocol != 0 && protocol != PF_UNIX; "
+                  "AF_INET/AF_INET6: protocol not found in inetsw[] array; "
+                  "AF_NETLINK: protocol < 0 || protocol >= MAX_LINKS (32).")
+       KAPI_ERROR(7, -ESOCKTNOSUPPORT, "ESOCKTNOSUPPORT", "Socket type not 
supported",
+                  "The socket type is not supported within this domain. 
Returned when: "
+                  "AF_UNIX: type not in {STREAM, DGRAM, SEQPACKET, RAW}; "
+                  "AF_INET/AF_INET6: no matching (type, protocol) in inetsw[]; 
"
+                  "AF_PACKET: type not in {DGRAM, RAW, PACKET}; "
+                  "AF_NETLINK: type not in {RAW, DGRAM}.")
+       KAPI_ERROR(8, -EPERM, "EPERM", "Operation not permitted",
+                  "Permission denied due to insufficient privileges. Returned 
when: "
+                  "AF_INET/AF_INET6 with SOCK_RAW: missing CAP_NET_RAW; "
+                  "AF_PACKET: missing CAP_NET_RAW; "
+                  "Some protocol families may have additional restrictions.")
+       KAPI_ERROR(9, -EACCES, "EACCES", "Permission denied",
+                  "Permission denied by Linux Security Module (SELinux, 
AppArmor, etc.). "
+                  "Returned by security_socket_create() or 
security_socket_post_create() hooks.")
+       KAPI_ERROR(10, -EAGAIN, "EAGAIN", "Resource temporarily unavailable",
+                  "Transient resource shortage. Can be returned by some 
protocol families "
+                  "during initialization when resources are temporarily 
exhausted.")
+       KAPI_ERROR(11, -EINTR, "EINTR", "Interrupted system call",
+                  "Operation interrupted by signal. Rare for socket() but 
possible if "
+                  "module loading is interrupted or during memory allocation 
with GFP_KERNEL.")
+       KAPI_ERROR(12, -EFAULT, "EFAULT", "Bad address",
+                  "Not directly returned by socket() since all parameters are 
values, not pointers. "
+                  "Listed for completeness as it appears in documentation.")
+       KAPI_ERROR(13, -ENOSYS, "ENOSYS", "Function not implemented",
+                  "Can occur in containers using alt-syscall where socket() is 
not whitelisted, "
+                  "or on architectures where socket() is not implemented.")
+
+       KAPI_ERROR_COUNT(14)
+       KAPI_PARAM_COUNT(3)
+       KAPI_SINCE_VERSION("4.2BSD")
+
+       KAPI_EXAMPLES("/* Create a TCP socket */\n"
+                     "int tcp_sock = socket(AF_INET, SOCK_STREAM, 0);\n"
+                     "if (tcp_sock < 0) {\n"
+                     "    perror(\"socket\");\n"
+                     "    exit(EXIT_FAILURE);\n"
+                     "}\n\n"
+                     "/* Create a non-blocking UDP socket with close-on-exec 
*/\n"
+                     "int udp_sock = socket(AF_INET6, SOCK_DGRAM | 
SOCK_NONBLOCK | SOCK_CLOEXEC, 0);\n\n"
+                     "/* Create a raw ICMP socket (requires CAP_NET_RAW) */\n"
+                     "int raw_sock = socket(AF_INET, SOCK_RAW, 
IPPROTO_ICMP);\n\n"
+                     "/* Create a Unix domain datagram socket */\n"
+                     "int unix_sock = socket(AF_UNIX, SOCK_DGRAM, 0);\n\n"
+                     "/* Create a netlink socket for routing information */\n"
+                     "int nl_sock = socket(AF_NETLINK, SOCK_RAW, 
NETLINK_ROUTE);\n\n"
+                     "/* Create a packet socket for raw Ethernet frames 
(requires CAP_NET_RAW) */\n"
+                     "int packet_sock = socket(AF_PACKET, SOCK_RAW, 
htons(ETH_P_ALL));\n\n"
+                     "/* Create a Bluetooth L2CAP socket */\n"
+                     "int bt_sock = socket(AF_BLUETOOTH, SOCK_SEQPACKET, 
BTPROTO_L2CAP);")
+
+       KAPI_NOTES("Implementation details:\n"
+                  "- Uses RCU to safely access net_families[] array\n"
+                  "- May trigger automatic module loading via 
request_module(\"net-pf-%d\", family)\n"
+                  "- Allocates inode from sock_inode_cache via 
new_inode_pseudo()\n"
+                  "- Each protocol family registers via sock_register() with 
unique family number\n"
+                  "- Socket creation involves: sock_alloc() -> pf->create() -> 
sock_map_fd()\n"
+                  "- The update_socket_protocol() BPF hook can modify the 
protocol parameter\n"
+                  "- LSM hooks called: security_socket_create() and 
security_socket_post_create()\n"
+                  "- Creates struct socket (VFS layer) and struct sock 
(network layer)\n"
+                  "- Socket state initialized to SS_UNCONNECTED\n"
+                  "- File operations set to socket_file_ops\n"
+                  "- The (PF_INET, SOCK_PACKET) combination is deprecated 
since Linux 2.0\n"
+                  "Build-time checks ensure SOCK_CLOEXEC == O_CLOEXEC and flag 
consistency")
+
+       /* Lock specifications */
+       KAPI_LOCK(0, "rcu_read_lock", KAPI_LOCK_RCU)
+               KAPI_LOCK_ACQUIRED
+               KAPI_LOCK_RELEASED
+               KAPI_LOCK_DESC("Protects net_families[] array access during 
protocol family lookup. "
+                              "Acquired before 
rcu_dereference(net_families[family]), "
+                              "released after pf->create() call or on error 
path.")
+       KAPI_LOCK_END
+
+       KAPI_LOCK(1, "pf->owner module refcount", KAPI_LOCK_CUSTOM)
+               KAPI_LOCK_ACQUIRED
+               KAPI_LOCK_RELEASED
+               KAPI_LOCK_DESC("Prevents protocol family module unload during 
socket creation. "
+                              "try_module_get(pf->owner) before pf->create(), "
+                              "module_put(pf->owner) after completion.")
+       KAPI_LOCK_END
+
+       KAPI_LOCK(2, "sock->ops->owner module refcount", KAPI_LOCK_CUSTOM)
+               KAPI_LOCK_ACQUIRED
+               KAPI_LOCK_DESC("Prevents socket operations module unload during 
socket lifetime. "
+                              "try_module_get(sock->ops->owner) after 
successful creation, "
+                              "released only on sock_release() when socket is 
closed.")
+       KAPI_LOCK_END
+
+       KAPI_LOCK_COUNT(3)
+
+       /* Signal handling */
+       KAPI_SIGNAL(0, 0, "Module loading", KAPI_SIGNAL_RECEIVE, 
KAPI_SIGNAL_ACTION_RESTART)
+               KAPI_SIGNAL_CONDITION("CONFIG_MODULES && request_module() 
called")
+               KAPI_SIGNAL_DESC("Module loading via request_module() is 
interruptible. "
+                                "Signal delivery causes -EINTR from modprobe 
execution.")
+               KAPI_SIGNAL_TIMING(KAPI_SIGNAL_TIME_DURING)
+               KAPI_SIGNAL_INTERRUPTIBLE
+       KAPI_SIGNAL_END
+
+       KAPI_SIGNAL_COUNT(1)
+
+       /* Side effects */
+       KAPI_SIDE_EFFECT(0, KAPI_EFFECT_ALLOC_MEMORY | 
KAPI_EFFECT_RESOURCE_CREATE,
+                        "socket structures",
+                        "Allocates struct socket (VFS), struct sock (network), 
and protocol-specific data. "
+                        "Memory from: sock_inode_cache, protocol's slab cache, 
and general kmalloc.")
+               KAPI_EFFECT_CONDITION("Always occurs on successful socket 
creation")
+               KAPI_EFFECT_REVERSIBLE
+       KAPI_SIDE_EFFECT_END
+
+       KAPI_SIDE_EFFECT(1, KAPI_EFFECT_RESOURCE_CREATE,
+                        "file descriptor",
+                        "Allocates new file descriptor at lowest available 
index. "
+                        "Creates struct file with socket_file_ops. Sets up 
file->private_data = socket.")
+               KAPI_EFFECT_CONDITION("Always occurs on successful socket 
creation")
+               KAPI_EFFECT_REVERSIBLE
+       KAPI_SIDE_EFFECT_END
+
+       KAPI_SIDE_EFFECT(2, KAPI_EFFECT_FILESYSTEM,
+                        "protocol module",
+                        "May trigger request_module(\"net-pf-%d\", family) to 
load protocol module. "
+                        "Executes /sbin/modprobe in userspace context.")
+               KAPI_EFFECT_CONDITION("CONFIG_MODULES=y && 
!net_families[family] && first attempt")
+       KAPI_SIDE_EFFECT_END
+
+       KAPI_SIDE_EFFECT(3, KAPI_EFFECT_MODIFY_STATE,
+                        "LSM and audit",
+                        "Calls security_socket_create() pre-creation and 
security_socket_post_create() "
+                        "post-creation. May generate audit events. 
SELinux/AppArmor may deny.")
+               KAPI_EFFECT_CONDITION("CONFIG_SECURITY=y or CONFIG_AUDIT=y")
+       KAPI_SIDE_EFFECT_END
+
+       KAPI_SIDE_EFFECT(4, KAPI_EFFECT_MODIFY_STATE,
+                        "BPF programs",
+                        "update_socket_protocol() hook can modify protocol 
parameter. "
+                        "BPF_CGROUP_RUN_PROG_INET_SOCK() may run for 
AF_INET/AF_INET6.")
+               KAPI_EFFECT_CONDITION("BPF programs attached to cgroup or 
socket hooks")
+       KAPI_SIDE_EFFECT_END
+
+       KAPI_SIDE_EFFECT(5, KAPI_EFFECT_NETWORK | KAPI_EFFECT_HARDWARE,
+                        "network stack",
+                        "Initializes protocol-specific state. May interact 
with network hardware "
+                        "(e.g., AF_PACKET binds to network interface).")
+               KAPI_EFFECT_CONDITION("Protocol family specific")
+       KAPI_SIDE_EFFECT_END
+
+       KAPI_SIDE_EFFECT(6, KAPI_EFFECT_MODIFY_STATE,
+                        "resource accounting",
+                        "Updates task and memory cgroup accounting. Charges 
socket memory to owner. "
+                        "Increments global socket counters.")
+               KAPI_EFFECT_CONDITION("CONFIG_MEMCG=y or other accounting 
enabled")
+       KAPI_SIDE_EFFECT_END
+
+       KAPI_SIDE_EFFECT_COUNT(7)
+
+       /* State transitions */
+       KAPI_STATE_TRANS(0, "file descriptor table",
+                        "n open descriptors", "n+1 open descriptors",
+                        "New fd allocated at min(available). Updates 
current->files->fd_array[]")
+       KAPI_STATE_TRANS_END
+
+       KAPI_STATE_TRANS(1, "socket state machine",
+                        "non-existent", "SS_UNCONNECTED",
+                        "Socket created in unconnected state, ready for bind() 
or connect()")
+       KAPI_STATE_TRANS_END
+
+       KAPI_STATE_TRANS(2, "network namespace",
+                        "no socket", "socket registered",
+                        "Socket associated with current->nsproxy->net_ns 
network namespace")
+       KAPI_STATE_TRANS_END
+
+       KAPI_STATE_TRANS(3, "memory accounting",
+                        "pre-allocation", "memory charged",
+                        "Socket memory charged to owner's memcg and rlimits")
+       KAPI_STATE_TRANS_END
+
+       KAPI_STATE_TRANS_COUNT(4)
+
+       /* Networking-specific specifications */
+
+       /* Socket state specification */
+       KAPI_SOCKET_STATE_REQ(KAPI_SOCK_STATE_UNSPEC)
+       KAPI_SOCKET_STATE_RESULT(KAPI_SOCK_STATE_OPEN)
+       KAPI_SOCKET_STATE_COND("Successful socket creation")
+       KAPI_SOCKET_STATE_PROTOS(KAPI_PROTO_ALL)
+       KAPI_SOCKET_STATE_END
+
+       /* Protocol-specific behaviors - detailed specifications */
+       KAPI_PROTOCOL_BEHAVIOR(0, KAPI_PROTO_TCP,
+               "TCP (Transmission Control Protocol) creates reliable, ordered, 
connection-oriented "
+               "byte streams. Features: 3-way handshake connection 
establishment; sequence numbers "
+               "for ordering; acknowledgments and retransmissions for 
reliability; flow control "
+               "via sliding window; congestion control (Reno/CUBIC/BBR); Nagle 
algorithm for "
+               "small packet aggregation; keep-alive probes; urgent data via 
MSG_OOB. "
+               "Socket combines (AF_INET/AF_INET6, SOCK_STREAM, IPPROTO_TCP).")
+               KAPI_PROTOCOL_FLAGS(0, "TCP-specific socket options via SOL_TCP 
level")
+       KAPI_PROTOCOL_BEHAVIOR_END
+
+       KAPI_PROTOCOL_BEHAVIOR(1, KAPI_PROTO_UDP,
+               "UDP (User Datagram Protocol) creates unreliable, 
connectionless datagram service. "
+               "Features: no connection establishment; best-effort delivery; 
message boundaries "
+               "preserved; no flow/congestion control; optional checksums; 
multicast/broadcast "
+               "capable; lower overhead than TCP. Maximum datagram size 65507 
bytes (65535 - "
+               "IP header - UDP header). connect() on UDP socket sets default 
destination. "
+               "Socket combines (AF_INET/AF_INET6, SOCK_DGRAM, IPPROTO_UDP).")
+               KAPI_PROTOCOL_FLAGS(0, "UDP-specific options like UDP_CORK via 
SOL_UDP")
+       KAPI_PROTOCOL_BEHAVIOR_END
+
+       KAPI_PROTOCOL_BEHAVIOR(2, KAPI_PROTO_UNIX,
+               "Unix domain sockets provide high-performance local IPC with 
filesystem-based "
+               "addressing or Linux abstract namespace. Features: reliable 
delivery; in-order "
+               "semantics for SOCK_STREAM; message boundaries for 
SOCK_DGRAM/SOCK_SEQPACKET; "
+               "credential passing via SCM_CREDENTIALS; file descriptor 
passing via SCM_RIGHTS; "
+               "no network overhead; kernel-only data path. SOCK_RAW mapped to 
SOCK_DGRAM. "
+               "Maximum datagram size 130688 bytes by default 
(net.core.wmem_max).")
+               KAPI_PROTOCOL_FLAGS(0, "No Unix-specific socket level; uses 
SOL_SOCKET only")
+       KAPI_PROTOCOL_BEHAVIOR_END
+
+       KAPI_PROTOCOL_BEHAVIOR(3, KAPI_PROTO_RAW,
+               "Raw sockets provide direct access to network layer (IP) or 
link layer (Ethernet). "
+               "Features: receive/send raw IP packets; implement custom 
protocols; packet "
+               "sniffing; bypass transport layer. IP header included based on 
IP_HDRINCL option. "
+               "Protocol field specifies which protocol to receive 
(IPPROTO_ICMP, etc.) or "
+               "IPPROTO_RAW to send any. Link layer access via AF_PACKET. 
Requires CAP_NET_RAW "
+               "capability. Used by ping, traceroute, nmap, tcpdump.")
+               KAPI_PROTOCOL_FLAGS(0, "IP_HDRINCL and raw-specific options via 
SOL_RAW")
+       KAPI_PROTOCOL_BEHAVIOR_END
+
+       KAPI_PROTOCOL_BEHAVIOR(4, KAPI_PROTO_PACKET,
+               "Packet sockets provide direct access to link layer (Layer 2). 
Features: "
+               "send/receive raw Ethernet frames; implement network protocols 
in userspace; "
+               "packet capture and injection; access to all packets on 
interface. SOCK_RAW "
+               "provides full Layer 2 header; SOCK_DGRAM provides cooked 
packets without "
+               "Layer 2 header. Protocol specifies Ethernet protocol 
(ETH_P_IP, ETH_P_ALL). "
+               "High-performance variants: PACKET_MMAP, PACKET_FANOUT. 
Requires CAP_NET_RAW.")
+               KAPI_PROTOCOL_FLAGS(0, "Extensive options via SOL_PACKET level")
+       KAPI_PROTOCOL_BEHAVIOR_END
+
+       KAPI_PROTOCOL_BEHAVIOR(5, KAPI_PROTO_NETLINK,
+               "Netlink sockets provide kernel/user-space communication 
interface. Features: "
+               "reliable datagram service; multicast groups; message-based; 
TLV attributes; "
+               "async notifications; used for routing, netfilter, audit, 
SELinux, etc. "
+               "Protocol specifies subsystem: NETLINK_ROUTE (routing/link), 
NETLINK_FIREWALL, "
+               "NETLINK_NETFILTER, NETLINK_AUDIT, etc. No special capabilities 
for most "
+               "protocols except administrative operations.")
+               KAPI_PROTOCOL_FLAGS(0, "Netlink-specific options and 
attributes")
+       KAPI_PROTOCOL_BEHAVIOR_END
+
+       KAPI_PROTOCOL_BEHAVIOR(6, KAPI_PROTO_SCTP,
+               "SCTP (Stream Control Transmission Protocol) provides reliable, 
message-oriented "
+               "service with multi-streaming and multi-homing. Features: 
message boundaries; "
+               "ordered/unordered delivery; multi-streaming prevents 
head-of-line blocking; "
+               "multi-homing for redundancy; heartbeats; partial reliability 
extension. "
+               "4-way handshake with cookie mechanism prevents SYN floods. "
+               "Socket combines (AF_INET/AF_INET6, SOCK_STREAM/SOCK_SEQPACKET, 
IPPROTO_SCTP).")
+               KAPI_PROTOCOL_FLAGS(0, "SCTP-specific options via SOL_SCTP 
level")
+       KAPI_PROTOCOL_BEHAVIOR_END
+
+       KAPI_PROTOCOL_BEHAVIOR_COUNT(7)
+
+       /* Buffer specification - not applicable for socket creation */
+       KAPI_BUFFER_SPEC(0)
+       KAPI_BUFFER_SIZE(0, 0, 0)
+       KAPI_BUFFER_END
+
+       /* Async specification - socket creation is synchronous */
+       KAPI_ASYNC_SPEC(KAPI_ASYNC_BLOCK, 0)
+       KAPI_ASYNC_END
+
+       /* Network-specific errors are already covered in main error list */
+
+       /* Address families supported - comprehensive list */
+       KAPI_ADDR_FAMILY(0, AF_UNIX, "AF_UNIX/AF_LOCAL", sizeof(struct 
sockaddr_un), 2, 110)
+               KAPI_ADDR_FORMAT("struct sockaddr_un { sa_family_t sun_family; 
char sun_path[108]; }")
+               KAPI_ADDR_FEATURES(false, false, false)
+               KAPI_ADDR_SPECIAL("Abstract namespace: sun_path[0] == '\\0'; "
+                                 "Autobind: empty sun_path gets random 
abstract address; "
+                                 "Filesystem: normal paths follow filesystem 
permissions")
+               KAPI_ADDR_PORTS(0, 0)  /* No port concept */
+       KAPI_ADDR_FAMILY_END
+
+       KAPI_ADDR_FAMILY(1, AF_INET, "AF_INET", sizeof(struct sockaddr_in), 16, 
16)
+               KAPI_ADDR_FORMAT("struct sockaddr_in { sa_family_t sin_family; 
__be16 sin_port; "
+                                "struct in_addr sin_addr; char sin_zero[8]; }")
+               KAPI_ADDR_FEATURES(true, true, true)
+               KAPI_ADDR_SPECIAL("INADDR_ANY (0.0.0.0) - wildcard; "
+                                 "INADDR_LOOPBACK (127.0.0.1) - loopback; "
+                                 "INADDR_BROADCAST (255.255.255.255) - 
broadcast; "
+                                 "224.0.0.0/4 - multicast range")
+               KAPI_ADDR_PORTS(0, 65535)  /* 0 = ephemeral port assignment */
+       KAPI_ADDR_FAMILY_END
+
+       KAPI_ADDR_FAMILY(2, AF_INET6, "AF_INET6", sizeof(struct sockaddr_in6), 
28, 28)
+               KAPI_ADDR_FORMAT("struct sockaddr_in6 { sa_family_t 
sin6_family; __be16 sin6_port; "
+                                "__be32 sin6_flowinfo; struct in6_addr 
sin6_addr; __u32 sin6_scope_id; }")
+               KAPI_ADDR_FEATURES(true, true, false)  /* No broadcast in IPv6 
*/
+               KAPI_ADDR_SPECIAL("in6addr_any (::) - wildcard; "
+                                 "in6addr_loopback (::1) - loopback; "
+                                 "ff00::/8 - multicast range; "
+                                 "fe80::/10 - link-local; "
+                                 "::ffff:0:0/96 - IPv4-mapped addresses")
+               KAPI_ADDR_PORTS(0, 65535)
+       KAPI_ADDR_FAMILY_END
+
+       KAPI_ADDR_FAMILY(3, AF_NETLINK, "AF_NETLINK", sizeof(struct 
sockaddr_nl), 12, 12)
+               KAPI_ADDR_FORMAT("struct sockaddr_nl { sa_family_t nl_family; 
__u16 nl_pad; "
+                                "__u32 nl_pid; __u32 nl_groups; }")
+               KAPI_ADDR_FEATURES(false, true, false)  /* Multicast via 
nl_groups */
+               KAPI_ADDR_SPECIAL("nl_pid: 0 = kernel; getpid() = this process; 
"
+                                 "nl_groups: bitmask of multicast groups")
+               KAPI_ADDR_PORTS(0, 0)  /* Uses nl_pid instead */
+       KAPI_ADDR_FAMILY_END
+
+       KAPI_ADDR_FAMILY(4, AF_PACKET, "AF_PACKET", sizeof(struct sockaddr_ll), 
20, 20)
+               KAPI_ADDR_FORMAT("struct sockaddr_ll { sa_family_t sll_family; 
__be16 sll_protocol; "
+                                "int sll_ifindex; __u16 sll_hatype; __u8 
sll_pkttype; "
+                                "__u8 sll_halen; __u8 sll_addr[8]; }")
+               KAPI_ADDR_FEATURES(true, true, true)  /* Via sll_pkttype */
+               KAPI_ADDR_SPECIAL("sll_ifindex: 0 = any interface; "
+                                 "sll_protocol: ETH_P_ALL = all protocols; "
+                                 "sll_pkttype: 
PACKET_HOST/BROADCAST/MULTICAST/OTHERHOST")
+               KAPI_ADDR_PORTS(0, 0)  /* Layer 2, no ports */
+       KAPI_ADDR_FAMILY_END
+
+       KAPI_ADDR_FAMILY(5, AF_BLUETOOTH, "AF_BLUETOOTH", sizeof(struct 
sockaddr), 14, 258)
+               KAPI_ADDR_FORMAT("Varies by protocol: sockaddr_l2 (L2CAP), 
sockaddr_rc (RFCOMM), "
+                                "sockaddr_hci (HCI), sockaddr_sco (SCO)")
+               KAPI_ADDR_FEATURES(false, false, false)
+               KAPI_ADDR_SPECIAL("BDADDR_ANY (00:00:00:00:00:00) - any device; 
"
+                                 "BDADDR_LOCAL (00:00:00:ff:ff:ff) - local 
adapter")
+               KAPI_ADDR_PORTS(1, 30)  /* PSM for L2CAP, channel for RFCOMM */
+       KAPI_ADDR_FAMILY_END
+
+       KAPI_ADDR_FAMILY_COUNT(6)
+
+       /* Security specification - use existing capability mechanism */
+       KAPI_CAPABILITY(0, CAP_NET_RAW, "CAP_NET_RAW", 
KAPI_CAP_GRANT_PERMISSION)
+               KAPI_CAP_CONDITION("family == AF_PACKET || type == SOCK_RAW")
+               KAPI_CAP_ALLOWS("Raw socket creation and packet injection")
+               KAPI_CAP_WITHOUT("Permission denied (EPERM)")
+       KAPI_CAPABILITY_END
+
+       KAPI_CAPABILITY_COUNT(1)
+
+       /* Operation characteristics */
+       .is_connection_oriented = false,
+       .is_message_oriented = false,
+       .supports_oob_data = false,
+       .supports_peek = false,
+       .supports_select_poll = false,
+       .is_reentrant = true,
+
+       /* Semantic descriptions */
+       KAPI_NET_DATA_TRANSFER("Not applicable - socket() only creates the 
endpoint")
+
+       /* Additional constraints and validation rules */
+       KAPI_CONSTRAINT(0, "Protocol/Type Compatibility",
+                       "Not all (family, type, protocol) combinations are 
valid. "
+                       "Common valid combinations: "
+                       "(AF_INET, SOCK_STREAM, IPPROTO_TCP); "
+                       "(AF_INET, SOCK_DGRAM, IPPROTO_UDP); "
+                       "(AF_INET, SOCK_RAW, IPPROTO_ICMP); "
+                       "(AF_UNIX, SOCK_STREAM, 0); "
+                       "(AF_UNIX, SOCK_DGRAM, 0); "
+                       "(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); "
+                       "(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)")
+       KAPI_CONSTRAINT_END
+
+       KAPI_CONSTRAINT(1, "Module Loading",
+                       "If protocol family module not loaded, socket() may 
block during "
+                       "request_module() execution. This is interruptible and 
may take "
+                       "significant time. Modules loaded: net-pf-N where N is 
family number.")
+       KAPI_CONSTRAINT_END
+
+       KAPI_CONSTRAINT(2, "Capability Requirements",
+                       "CAP_NET_RAW required for: "
+                       "- AF_INET/AF_INET6 with SOCK_RAW "
+                       "- AF_PACKET with any socket type "
+                       "- Some AF_NETLINK operations require CAP_NET_ADMIN "
+                       "- AF_BLUETOOTH may require CAP_NET_ADMIN for some 
operations")
+       KAPI_CONSTRAINT_END
+
+       KAPI_CONSTRAINT(3, "Network Namespace",
+                       "Socket is created in current->nsproxy->net_ns network 
namespace. "
+                       "Socket is bound to this namespace for its lifetime. "
+                       "Different namespaces have independent network stacks.")
+       KAPI_CONSTRAINT_END
+
+       KAPI_CONSTRAINT(4, "Memory Limits",
+                       "Socket creation respects: "
+                       "- RLIMIT_NOFILE for file descriptor limits "
+                       "- Memory cgroup limits for socket memory "
+                       "- System-wide socket memory limits 
(net.core.somaxconn, etc.) "
+                       "- Per-protocol memory limits")
+       KAPI_CONSTRAINT_END
+
+       KAPI_CONSTRAINT_COUNT(5)
+
+KAPI_END_SPEC;
+
 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
 {
        return __sys_socket(family, type, protocol);
-- 
2.39.5


Reply via email to