Skip to content

Commit 87fefe6

Browse files
feat: system resilience + installation automation improvements (#36)
* feat: add install automation, systemd services and fix send thread race condition
  - Add install targets to Makefiles for kernel module and daemon
  - Update cloud-init configs with systemd services for bp-socket-daemon, ion-dtn, and ud3tn
  - Fix include paths from include/bp_socket.h to bp_socket.h
  - Fix race condition in ion_send_thread by changing while loop condition
  - Improve logging messages in daemon for better observability
  - Add missing dependencies (libnl-3-dev, clang-format, sparse) to build configs
  - Add automatic module loading and service startup in provisioning
  Signed-off-by: Sylvain Pierrot <pierrot.sylvain14@gmail.com>
* chore: improve logs
  Signed-off-by: Sylvain Pierrot <pierrot.sylvain14@gmail.com>
* fix: correct Netlink socket cleanup order in daemon shutdown
  - Free libevent events before closing Netlink socket to avoid using closed socket
  Signed-off-by: Sylvain Pierrot <pierrot.sylvain14@gmail.com>
* chore: increase send queue limit from 1000 to 5000
  Signed-off-by: Sylvain Pierrot <pierrot.sylvain14@gmail.com>
---------
Signed-off-by: Sylvain Pierrot <pierrot.sylvain14@gmail.com>
1 parent 4340aa9 commit 87fefe6

14 files changed

Lines changed: 134 additions & 53 deletions

File tree

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: all bp_socket daemon clean format check-format
1+
.PHONY: all bp_socket daemon clean format check-format install
22

33
all: bp_socket daemon
44

@@ -15,3 +15,15 @@ clean:
1515
format:
1616
$(MAKE) -C bp_socket format
1717
$(MAKE) -C daemon format
18+
19+
# Privilege-escalation prefix for the steps that modify system state.
# Empty when make already runs as root (sudo is then unnecessary and may not
# be installed), "sudo" otherwise. Override from the command line, e.g.
# `make install SUDO=doas` or `make install SUDO=`.
ifeq ($(origin SUDO),undefined)
SUDO := $(shell [ "$$(id -u)" -eq 0 ] || echo sudo)
endif

# install: publish the shared header, rebuild both components from scratch,
# then (re)load the freshly built kernel module.
#
# The header is installed before the rebuilds, presumably so that sources
# including "bp_socket.h" by bare name resolve it from the system include
# path -- confirm against the sub-Makefiles' include flags.
# Every step that needs root uses $(SUDO), so the target behaves the same
# whether it is invoked as root or as a regular user.
install:
	$(SUDO) install -d /usr/local/include
	$(SUDO) install -m 644 include/bp_socket.h /usr/local/include/
	$(MAKE) -C daemon clean
	$(MAKE) -C daemon
	$(MAKE) -C bp_socket clean
	$(MAKE) -C bp_socket
	@# Unload a previously inserted module first; insmod fails if "bp" is already loaded.
	@if lsmod | grep -q "^bp "; then \
		$(SUDO) rmmod bp; \
	fi
	$(SUDO) insmod bp_socket/bp.ko

Vagrantfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Vagrant.configure("2") do |config|
2424
EOF
2525
ion.vm.provision "shell", inline: <<-EOF
2626
export DEBIAN_FRONTEND=noninteractive
27-
apt install -y curl git ca-certificates make pkg-config libnl-genl-3-dev libevent-dev build-essential linux-headers-$(uname -r)
27+
apt install -y curl git ca-certificates make pkg-config libnl-genl-3-dev libnl-3-dev libevent-dev build-essential clang-format sparse linux-headers-$(uname -r)
2828
2929
cd /opt
3030
wget -q https://github.com/nasa-jpl/ION-DTN/archive/refs/tags/ion-open-source-4.1.3.tar.gz

bp_client.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "include/bp_socket.h"
1+
#include "bp_socket.h"
22
#include <errno.h>
33
#include <pthread.h>
44
#include <signal.h>
@@ -40,7 +40,8 @@ void *send_thread(void *arg) {
4040
message_count);
4141

4242
int flags = 0;
43-
flags |= MSG_ACK_REQUESTED;
43+
// flags |= MSG_ACK_REQUESTED;
44+
flags |= MSG_NO_CUSTODY_REQUIRED;
4445

4546
int ret =
4647
sendto(data->fd, send_buffer, strlen(send_buffer) + 1, flags,

bp_socket/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ PWD := $(shell pwd)
66

77
SRC_FILES := $(wildcard *.c *.h)
88

9-
.PHONY: all clean sparse
9+
.PHONY: all clean sparse install
1010

1111
all:
1212
$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
@@ -18,4 +18,11 @@ sparse:
1818
$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules C=1 CHECK=sparse
1919

2020
format:
21-
clang-format -i --style=file $(SRC_FILES)
21+
clang-format -i --style=file $(SRC_FILES)
22+
23+
# install: build the module (via `all`), copy bp.ko into the running kernel's
# "extra" module directory and refresh the module dependency database so that
# `modprobe bp` can find it.
# Both variables are recursively expanded and scoped to this target, so
# `uname -r` is only evaluated when the recipe actually runs.
install: kernel_release = $(shell uname -r)
install: module_dir = /lib/modules/$(kernel_release)/extra
install: all
	@echo "Installing kernel module..."
	install -d $(module_dir)
	install -m 644 bp.ko $(module_dir)/
	depmod -a
	@echo "Kernel module installed. Use 'modprobe bp' to load it."

configs/cloud-init/ion.cloud-config.cfg

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,18 @@ packages:
1010
- make
1111
- pkg-config
1212
- libnl-genl-3-dev
13+
- libnl-3-dev
1314
- libevent-dev
1415
- build-essential
16+
- clang-format
17+
- sparse
1518

1619
ssh_pwauth: false
1720
users:
1821
- name: ubuntu
1922
sudo: ALL=(ALL) NOPASSWD:ALL
2023
groups: users,admin
2124
lock_passwd: false
22-
ssh_authorized_keys:
23-
- "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCo3kxULSxGtVb70NZ/dI9HUBizF1ooyGT97vf0ORB7ONG4BnNHp4ijuVpBpo9r9LgB8p+M1dhZarrTaQ2u/1lUDrjNGERMt1HfDWmHzbcqr24oGqqOgDcCGrCmcGxqpWcHAu1HfcVfBO4YeJMHYQOkV/0vS3fdhocbGf0vkE13QAfjVmbZvR/dnaaVIdVeoovC2vz6DKCGgfZDM1GB35WzJ5oXoRsHuOReCywea2p5oB90ttM50bnX/Uv7DRn8e3f/8Pwf0Rp/R/JzlLtAeC9HulNyo4LVlWcom4G55D8/g11n03sp7SR2Zl2VorF6Ep6Phuha1izjHj+aCM2TbxSCq7DJnCW7n3GZ4/DqAMP4cglE0IEjGsM6jrDTdbjMlqt6u1RZ+XgUtJktf9g086s/4Rx29V1wtUbqYfjHf3dwYB59Lj3d/mGvuMY0VpcVehYh2CRShPGoFxJ8+FqJSYQsrlu0CeC0QASBx7LUo7PX4N8QLCrmXf2ELzzGoEQl1UWomyufrd/+KcP8hvbWaZZSgoy4ww+3hHCf+RuuLsj4twyhwsFdS4/M0nVQrvTFjsIjeePI9Cwi/yeRph17AIKIjrxgCxHROz1cg3dZD/1fb2m5m6d+SJkuzEAvsP7eypncGOZpO/djhAMD/AfJpJA15uOl/G84G80bZCCFnpK8Rw== spierrot@Sylvains-Air.localdomain"
2425
shell: /bin/bash
2526

2627
disable_root: true
@@ -49,8 +50,46 @@ write_files:
4950
make
5051
make install
5152
permissions: '0755'
53+
# systemd unit for the BP socket daemon, run as root from the /bp-socket
# checkout. The daemon attaches to ION while starting up, so it is ordered
# after ion-dtn.service whenever both units are started in the same
# transaction. This is ordering only, not a hard dependency: if ION comes up
# later, Restart=always with RestartSec=5 keeps retrying.
- path: /etc/systemd/system/bp-socket-daemon.service
  content: |
    [Unit]
    Description=BP Socket Daemon
    After=network.target ion-dtn.service

    [Service]
    Type=simple
    User=root
    WorkingDirectory=/bp-socket
    Environment=LD_LIBRARY_PATH=/usr/local/lib
    ExecStart=/bp-socket/daemon/bp_daemon
    Restart=always
    RestartSec=5

    [Install]
    WantedBy=multi-user.target
  permissions: '0644'
71+
# systemd unit that brings up the ION DTN stack once via ionstart and is then
# considered active (Type=oneshot + RemainAfterExit=yes).
# Wants=network-online.target only pulls that target into the transaction; the
# matching After= is what actually delays ionstart until the network is up.
- path: /etc/systemd/system/ion-dtn.service
  content: |
    [Unit]
    Description=ION DTN Stack
    Wants=network-online.target
    After=network.target network-online.target

    [Service]
    Type=oneshot
    User=root
    WorkingDirectory=/bp-socket/configs
    Environment=LD_LIBRARY_PATH=/usr/local/lib
    ExecStart=/usr/local/bin/ionstart -I /bp-socket/configs/host.rc
    RemainAfterExit=yes

    [Install]
    WantedBy=multi-user.target
  permissions: '0644'
5289

5390
runcmd:
5491
- bash /var/run/scripts/provision.sh
5592
- apt-get -y install linux-headers-$(uname -r)
56-
- netplan apply
93+
- netplan apply
94+
- cd /bp-socket && make && insmod /bp-socket/bp_socket/bp.ko
95+
- systemctl daemon-reload

configs/cloud-init/ud3tn.cloud-config.cfg

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ users:
2020
sudo: ALL=(ALL) NOPASSWD:ALL
2121
groups: users,admin
2222
lock_passwd: false
23-
ssh_authorized_keys:
24-
- "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCo3kxULSxGtVb70NZ/dI9HUBizF1ooyGT97vf0ORB7ONG4BnNHp4ijuVpBpo9r9LgB8p+M1dhZarrTaQ2u/1lUDrjNGERMt1HfDWmHzbcqr24oGqqOgDcCGrCmcGxqpWcHAu1HfcVfBO4YeJMHYQOkV/0vS3fdhocbGf0vkE13QAfjVmbZvR/dnaaVIdVeoovC2vz6DKCGgfZDM1GB35WzJ5oXoRsHuOReCywea2p5oB90ttM50bnX/Uv7DRn8e3f/8Pwf0Rp/R/JzlLtAeC9HulNyo4LVlWcom4G55D8/g11n03sp7SR2Zl2VorF6Ep6Phuha1izjHj+aCM2TbxSCq7DJnCW7n3GZ4/DqAMP4cglE0IEjGsM6jrDTdbjMlqt6u1RZ+XgUtJktf9g086s/4Rx29V1wtUbqYfjHf3dwYB59Lj3d/mGvuMY0VpcVehYh2CRShPGoFxJ8+FqJSYQsrlu0CeC0QASBx7LUo7PX4N8QLCrmXf2ELzzGoEQl1UWomyufrd/+KcP8hvbWaZZSgoy4ww+3hHCf+RuuLsj4twyhwsFdS4/M0nVQrvTFjsIjeePI9Cwi/yeRph17AIKIjrxgCxHROz1cg3dZD/1fb2m5m6d+SJkuzEAvsP7eypncGOZpO/djhAMD/AfJpJA15uOl/G84G80bZCCFnpK8Rw== spierrot@Sylvains-Air.localdomain"
2523
shell: /bin/bash
2624

2725
disable_root: true
@@ -51,9 +49,32 @@ write_files:
5149
make virtualenv
5250
source .venv/bin/activate
5351
make update-virtualenv
54-
permissions: '0755'
52+
permissions: '0755'
53+
# write_files entry: installs a systemd unit (mode 0644) that runs the uD3TN
# binary built under /opt/ud3tn as root with EID ipn:20.0 and a TCPCLv3 CLA
# on port 4556. The --aap2-socket path is relative, so it resolves against
# WorkingDirectory (/opt/ud3tn). Restart=always with RestartSec=5 restarts the
# process five seconds after it exits.
- path: /etc/systemd/system/ud3tn.service
54+
content: |
55+
[Unit]
56+
Description=uD3TN Bundle Protocol Implementation
57+
After=network.target
58+
59+
[Service]
60+
Type=simple
61+
User=root
62+
WorkingDirectory=/opt/ud3tn
63+
ExecStart=/opt/ud3tn/build/posix/ud3tn \
64+
--allow-remote-config \
65+
--eid ipn:20.0 \
66+
--aap2-socket ./ud3tn.aap2.socket.2 \
67+
--cla "tcpclv3:*,4556" -L 4
68+
Restart=always
69+
RestartSec=5
70+
71+
[Install]
72+
WantedBy=multi-user.target
73+
permissions: '0644'
5574

5675
runcmd:
5776
- bash /var/run/scripts/provision.sh
5877
- chown -R ubuntu:ubuntu /opt/ud3tn
59-
- netplan apply
78+
- netplan apply
79+
- systemctl daemon-reload
80+
- systemctl enable --now ud3tn.service

daemon/Makefile

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ OBJECTS := $(SOURCES:.c=.o)
1111

1212
SRC_FILES := $(wildcard *.c *.h)
1313

14-
.PHONY: all release clean format
14+
.PHONY: all release clean format install
1515

1616
all: $(EXEC)
1717

@@ -28,4 +28,10 @@ clean:
2828
rm -f $(EXEC) $(OBJECTS)
2929

3030
format:
31-
clang-format -i --style=file $(SRC_FILES)
31+
clang-format -i --style=file $(SRC_FILES)
32+
33+
# Installation knobs following the GNU conventions. The defaults reproduce the
# previous hard-coded destination (/usr/local/bin); all three can be overridden
# from the command line or the environment.
#   PREFIX  - installation prefix
#   DESTDIR - optional staging root prepended to every installed path (packaging)
#   INSTALL - install program to use
PREFIX ?= /usr/local
DESTDIR ?=
INSTALL ?= install

# install: build the daemon if it is out of date, then copy the executable
# into $(DESTDIR)$(PREFIX)/bin with mode 755, creating the directory if needed.
install: $(EXEC)
	@echo "Installing daemon..."
	$(INSTALL) -d $(DESTDIR)$(PREFIX)/bin
	$(INSTALL) -m 755 $(EXEC) $(DESTDIR)$(PREFIX)/bin/
	@echo "Daemon installed successfully!"

daemon/bp_genl.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ int bp_genl_message_handler(struct nl_msg *msg, void *arg) {
7272
err = nla_parse(attrs, BP_GENL_A_MAX, genlmsg_attrdata(genlhdr, 0), genlmsg_attrlen(genlhdr, 0),
7373
NULL);
7474
if (err < 0) {
75-
log_error("Failed to parse Netlink attributes: %s", nl_geterror(err));
75+
log_error("bp_genl_handle_msg: failed to parse Netlink attributes for cmd %d: %s",
76+
genlhdr->cmd, nl_geterror(err));
7677
return NL_SKIP;
7778
}
7879

@@ -86,7 +87,7 @@ int bp_genl_message_handler(struct nl_msg *msg, void *arg) {
8687
case BP_GENL_CMD_DESTROY_BUNDLE:
8788
return handle_destroy_bundle(daemon, attrs);
8889
default:
89-
log_error("Unknown Generic Netlink command: %d", genlhdr->cmd);
90+
log_error("bp_genl_handle_msg: unknown Generic Netlink command: %d", genlhdr->cmd);
9091
return NL_SKIP;
9192
}
9293
}

daemon/bp_genl_handlers.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@ int handle_open_endpoint(Daemon *daemon, struct nlattr **attrs) {
3030
ret = ion_open_endpoint(node_id, service_id, daemon->genl_bp_sock, &daemon->netlink_mutex,
3131
daemon->genl_bp_family_id);
3232
if (ret == 0) {
33-
log_info("[ipn:%u.%u] OPEN_ENDPOINT: endpoint opened successfully", node_id, service_id);
33+
log_info("[ipn:%u.%u] Endpoint opened: spawning receiver and sender threads", node_id,
34+
service_id);
3435
} else {
35-
log_error("[ipn:%u.%u] OPEN_ENDPOINT: failed to open endpoint (error %d)", node_id,
36+
log_error("handle_open_endpoint: failed to open endpoint ipn:%u.%u (error %d)", node_id,
3637
service_id, ret);
3738
}
3839
return ret;
@@ -51,9 +52,9 @@ int handle_close_endpoint(Daemon *daemon, struct nlattr **attrs) {
5152

5253
int ret = ion_close_endpoint(node_id, service_id);
5354
if (ret == 0) {
54-
log_info("[ipn:%u.%u] CLOSE_ENDPOINT: closing endpoint", node_id, service_id);
55+
log_info("[ipn:%u.%u] Endpoint closed gracefully", node_id, service_id);
5556
} else {
56-
log_error("[ipn:%u.%u] CLOSE_ENDPOINT: failed to close endpoint (error %d)", node_id,
57+
log_error("handle_close_endpoint: failed to close endpoint ipn:%u.%u (error %d)", node_id,
5758
service_id, ret);
5859
}
5960

@@ -89,22 +90,19 @@ int handle_send_bundle(Daemon *daemon, struct nlattr **attrs) {
8990

9091
written = snprintf(dest_eid, sizeof(dest_eid), "ipn:%u.%u", dest_node_id, dest_service_id);
9192
if (written < 0 || written >= (int)sizeof(dest_eid)) {
92-
log_error("[ipn:%u.%u] handle_send_bundle: failed to construct EID string", src_node_id,
93+
log_error("handle_send_bundle: failed to construct EID string for ipn:%u.%u", src_node_id,
9394
src_service_id);
9495
return -EINVAL;
9596
}
9697

9798
ret = endpoint_registry_enqueue_send(src_node_id, src_service_id, dest_eid, payload,
9899
payload_size, flags);
99100
if (ret < 0) {
100-
log_error("[ipn:%u.%u] handle_send_bundle: failed to enqueue send (error: %d)", src_node_id,
101-
src_service_id, ret);
101+
log_error("handle_send_bundle: failed to enqueue send for ipn:%u.%u (error: %d)",
102+
src_node_id, src_service_id, ret);
102103
return ret;
103104
}
104105

105-
log_info("[ipn:%u.%u] SEND_BUNDLE: bundle queued for sending to EID %s, size %zu (bytes)",
106-
src_node_id, src_service_id, dest_eid, payload_size);
107-
108106
return 0;
109107
}
110108

@@ -126,7 +124,7 @@ int handle_destroy_bundle(Daemon *daemon, struct nlattr **attrs) {
126124
return ret;
127125
}
128126

129-
log_info("DESTROY_BUNDLE: bundle consumed by a socket (adu: %llu)", (unsigned long long)adu);
127+
log_info("Bundle consumed: successfully destroyed ADU %llu", (unsigned long long)adu);
130128

131129
return 0;
132130
}

daemon/daemon.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ int daemon_run(Daemon *self) {
4444
log_error("Failed to create libevent base");
4545
return -ENOMEM;
4646
}
47-
log_info("Using libevent version %s with %s behind the scenes", (char *)event_get_version(),
48-
(char *)event_base_get_method(self->base));
47+
log_debug("Using libevent version %s with %s behind the scenes", (char *)event_get_version(),
48+
(char *)event_base_get_method(self->base));
4949

5050
self->event_on_sigint = evsignal_new(self->base, SIGINT, on_sigint, self->base);
5151
if (!self->event_on_sigint) {
@@ -79,8 +79,6 @@ int daemon_run(Daemon *self) {
7979
daemon_free(self);
8080
return -ENOMEM;
8181
}
82-
log_info("Generic Netlink: GENL_BP open socket");
83-
8482
fd = nl_socket_get_fd(self->genl_bp_sock);
8583
self->event_on_nl_sock = event_new(self->base, fd, EV_READ | EV_PERSIST, on_netlink, self);
8684
if (!self->event_on_nl_sock) {
@@ -109,9 +107,8 @@ int daemon_run(Daemon *self) {
109107
return -EAGAIN;
110108
}
111109
sdr = bp_get_sdr();
112-
log_info("Successfully attached to ION");
113110

114-
log_info("Daemon started successfully");
111+
log_info("Daemon started successfully - attached to ION, Netlink ready");
115112
event_base_dispatch(self->base);
116113
log_info("Daemon terminated");
117114

@@ -124,13 +121,12 @@ int daemon_run(Daemon *self) {
124121
void daemon_free(Daemon *self) {
125122
if (!self) return;
126123

127-
bp_genl_socket_destroy(self);
128-
129124
if (self->event_on_nl_sock) event_free(self->event_on_nl_sock);
130125
if (self->event_on_sigpipe) event_free(self->event_on_sigpipe);
131126
if (self->event_on_sigint) event_free(self->event_on_sigint);
132127
if (self->base) event_base_free(self->base);
133128

129+
bp_genl_socket_destroy(self);
134130
pthread_mutex_destroy(&self->netlink_mutex);
135131

136132
#if LIBEVENT_VERSION_NUMBER >= 0x02010000

0 commit comments

Comments
 (0)