Skip to content

Commit 1a98fb2

Browse files
committed
Reconstruct some socket pairs and epoll systems
1 parent 23d0a73 commit 1a98fb2

3 files changed

Lines changed: 144 additions & 113 deletions

File tree

lib/tinykvm/linux/fds.cpp

Lines changed: 113 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -70,104 +70,54 @@ namespace tinykvm
7070
// Deep copy the master epoll FDs
7171
this->m_epoll_fds.clear();
7272
for (auto [vfd, entry] : other.m_epoll_fds) {
73-
// Check if it's a shared epoll fd
74-
if (!entry->shared_epoll_fds.empty())
75-
{
76-
// Check if one of the shared epoll fds is already in the list
77-
bool found = false;
78-
for (auto shared_vfd : entry->shared_epoll_fds) {
79-
auto it = this->m_epoll_fds.find(shared_vfd);
80-
if (it != this->m_epoll_fds.end()) {
81-
// Found a shared epoll fd, so we can *share* the entry
82-
this->m_epoll_fds.insert_or_assign(vfd, it->second);
83-
if (UNLIKELY(this->m_verbose)) {
84-
fprintf(stderr, "TinyKVM: Sharing epoll fd %d with %d\n", vfd, shared_vfd);
85-
}
86-
found = true;
87-
break;
73+
// Check if one of the shared epoll fds is already in the list
74+
bool found = false;
75+
for (auto shared_vfd : entry->shared_epoll_fds) {
76+
auto it = this->m_epoll_fds.find(shared_vfd);
77+
if (it != this->m_epoll_fds.end()) {
78+
// Found a shared epoll fd, so we can *share* the entry
79+
this->m_epoll_fds.insert_or_assign(vfd, it->second);
80+
if (UNLIKELY(this->m_verbose)) {
81+
fprintf(stderr, "TinyKVM: Sharing epoll fd %d with %d\n", vfd, shared_vfd);
8882
}
83+
found = true;
84+
break;
8985
}
90-
if (found) {
91-
// Continue to the next entry
92-
continue;
93-
}
9486
}
95-
auto cloned_entry = std::make_shared<EpollEntry>();
96-
*cloned_entry = *entry;
97-
this->m_epoll_fds.insert_or_assign(vfd, std::move(cloned_entry));
87+
if (!found) {
88+
auto cloned_entry = std::make_shared<EpollEntry>();
89+
*cloned_entry = *entry;
90+
this->m_epoll_fds.insert_or_assign(vfd, std::move(cloned_entry));
91+
}
9892
}
9993
// For each socketpair and pipe2 pair, we need to create a new pair
10094
// and add them to the list of managed file descriptors.
10195
for (auto sp : other.m_sockets) {
102-
// Create a new socketpair or pipe2 pair
103-
int pair[2] = {-1, -1};
104-
switch (sp.type) {
105-
case SocketType::PIPE2:
106-
if (pipe2(pair, 0) < 0) {
107-
fprintf(stderr, "TinyKVM: Failed to create pipe2\n");
108-
throw std::runtime_error("TinyKVM: Failed to create pipe2");
109-
}
110-
// Manage the new pair using *the same* vfd as the original pair
111-
this->manage_as(sp.vfd1, pair[0], false, true);
112-
this->manage_as(sp.vfd2, pair[1], false, true);
113-
if (UNLIKELY(this->m_verbose)) {
114-
fprintf(stderr, "TinyKVM: Created new pipe2 pair %d %d\n", sp.vfd1, sp.vfd2);
115-
}
116-
break;
117-
case SocketType::SOCKETPAIR:
118-
if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, pair) < 0) {
119-
fprintf(stderr, "TinyKVM: Failed to create socketpair\n");
120-
throw std::runtime_error("TinyKVM: Failed to create socketpair");
121-
}
122-
this->manage_as(sp.vfd1, pair[0], true, true);
123-
this->manage_as(sp.vfd2, pair[1], true, true);
124-
if (UNLIKELY(this->m_verbose)) {
125-
fprintf(stderr, "TinyKVM: Created new socketpair %d %d\n", sp.vfd1, sp.vfd2);
126-
}
127-
break;
128-
case SocketType::EVENTFD: {
129-
const int fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
130-
if (fd < 0) {
131-
fprintf(stderr, "TinyKVM: Failed to create eventfd2\n");
132-
throw std::runtime_error("TinyKVM: Failed to create eventfd2");
133-
}
134-
this->manage_as(sp.vfd1, fd, false, true);
135-
if (UNLIKELY(this->m_verbose)) {
136-
fprintf(stderr, "TinyKVM: Created new eventfd2 %d (%d)\n", sp.vfd1, fd);
137-
}
138-
break;
139-
}
140-
case SocketType::DUPFD: {
141-
// This is a duplicated fd, so we need to create a new one
142-
// and manage it as a duplicate of the original fd.
143-
const int ret = dup(sp.vfd1);
144-
if (ret < 0) {
145-
fprintf(stderr, "TinyKVM: Failed to duplicate a DUPFD during reset\n");
146-
throw std::runtime_error("TinyKVM: Failed to duplicate a DUPFD during reset");
147-
}
148-
this->manage_as(sp.vfd2, ret, false, true);
149-
if (UNLIKELY(this->m_verbose)) {
150-
fprintf(stderr, "TinyKVM: Created new dupfd %d (%d)\n", sp.vfd2, ret);
151-
}
152-
break;
153-
}
154-
case SocketType::LISTEN: {
155-
// This is a listening socket, however it already exists
156-
// as it is shared between the main VM and the forked VMs.
157-
// Instead of re-creating the socket we will just manage it.
158-
Entry& entry = this->manage_as(sp.vfd1, sp.vfd2, true, true);
159-
entry.is_forked = true;
160-
if (UNLIKELY(this->m_verbose)) {
161-
fprintf(stderr, "TinyKVM: Created new listen socket %d (%d)\n", sp.vfd1, sp.vfd2);
162-
}
163-
break;
96+
this->create_socket_pairs_from(sp);
97+
}
98+
}
99+
// Re-creates a host epoll instance for the guest-visible descriptor `vfd`
// and re-registers the descriptors recorded in `entry.epoll_fds`.
//
// Steps, as written below:
//  1. epoll_create1(0) makes a fresh host epoll fd; it is then registered
//     under `vfd` via manage_as().
//  2. Each (virtual fd, epoll_event) pair in entry.epoll_fds is translated
//     with translate(). A non-negative result is added to the new epoll
//     instance with EPOLL_CTL_ADD using the stored event.
//  3. A pair whose translation is negative is erased from entry.epoll_fds,
//     i.e. the caller's `entry` is modified in place.
//
// Throws std::runtime_error if epoll_create1() or epoll_ctl() fails.
//
// NOTE(review): the printf below is unconditional, writes to stdout, and
// runs before the `efd_real_fd >= 0` check, so it also fires for fds that
// are about to be dropped. Other diagnostics in this file use
// fprintf(stderr, ...) guarded by m_verbose -- confirm whether this is
// leftover debug output.
// NOTE(review): the meaning of the two boolean arguments to manage_as() is
// not visible here -- check its declaration.
void FileDescriptors::create_epoll_entry_from(int vfd, EpollEntry& entry)
100+
{
101+
int real_fd = epoll_create1(0);
102+
if (real_fd < 0) {
103+
throw std::runtime_error("TinyKVM: Failed to create epoll fd in create_epoll_entry_from()");
104+
}
105+
this->manage_as(vfd, real_fd, true, true);
106+
107+
// The increment is done manually in the loop body because erase() returns
// the next valid iterator when an entry is dropped.
for (auto it = entry.epoll_fds.begin(); it != entry.epoll_fds.end(); ) {
108+
const int efd_vfd = it->first;
109+
struct epoll_event& ev = it->second;
110+
const int efd_real_fd = this->translate(efd_vfd);
111+
printf("Adding fd %d (%d) to epoll fd %d (%d)\n",
112+
efd_vfd, efd_real_fd, vfd, real_fd);
113+
if (efd_real_fd >= 0) {
114+
if (epoll_ctl(real_fd, EPOLL_CTL_ADD, efd_real_fd, &ev) < 0) {
115+
throw std::runtime_error("TinyKVM: Failed to add fd to epoll in create_epoll_entry_from()");
164116
}
165-
case SocketType::INVALID:
166-
// Ignore invalid socket types (they cannot be reconstructed)
167-
break;
168-
default:
169-
fprintf(stderr, "TinyKVM: Unknown socket type %d\n", sp.type);
170-
throw std::runtime_error("TinyKVM: Unknown socket type");
117+
++it;
118+
} else {
119+
// Remove the fd from the epoll entry since we can't add it
120+
it = entry.epoll_fds.erase(it);
171121
}
172122
}
173123
}
@@ -614,6 +564,79 @@ namespace tinykvm
614564
pair.vfd1, pair.vfd2, type.c_str());
615565
}
616566
}
567+
// Re-creates the host-side descriptor(s) described by `sp` and registers
// them with manage_as() under the virtual fd numbers stored in `sp`.
//
// Behaviour per sp.type:
//   PIPE2      - pipe2(pair, 0); read end managed as vfd1, write end as vfd2.
//   SOCKETPAIR - AF_UNIX stream socketpair created with SOCK_NONBLOCK;
//                ends managed as vfd1 and vfd2.
//   EVENTFD    - eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK) managed as vfd1;
//                vfd2 is not used.
//   DUPFD      - dup(sp.vfd1), result managed as vfd2.
//   LISTEN     - nothing is created; sp.vfd2 is passed to manage_as() as the
//                fd to manage under vfd1, and the entry is flagged is_forked.
//   INVALID    - ignored.
//   anything else - logs to stderr and throws.
//
// Throws std::runtime_error when a system call fails or the type is unknown.
// Failure messages are always written to stderr; success messages only when
// m_verbose is set.
//
// NOTE(review): the third manage_as() argument is false for pipes, eventfds
// and dup'ed fds and true for sockets -- presumably an "is socket" flag;
// confirm against the manage_as() declaration.
// NOTE(review): the pipe is re-created with flags 0 while the socketpair and
// eventfd are created non-blocking; the flags originally requested by the
// guest are not consulted here -- confirm this is intended.
void FileDescriptors::create_socket_pairs_from(const SocketPair& sp)
568+
{
569+
// Create a new socketpair or pipe2 pair
570+
int pair[2] = {-1, -1};
571+
switch (sp.type) {
572+
case SocketType::PIPE2:
573+
if (pipe2(pair, 0) < 0) {
574+
fprintf(stderr, "TinyKVM: Failed to create pipe2\n");
575+
throw std::runtime_error("TinyKVM: Failed to create pipe2");
576+
}
577+
// Manage the new pair using *the same* vfd as the original pair
578+
this->manage_as(sp.vfd1, pair[0], false, true);
579+
this->manage_as(sp.vfd2, pair[1], false, true);
580+
if (UNLIKELY(this->m_verbose)) {
581+
fprintf(stderr, "TinyKVM: Created new pipe2 pair %d %d\n", sp.vfd1, sp.vfd2);
582+
}
583+
break;
584+
case SocketType::SOCKETPAIR:
585+
if (socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, pair) < 0) {
586+
fprintf(stderr, "TinyKVM: Failed to create socketpair\n");
587+
throw std::runtime_error("TinyKVM: Failed to create socketpair");
588+
}
589+
this->manage_as(sp.vfd1, pair[0], true, true);
590+
this->manage_as(sp.vfd2, pair[1], true, true);
591+
if (UNLIKELY(this->m_verbose)) {
592+
fprintf(stderr, "TinyKVM: Created new socketpair %d %d\n", sp.vfd1, sp.vfd2);
593+
}
594+
break;
595+
case SocketType::EVENTFD: {
596+
const int fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
597+
if (fd < 0) {
598+
fprintf(stderr, "TinyKVM: Failed to create eventfd2\n");
599+
throw std::runtime_error("TinyKVM: Failed to create eventfd2");
600+
}
601+
this->manage_as(sp.vfd1, fd, false, true);
602+
if (UNLIKELY(this->m_verbose)) {
603+
fprintf(stderr, "TinyKVM: Created new eventfd2 %d (%d)\n", sp.vfd1, fd);
604+
}
605+
break;
606+
}
607+
case SocketType::DUPFD: {
608+
// This is a duplicated fd, so we need to create a new one
609+
// and manage it as a duplicate of the original fd.
// NOTE(review): sp.vfd1 is handed straight to dup(2), yet every other
// branch uses vfd1 as the virtual key given to manage_as(). If vfd1 is a
// virtual fd, this likely should dup the translated host fd instead --
// confirm. The snapshot writer stores DUPFD pairs as INVALID, so this
// branch is not reached when loading a snapshot.
610+
const int ret = dup(sp.vfd1);
611+
if (ret < 0) {
612+
fprintf(stderr, "TinyKVM: Failed to duplicate a DUPFD during reset\n");
613+
throw std::runtime_error("TinyKVM: Failed to duplicate a DUPFD during reset");
614+
}
615+
this->manage_as(sp.vfd2, ret, false, true);
616+
if (UNLIKELY(this->m_verbose)) {
617+
fprintf(stderr, "TinyKVM: Created new dupfd %d (%d)\n", sp.vfd2, ret);
618+
}
619+
break;
620+
}
621+
case SocketType::LISTEN: {
622+
// This is a listening socket, however it already exists
623+
// as it is shared between the main VM and the forked VMs.
624+
// Instead of re-creating the socket we will just manage it.
625+
Entry& entry = this->manage_as(sp.vfd1, sp.vfd2, true, true);
626+
entry.is_forked = true;
627+
if (UNLIKELY(this->m_verbose)) {
628+
fprintf(stderr, "TinyKVM: Created new listen socket %d (%d)\n", sp.vfd1, sp.vfd2);
629+
}
630+
break;
631+
}
632+
case SocketType::INVALID:
633+
// Ignore invalid socket types (they cannot be reconstructed)
634+
break;
635+
default:
636+
fprintf(stderr, "TinyKVM: Unknown socket type %d\n", sp.type);
637+
throw std::runtime_error("TinyKVM: Unknown socket type");
638+
}
639+
}
617640

618641
std::string FileDescriptors::sockaddr_to_string(const struct sockaddr_storage& addr) const
619642
{

lib/tinykvm/linux/fds.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ namespace tinykvm
287287
EpollEntry& get_epoll_entry_for_vfd(int vfd);
288288
const auto& get_epoll_entries() const { return m_epoll_fds; }
289289
auto& get_epoll_entries() { return m_epoll_fds; }
290+
void create_epoll_entry_from(int vfd, EpollEntry& entry);
290291
enum SocketType : int {
291292
INVALID,
292293
PIPE2,
@@ -304,6 +305,7 @@ namespace tinykvm
304305
void add_socket_pair(const SocketPair&);
305306
const auto& get_socket_pairs() const { return m_sockets; }
306307
auto& get_socket_pairs() { return m_sockets; }
308+
void create_socket_pairs_from(const SocketPair& pair);
307309

308310
std::string sockaddr_to_string(const struct sockaddr_storage& addr) const;
309311

lib/tinykvm/machine_state.cpp

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,18 @@ bool Machine::load_snapshot_state()
147147
ColdStartFds* fds = state.next<ColdStartFds>(current);
148148
auto& fdm = this->fds();
149149
fdm.set_vfd_start(fds->next_vfd);
150+
// Create socket pairs first
151+
for (size_t i = 0; i < fds->socket_pairs; i++) {
152+
ColdStartSocketPair* csp = state.next<ColdStartSocketPair>(current);
153+
FileDescriptors::SocketPair sp;
154+
sp.vfd1 = csp->vfd1;
155+
sp.vfd2 = csp->vfd2;
156+
sp.type = FileDescriptors::SocketType(csp->type);
157+
fdm.add_socket_pair(sp);
158+
// Create the (real) socket pairs and manage them
159+
fdm.create_socket_pairs_from(sp);
160+
}
161+
// Create epoll entries
150162
for (size_t i = 0; i < fds->epoll_entries; i++) {
151163
ColdStartEpollEntry* centry = state.next<ColdStartEpollEntry>(current);
152164
auto& entry = fdm.get_epoll_entry_for_vfd(centry->vfd);
@@ -158,14 +170,8 @@ bool Machine::load_snapshot_state()
158170
ColdStartSharedEpollFd* csefd = state.next<ColdStartSharedEpollFd>(current);
159171
entry.shared_epoll_fds.insert(csefd->vfd);
160172
}
161-
}
162-
for (size_t i = 0; i < fds->socket_pairs; i++) {
163-
ColdStartSocketPair* csp = state.next<ColdStartSocketPair>(current);
164-
FileDescriptors::SocketPair sp;
165-
sp.vfd1 = csp->vfd1;
166-
sp.vfd2 = csp->vfd2;
167-
sp.type = FileDescriptors::SocketType(csp->type);
168-
fdm.add_socket_pair(sp);
173+
// Create the (real) epoll system and manage it
174+
fdm.create_epoll_entry_from(centry->vfd, entry);
169175
}
170176

171177
} catch (const MachineException& me) {
@@ -228,6 +234,21 @@ void Machine::save_snapshot_state_now() const
228234
const auto& epoll_entries = fdm.get_epoll_entries();
229235
fds->epoll_entries = epoll_entries.size();
230236
fds->socket_pairs = fdm.get_socket_pairs().size();
237+
// Socket pair reconstruction entries
238+
for (const auto& sp : fdm.get_socket_pairs()) {
239+
ColdStartSocketPair* csp = state.next<ColdStartSocketPair>(current);
240+
if (sp.type == FileDescriptors::INVALID || sp.type == FileDescriptors::DUPFD) {
241+
// Silently ignore invalid or dupfd socket pairs that cannot be reconstructed
242+
// when re-loading the state anyway
243+
csp->vfd1 = -1;
244+
csp->vfd2 = -1;
245+
csp->type = int(FileDescriptors::INVALID);
246+
continue;
247+
}
248+
csp->vfd1 = sp.vfd1;
249+
csp->vfd2 = sp.vfd2;
250+
csp->type = int(sp.type);
251+
}
231252
// Epoll reconstruction entries
232253
for (const auto& [vfd, entry] : epoll_entries) {
233254
ColdStartEpollEntry* centry = state.next<ColdStartEpollEntry>(current);
@@ -244,21 +265,6 @@ void Machine::save_snapshot_state_now() const
244265
csefd->vfd = sevfd;
245266
}
246267
}
247-
// Socket pair reconstruction entries
248-
for (const auto& sp : fdm.get_socket_pairs()) {
249-
ColdStartSocketPair* csp = state.next<ColdStartSocketPair>(current);
250-
if (sp.type == FileDescriptors::INVALID || sp.type == FileDescriptors::DUPFD) {
251-
// Silently ignore invalid or dupfd socket pairs that cannot be reconstructed
252-
// when re-loading the state anyway
253-
csp->vfd1 = -1;
254-
csp->vfd2 = -1;
255-
csp->type = int(FileDescriptors::INVALID);
256-
continue;
257-
}
258-
csp->vfd1 = sp.vfd1;
259-
csp->vfd2 = sp.vfd2;
260-
csp->type = int(sp.type);
261-
}
262268

263269
// Finally, set the size
264270
state.size = static_cast<uint32_t>(

0 commit comments

Comments
 (0)