3737#include <sys/socket.h>
3838#include <unistd.h>
3939#include <netdb.h>
40+ #include <time.h>
4041
4142#include "config.h"
4243#include "utils.h"
4344#include "log.h"
4445#include "shim.h"
4546
47+ // How many seconds shim tries to reconnect to proxy before it exits
48+ #ifndef RECONNECT_TIMEOUT_S
49+ #define RECONNECT_TIMEOUT_S 10
50+ #endif
51+
52+ // Period between attempts to reconnect to proxy in milliseconds
53+ #ifndef RECONNECT_POLL_MS
54+ #define RECONNECT_POLL_MS 333
55+ #endif
56+
4657/* globals */
4758
4859struct pollfd poll_fds [MAX_POLL_FDS ] = {{-1 }};
@@ -337,31 +348,34 @@ send_connect_command(struct cc_shim *shim)
337348 return ret ;
338349}
339350
351+ bool reconnect_to_proxy (struct cc_shim * shim );
352+
340353/*!
341354 * Read message received from proxy.
342355 *
343- * \param fd File descriptor to read the data from.
356+ * \param shim \ref cc_shim
344357 * \param buf Buffer to store the data in.
345358 * \param size Size in bytes to be read.
346359 *
347360 * \return true on success, false otherwise
348361 */
349- bool read_wire_data (int fd , uint8_t * buf , size_t size )
362+ bool read_wire_data (struct cc_shim * shim , uint8_t * buf , size_t size )
350363{
351364 ssize_t ret ;
352365 size_t offset = 0 ;
353366
354- if ( fd < 0 || ! buf ) {
367+ if ( shim -> proxy_sock_fd < 0 || ! buf ) {
355368 return false;
356369 }
357370
358371 while (offset < size ) {
359- ret = recv (fd , buf + offset , size - offset , 0 );
372+ ret = recv (shim -> proxy_sock_fd , buf + offset , size - offset , 0 );
360373 if (ret == 0 ) {
361374 shim_debug ("Received EOF on file descriptor\n" );
362- // TODO: Exit for now, add logic to try to reconnect
363- // to proxy.
364- exit (EXIT_FAILURE );
375+ if (! reconnect_to_proxy (shim )) {
376+ exit (EXIT_FAILURE );
377+ }
378+ return false;
365379 } else if (ret < 0 ) {
366380 shim_error ("Failed to read from fd: %s\n" ,
367381 strerror (errno ));
@@ -403,7 +417,7 @@ read_frame(struct cc_shim *shim)
403417 abort ();
404418 }
405419
406- if (! read_wire_data (shim -> proxy_sock_fd , buf , size )) {
420+ if (! read_wire_data (shim , buf , size )) {
407421 goto error ;
408422 }
409423
@@ -434,8 +448,7 @@ read_frame(struct cc_shim *shim)
434448 abort ();
435449 }
436450
437- if (! read_wire_data (shim -> proxy_sock_fd , buf + size ,
438- header_size_in_bytes - size )) {
451+ if (! read_wire_data (shim , buf + size , header_size_in_bytes - size )) {
439452 shim_error ("Error while reading frame from proxy at %s\n" ,
440453 shim -> proxy_address );
441454 goto error ;
@@ -453,8 +466,7 @@ read_frame(struct cc_shim *shim)
453466 }
454467
455468 memset (buf , 0 , fr -> header .payload_len + 1 );
456- if (! read_wire_data (shim -> proxy_sock_fd , buf ,
457- fr -> header .payload_len )) {
469+ if (! read_wire_data (shim , buf , fr -> header .payload_len )) {
458470 goto error ;
459471 }
460472 fr -> payload = buf ;
@@ -876,7 +888,7 @@ connect_to_proxy(struct cc_shim *shim)
876888
877889 if (connect (sockfd , (struct sockaddr * )& remote ,
878890 sizeof (struct sockaddr_un )) == -1 ) {
879- shim_error ("Error while connecting to proxy "
891+ shim_warning ("Error while connecting to proxy "
880892 "with address %s: %s\n" , shim -> proxy_address ,
881893 strerror (errno ));
882894 goto out ;
@@ -954,6 +966,51 @@ connect_to_proxy(struct cc_shim *shim)
954966 return false;
955967}
956968
/*!
 * Suspend the calling thread for the given number of milliseconds.
 *
 * The delay is split into whole seconds and a sub-second remainder because
 * nanosleep() rejects a tv_nsec value of one second or more with EINVAL,
 * which would turn any request of 1000 ms or longer into no sleep at all.
 * If the sleep is interrupted by a signal it is resumed for the time that
 * is still remaining.
 *
 * Defined with external linkage (no bare "inline"): in C99/C11 an inline
 * definition without a matching external definition may fail to link when
 * the compiler decides not to inline the call.
 *
 * \param ms Time to sleep in milliseconds. Values <= 0 return immediately.
 */
void sleep_ms(int ms)
{
	if (ms <= 0) {
		return;
	}

	struct timespec remaining = {
		.tv_sec = ms / 1000,
		.tv_nsec = (long)(ms % 1000) * 1000000L,
	};

	/* POSIX allows the request and remainder to be the same object. */
	while (nanosleep(&remaining, &remaining) == -1 && errno == EINTR) {
		/* interrupted by a signal: keep sleeping for what is left */
	}
}
974+
975+ /*
976+ * Try to re-establish tcp/unix connection with proxy in shim->timeout seconds.
977+ *
978+ * \param shim \ref cc_shim
979+ *
980+ * \return true on success, false on failure
981+ */
982+ bool reconnect_to_proxy (struct cc_shim * shim )
983+ {
984+ shim_warning ("Reconnecting to cc-proxy (timeout %d s)\n" ,
985+ shim -> timeout );
986+ int time = 0 ;
987+
988+ try_again :
989+ sleep_ms (RECONNECT_POLL_MS );
990+ time += RECONNECT_POLL_MS ;
991+ if (time >= shim -> timeout * 1000 ) {
992+ shim_error ("Failed to reconnect to cc-proxy (timeout %d s)\n" ,
993+ shim -> timeout );
994+ return false;
995+ }
996+
997+ close (shim -> proxy_sock_fd );
998+ if (! connect_to_proxy (shim )) {
999+ goto try_again ;
1000+ }
1001+
1002+ shim_debug ("Sending connect command\n" );
1003+ if (! send_connect_command (shim )) {
1004+ shim_error ("Could not send connect command to cc-proxy\n" );
1005+ goto try_again ;
1006+ }
1007+ // Update poll_fds because connect_to_proxy(shim) might have updated
1008+ // shim->proxy_sock_fd
1009+ add_pollfd (poll_fds , PROXY_SOCK_INDEX , shim -> proxy_sock_fd ,
1010+ POLLIN | POLLPRI );
1011+ return true;
1012+ }
1013+
9571014/*
9581015 * Print version information.
9591016 */
@@ -971,6 +1028,7 @@ print_usage(void) {
9711028 printf (" -c, --container-id Container id\n" );
9721029 printf (" -d, --debug Enable debug output\n" );
9731030 printf (" -t, --token Connection token passed by cc-proxy\n" );
1031+ printf (" -r, --rtimeout Reconnection timeout to cc-proxy in seconds\n" );
9741032 printf (" -u, --uri Connection uri. Supported schemes are tcp: and unix:\n" );
9751033 printf (" -v, --version Show version\n" );
9761034 printf (" -h, --help Display this help message\n" );
@@ -983,6 +1041,7 @@ main(int argc, char **argv)
9831041 .container_id = NULL ,
9841042 .proxy_sock_fd = -1 ,
9851043 .token = NULL ,
1044+ .timeout = RECONNECT_TIMEOUT_S ,
9861045 .proxy_address = NULL ,
9871046 .proxy_port = -1 ,
9881047 };
@@ -999,20 +1058,27 @@ main(int argc, char **argv)
9991058 {"container-id" , required_argument , 0 , 'c' },
10001059 {"debug" , no_argument , 0 , 'd' },
10011060 {"help" , no_argument , 0 , 'h' },
1061+ {"rtimeout" , required_argument , 0 , 'r' },
10021062 {"token" , required_argument , 0 , 't' },
10031063 {"uri" , required_argument , 0 , 'u' },
10041064 {"version" , no_argument , 0 , 'v' },
10051065 { 0 , 0 , 0 , 0 },
10061066 };
10071067
1008- while ((c = getopt_long (argc , argv , "c:dht :u:v" , prog_opts , NULL ))!= -1 ) {
1068+ while ((c = getopt_long (argc , argv , "c:dhr:t :u:v" , prog_opts , NULL ))!= -1 ) {
10091069 switch (c ) {
10101070 case 'c' :
10111071 shim .container_id = strdup (optarg );
10121072 break ;
10131073 case 't' :
10141074 shim .token = strdup (optarg );
10151075 break ;
1076+ case 'r' :
1077+ shim .timeout = atoi (optarg );
1078+ if (shim .timeout <= 0 ) {
1079+ shim .timeout = RECONNECT_TIMEOUT_S ;
1080+ }
1081+ break ;
10161082 case 'd' :
10171083 debug = true;
10181084 break ;
0 commit comments