diff --git a/net-dhcp/network.py b/net-dhcp/network.py index 61ce739..d8ea7db 100644 --- a/net-dhcp/network.py +++ b/net-dhcp/network.py @@ -34,7 +34,7 @@ container_dhcp_clients = {} def cleanup_dhcp(): for endpoint, dhcp in container_dhcp_clients.items(): logger.warning('cleaning up orphaned container DHCP client (endpoint "%s")', endpoint) - dhcp.finish(timeout=1) + dhcp.stop() def veth_pair(e): return f'dh-{e[:12]}', f'{e[:12]}-dh' @@ -247,23 +247,36 @@ def join(): def leave(): return jsonify({}) +# Trying to grab the container's attributes (to get the network namespace) +# will deadlock, so we must defer starting the DHCP client +class ContainerDHCPManager: + def __init__(self, network, endpoint): + self.network = network + self.endpoint = endpoint + + self._thread = threading.Thread(target=self.run) + self._thread.start() + + def run(self): + iface = endpoint_container_iface(self.network, self.endpoint) + self.dhcp = udhcpc.DHCPClient(iface) + logger.info('Starting DHCP client on %s in container namespace %s', iface['ifname'], \ + self.dhcp.netns) + def stop(self): + logger.info('Shutting down DHCP client on %s in container namespace %s', \ + self.dhcp.iface['ifname'], self.dhcp.netns) + self.dhcp.finish(timeout=1) + ndb.sources.remove(self.dhcp.netns) + self._thread.join() + # ProgramExternalActivity is supposed to be used for port forwarding etc., # but we can use it to start the DHCP client in the container's network namespace -# since the interface will have been moved inside at this point. Trying to grab -# the contaienr's attributes (to get the network namespace) will deadlock, so -# we must defer starting the DHCP client +# since the interface will have been moved inside at this point. @app.route('/NetworkDriver.ProgramExternalConnectivity', methods=['POST']) def start_container_dhcp(): req = request.get_json(force=True) endpoint = req['EndpointID'] - - def _deferred(): - iface = endpoint_container_iface(req['NetworkID'], endpoint) - - dhcp = udhcpc.DHCPClient(iface) - container_dhcp_clients[endpoint] = dhcp - logger.info('Starting DHCP client on %s in container namespace %s', iface['ifname'], dhcp.netns) - threading.Thread(target=_deferred).start() + container_dhcp_clients[endpoint] = ContainerDHCPManager(req['NetworkID'], endpoint) return jsonify({}) @@ -273,10 +286,7 @@ def stop_container_dhcp(): endpoint = req['EndpointID'] if endpoint in container_dhcp_clients: - dhcp = container_dhcp_clients[endpoint] - logger.info('Shutting down DHCP client on %s in container namespace %s', dhcp.iface['ifname'], dhcp.netns) - dhcp.finish(timeout=1) - ndb.sources.remove(dhcp.netns) + container_dhcp_clients[endpoint].stop() del container_dhcp_clients[endpoint] return jsonify({}) \ No newline at end of file diff --git a/net-dhcp/udhcpc.py b/net-dhcp/udhcpc.py index 85fd3bd..813fa24 100644 --- a/net-dhcp/udhcpc.py +++ b/net-dhcp/udhcpc.py @@ -6,18 +6,19 @@ import fcntl import time import threading import subprocess -import signal import logging from pyroute2.netns.process.proxy import NSPopen -INFO_PREFIX = '__info' +EVENT_PREFIX = '__event' HANDLER_SCRIPT = path.join(path.dirname(__file__), 'udhcpc_handler.py') AWAIT_INTERVAL = 0.1 class EventType(Enum): BOUND = 'bound' RENEW = 'renew' + DECONFIG = 'deconfig' + LEASEFAIL = 'leasefail' logger = logging.getLogger('gunicorn.error') @@ -50,6 +51,7 @@ class DHCPClient: cmdline.append('-q' if once else '-R') self.proc = Popen(cmdline, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, encoding='utf-8') + self._has_lease = threading.Event() self.ip = None self.gateway = None self.domain = None @@ -59,23 +61,29 @@ class DHCPClient: self._event_thread.start() def _attr_listener(self, event_type, args): - if event_type not in (EventType.BOUND, EventType.RENEW): - return - - self.ip = ipaddress.ip_interface(args[0]) - self.gateway = ipaddress.ip_address(args[1]) - self.domain = args[2] + if event_type in (EventType.BOUND, EventType.RENEW): + self.ip = ipaddress.ip_interface(args[0]) + self.gateway = ipaddress.ip_address(args[1]) + self.domain = args[2] + self._has_lease.set() + elif event_type == EventType.DECONFIG: + self._has_lease.clear() + self.ip = None + self.gateway = None + self.domain = None def _read_events(self): while self._running: line = self.proc.stdout.readline().strip() if not line: # stdout will be O_NONBLOCK if udhcpc is in a netns + # We can't use select() since the file descriptor is from + # the NSPopen proxy if self.netns and self._running: time.sleep(0.1) continue - if not line.startswith(INFO_PREFIX): + if not line.startswith(EVENT_PREFIX): logger.debug('[udhcpc#%d] %s', self.proc.pid, line) continue @@ -91,12 +99,9 @@ class DHCPClient: listener(self, event_type, args[1:]) def await_ip(self, timeout=5): - # TODO: this bad - start = time.time() - while not self.ip: - if time.time() - start > timeout: - raise DHCPClientError('Timed out waiting for dhcp lease') - time.sleep(AWAIT_INTERVAL) + if not self._has_lease.wait(timeout=timeout): + raise DHCPClientError('Timed out waiting for dhcp lease') + return self.ip def finish(self, timeout=5): diff --git a/net-dhcp/udhcpc_handler.py b/net-dhcp/udhcpc_handler.py index 5d7553e..4dbc215 100755 --- a/net-dhcp/udhcpc_handler.py +++ b/net-dhcp/udhcpc_handler.py @@ -2,21 +2,17 @@ import sys from os import environ as env -INFO_PREFIX = '__info' +EVENT_PREFIX = '__event' if __name__ != '__main__': print('You shouldn\'t be importing this script!') sys.exit(1) event_type = sys.argv[1] -if event_type == 'bound' or event_type == 'renew': - print(f'{INFO_PREFIX} {event_type} {env["ip"]}/{env["mask"]} {env["router"]} {env["domain"]}') -elif event_type == 'deconfig': - print('udhcpc startup / lost lease') -elif event_type == 'leasefail': - print('udhcpc failed to get a lease') -elif event_type == 'nak': - print('udhcpc received NAK') +if event_type in ('bound', 'renew'): + print(f'{EVENT_PREFIX} {event_type} {env["ip"]}/{env["mask"]} {env["router"]} {env["domain"]}') +elif event_type in ('deconfig', 'leasefail', 'nak'): + print(f'{EVENT_PREFIX} {event_type}') else: print(f'unknown udhcpc event "{event_type}"') sys.exit(1)