From 34442b6069756c6dd2eeb8f576a921436df18ae8 Mon Sep 17 00:00:00 2001
From: Daniel Barlow <dan@telent.net>
Date: Tue, 12 Mar 2024 23:41:46 +0000
Subject: [PATCH 1/6] failing test for ifwait

---
 tests/ci.nix                   |  1 +
 tests/updown/configuration.nix | 55 ++++++++++++++++++++++++++++++++++
 tests/updown/script.expect     | 29 ++++++++++++++++++
 tests/updown/test.nix          | 19 ++++++++++++
 4 files changed, 104 insertions(+)
 create mode 100644 tests/updown/configuration.nix
 create mode 100644 tests/updown/script.expect
 create mode 100644 tests/updown/test.nix

diff --git a/tests/ci.nix b/tests/ci.nix
index f923e853..888a2d63 100644
--- a/tests/ci.nix
+++ b/tests/ci.nix
@@ -8,4 +8,5 @@
   min-copy-closure =  import ./min-copy-closure/test.nix;
   fennel = import ./fennel/test.nix;
   tftpboot = import ./tftpboot/test.nix;
+  updown = import ./updown/test.nix;
 }
diff --git a/tests/updown/configuration.nix b/tests/updown/configuration.nix
new file mode 100644
index 00000000..38e52db2
--- /dev/null
+++ b/tests/updown/configuration.nix
@@ -0,0 +1,55 @@
+{ config, pkgs, lib, ... } :
+let
+  inherit (pkgs.liminix.services) bundle oneshot longrun;
+  inherit (pkgs) serviceFns;
+  # EDIT: you can pick your preferred RFC1918 address space
+  # for NATted connections, if you don't like this one.
+  ipv4LocalNet = "10.8.0";
+  svc = config.system.service;
+
+in rec {
+  imports = [
+    ../../modules/bridge
+    ../../modules/dhcp6c
+    ../../modules/dnsmasq
+    ../../modules/firewall
+    ../../modules/hostapd
+    ../../modules/network
+    ../../modules/ssh
+    ../../modules/vlan
+    ../../modules/wlan.nix
+  ];
+  rootfsType = "jffs2";
+  hostname = "updown";
+
+  services.int = svc.network.address.build {
+    interface = svc.bridge.primary.build { ifname = "int"; };
+    family = "inet"; address = "${ipv4LocalNet}.1"; prefixLength = 16;
+  };
+
+  services.bridge =  svc.bridge.members.build {
+    primary = services.int;
+    members = with config.hardware.networkInterfaces;
+      [ lan ];
+  };
+
+  services.sshd = svc.ssh.build { };
+
+  # users.root = {
+  #   # EDIT: choose a root password and then use
+  #   # "mkpasswd -m sha512crypt" to determine the hash.
+  #   # It should start with $6$.
+  #   passwd = "$6$6HG7WALLQQY1LQDE$428cnouMJ7wVmyK9.dF1uWs7t0z9ztgp3MHvN5bbeo0M4Kqg/u2ThjoSHIjCEJQlnVpDOaEKcOjXAlIClHWN21";
+  #   openssh.authorizedKeys.keys = [
+  #     # EDIT: you can add your ssh pubkey here
+  #     # "ssh-rsa AAAAB3NzaC1....H6hKd user@example.com";
+  #   ];
+  # };
+
+  defaultProfile.packages = with pkgs; [
+    min-collect-garbage
+#    strace
+    #    ethtool
+    tcpdump
+  ];
+}
diff --git a/tests/updown/script.expect b/tests/updown/script.expect
new file mode 100644
index 00000000..89e60c19
--- /dev/null
+++ b/tests/updown/script.expect
@@ -0,0 +1,29 @@
+set timeout 10
+
+spawn socat unix-connect:vm/monitor -
+set monitor_id $spawn_id
+
+expect "(qemu)"
+send "set_link virtio-net-pci.1 off\n"
+expect "(qemu)"
+send "set_link virtio-net-pci.0 off\n"
+expect "(qemu)"
+send "c\r\n"
+spawn socat unix-connect:vm/console -
+set console_id $spawn_id
+
+expect "BusyBox" 
+expect "#" { send "PS1=RE\\ADY_\\ \r" }
+expect "READY_" { send "sleep 3\r" }
+expect "READY_" { send "ip link\r" }
+expect "READY_" { send "cat /sys/class/net/lan/operstate\r" }
+expect {
+  "down" { }
+  "up" { exit 1 }
+}
+expect "READY_" { send "s6-rc -a -u change\r"  }
+expect {
+  "unable to take locks" { exit 1 }
+  "READY_" { send "hostname\r" }
+}
+expect "updown"
diff --git a/tests/updown/test.nix b/tests/updown/test.nix
new file mode 100644
index 00000000..589e3838
--- /dev/null
+++ b/tests/updown/test.nix
@@ -0,0 +1,19 @@
+{
+  liminix
+, nixpkgs
+}:
+let img = (import liminix {
+      device = import "${liminix}/devices/qemu/";
+      liminix-config = ./configuration.nix;
+    }).outputs.vmroot;
+    pkgs = import <nixpkgs> { overlays = [(import ../../overlay.nix)]; };
+in pkgs.runCommand "check" {
+  nativeBuildInputs = with pkgs; [
+    expect
+    socat
+  ] ;
+} ''
+mkdir vm
+${img}/run.sh --flag -S --background ./vm
+expect ${./script.expect} | tee $out
+''

From af52aafc845194b0a6b3aa0e748105b4e53701e8 Mon Sep 17 00:00:00 2001
From: Daniel Barlow <dan@telent.net>
Date: Sat, 16 Mar 2024 20:22:30 +0000
Subject: [PATCH 2/6] deep thoughts

---
 THOUGHTS.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/THOUGHTS.txt b/THOUGHTS.txt
index b4200f8e..0bb66b1f 100644
--- a/THOUGHTS.txt
+++ b/THOUGHTS.txt
@@ -4322,6 +4322,16 @@ set_link virtio-net-pci.0 on
 
 See if both devices are bridge members
 
+disable again, check if back to starting position
+
+
+Wed Mar 13 00:00:16 GMT 2024
+
+aside: "trigger" is the least bad word I've thought of so far for
+these services that stop/start other services
+
+telent: yeah, in general 'ps afuxww' (or s6-ps -H :)) is the way to solve this, look for hung s6-rc processes and in particular their s6-svlisten1 children, where the command line will show what service is still waiting for readiness
+
 Wed Mar 20 19:34:36 GMT 2024
 
 Because I forgot hoe to rebuild rotuer, I tihnk it is time to improve

From fad0a47b7547252330e4f0931c651ef63f3615dd Mon Sep 17 00:00:00 2001
From: Daniel Barlow <dan@telent.net>
Date: Sat, 16 Mar 2024 20:23:18 +0000
Subject: [PATCH 3/6] add config.system.callService

this is like pkgs.callService except that it passes
config.system.service as a param so that the service
being defined can invoke other services

if this proves to be a good idea, all uses of
pkgs.callService should be changed to use it instead
---
 modules/base.nix | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/modules/base.nix b/modules/base.nix
index 8f53df29..4d3d8e4d 100644
--- a/modules/base.nix
+++ b/modules/base.nix
@@ -29,6 +29,10 @@ in {
     services = mkOption {
       type = types.attrsOf type_service;
     };
+    system.callService = mkOption {
+      type = types.functionTo (types.functionTo types.anything);
+    };
+
     filesystem = mkOption {
       type = types.anything;
       description = ''
@@ -111,6 +115,31 @@ in {
       "fw_devlink=off"
     ] ++ lib.optional (config.rootOptions != null) "rootflags=${config.rootOptions}";
 
+    system.callService = path : parameters :
+      let
+        typeChecked = caller: type: value:
+          let
+            inherit (lib) types mergeDefinitions;
+            defs = [{ file = caller; inherit value; }];
+            type' = types.submodule { options = type; };
+          in (mergeDefinitions [] type' defs).mergedValue;
+        cp = lib.callPackageWith(pkgs // { svc = config.system.service; });
+        pkg = cp path {};
+        checkTypes = t : p : typeChecked (builtins.toString path) t p;
+      in {
+        inherit parameters;
+        build = { dependencies ? [], ... } @ args :
+          let
+            s = pkg (checkTypes parameters
+              (builtins.removeAttrs args ["dependencies"]));
+          in s.overrideAttrs (o: {
+            dependencies = (builtins.map (d: d.name) dependencies) ++ o.dependencies;
+            buildInputs = dependencies ++ o.buildInputs;
+          });
+      };
+
+
+
     users.root = {
       uid = 0; gid= 0; gecos = "Root of all evaluation";
       dir = "/home/root/";

From 28a5dec7dd31834394748c9b19411715eeddc43c Mon Sep 17 00:00:00 2001
From: Daniel Barlow <dan@telent.net>
Date: Sat, 16 Mar 2024 20:41:13 +0000
Subject: [PATCH 4/6] implement ifwait trigger service and use in bridge

should we convert all ifwait uses to this trigger too? seems
reasonable
---
 modules/bridge/default.nix |  6 +++--
 modules/bridge/members.nix | 21 ++++++++++++------
 modules/ifwait/default.nix | 18 +++++++++++++++
 modules/ifwait/ifwait.nix  | 15 +++++++++++++
 tests/updown/script.expect | 45 +++++++++++++++++++++++++++++++++-----
 5 files changed, 91 insertions(+), 14 deletions(-)
 create mode 100644 modules/ifwait/default.nix
 create mode 100644 modules/ifwait/ifwait.nix

diff --git a/modules/bridge/default.nix b/modules/bridge/default.nix
index 48fdb125..32df4a5c 100644
--- a/modules/bridge/default.nix
+++ b/modules/bridge/default.nix
@@ -14,6 +14,8 @@ let
   inherit (pkgs) liminix;
 in
 {
+  imports = [ ../ifwait ];
+
   options = {
     system.service.bridge = {
       primary = mkOption { type = liminix.lib.types.serviceDefn; };
@@ -27,7 +29,7 @@ in
         description = "bridge interface name to create";
       };
     };
-    members = liminix.callService ./members.nix {
+    members = config.system.callService ./members.nix {
       primary = mkOption {
         type = liminix.lib.types.interface;
         description = "primary bridge interface";
@@ -47,5 +49,5 @@ in
     # a better way to test for the existence of vlan config:
     # maybe the module should set an `enabled` attribute?
     BRIDGE_VLAN_FILTERING = "y";
-  };    
+  };
 }
diff --git a/modules/bridge/members.nix b/modules/bridge/members.nix
index ca1989e3..a2787305 100644
--- a/modules/bridge/members.nix
+++ b/modules/bridge/members.nix
@@ -2,6 +2,7 @@
   liminix
 , ifwait
 , lib
+, svc
 }:
 { members, primary } :
 
@@ -10,14 +11,20 @@ let
   inherit (liminix.services) bundle oneshot;
   inherit (lib) mkOption types;
   addif = member :
-    oneshot {
-      name = "${primary.name}.member.${member.name}";
-      up = ''
-        dev=$(output ${member} ifname)
-        ${ifwait}/bin/ifwait $dev running && ip link set dev $dev master $(output ${primary} ifname)
-      '';
-      down = "ip link set dev $(output ${member} ifname) nomaster";
+    # how do we get sight of services from here? maybe we need to
+    # implement ifwait as a regular derivation instead of a
+    # servicedefinition
+    svc.ifwait.build {
+      state = "running";
+      interface = member;
       dependencies = [ primary member ];
+      service = oneshot {
+        name = "${primary.name}.member.${member.name}";
+        up = ''
+          ip link set dev $(output ${member} ifname) master $(output ${primary} ifname)
+        '';
+        down = "ip link set dev $(output ${member} ifname) nomaster";
+      };
     };
 in bundle {
   name = "${primary.name}.members";
diff --git a/modules/ifwait/default.nix b/modules/ifwait/default.nix
new file mode 100644
index 00000000..501f317d
--- /dev/null
+++ b/modules/ifwait/default.nix
@@ -0,0 +1,18 @@
+{ config, pkgs, lib, ... } :
+let
+  inherit (pkgs) liminix;
+  inherit (lib) mkOption types;
+in {
+  options.system.service.ifwait =
+    mkOption { type = liminix.lib.types.serviceDefn; };
+
+  config.system.service.ifwait = config.system.callService ./ifwait.nix {
+    state = mkOption { type = types.str; };
+    interface = mkOption {
+      type = liminix.lib.types.interface;
+    };
+    service = mkOption {
+      type = liminix.lib.types.service;
+    };
+  };
+}
diff --git a/modules/ifwait/ifwait.nix b/modules/ifwait/ifwait.nix
new file mode 100644
index 00000000..4c28b522
--- /dev/null
+++ b/modules/ifwait/ifwait.nix
@@ -0,0 +1,15 @@
+{ ifwait, liminix } :
+{
+  state
+, interface
+, service
+}:
+let
+  inherit (liminix.services) longrun;
+in longrun {
+  name = "ifwait.${interface.name}";
+  buildInputs = [ service ];
+  run = ''
+    ${ifwait}/bin/ifwait -s ${service.name}  $(output ${interface} ifname) ${state}
+  '';
+}
diff --git a/tests/updown/script.expect b/tests/updown/script.expect
index 89e60c19..232f49ad 100644
--- a/tests/updown/script.expect
+++ b/tests/updown/script.expect
@@ -13,9 +13,14 @@ spawn socat unix-connect:vm/console -
 set console_id $spawn_id
 
 expect "BusyBox" 
-expect "#" { send "PS1=RE\\ADY_\\ \r" }
-expect "READY_" { send "sleep 3\r" }
-expect "READY_" { send "ip link\r" }
+expect "#" { send "PS1=RE\\ADY_\\ ; stty -echo \r" }
+expect "READY_" { send "s6-rc -b -a list\r"  } ; # -b waits for s6-rc lock
+expect "READY_" { send "ls /sys/class/net/lan/master\r" }
+expect {
+  "No such file or directory" { }
+  timeout { exit 1 }
+}
+
 expect "READY_" { send "cat /sys/class/net/lan/operstate\r" }
 expect {
   "down" { }
@@ -24,6 +29,36 @@ expect {
 expect "READY_" { send "s6-rc -a -u change\r"  }
 expect {
   "unable to take locks" { exit 1 }
-  "READY_" { send "hostname\r" }
+  "READY_" { send "\r" }
 }
-expect "updown"
+
+set spawn_id $monitor_id
+send "\r"
+expect "(qemu)"
+send "set_link virtio-net-pci.1 on\n"
+expect "(qemu)"
+send "set_link virtio-net-pci.0 on\n"
+expect "(qemu)"
+set spawn_id $console_id
+
+expect "entered forwarding state"
+send "\r"
+expect "READY_" { send "cat /sys/class/net/lan/operstate\r" }
+expect {
+  "down" {  exit 1 }
+  "up" { }
+}
+
+expect "READY_" { send "cat /sys/class/net/lan/master/uevent\r" }
+expect {
+  "INTERFACE=int" { }
+  timeout { exit 1 }
+}
+
+expect "READY_" { send "s6-rc listall int.link.a.10.8.0.1.member.lan.link ; hostname\r"  }
+
+expect {
+  "updown"  {}
+  timeout { exit 1 }
+}
+

From 77f1a783310075bbc205b7f4ff875933687758ad Mon Sep 17 00:00:00 2001
From: Daniel Barlow <dan@telent.net>
Date: Sat, 16 Mar 2024 23:09:41 +0000
Subject: [PATCH 5/6] ifwait block if s6-rc lock is held

otherwise it doesn't trigger the service if something else is
slow to start
---
 pkgs/ifwait/ifwait.fnl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkgs/ifwait/ifwait.fnl b/pkgs/ifwait/ifwait.fnl
index 9af41f95..cc5e76c9 100644
--- a/pkgs/ifwait/ifwait.fnl
+++ b/pkgs/ifwait/ifwait.fnl
@@ -37,8 +37,8 @@
   (when (not (= up wanted?))
     (set up
          (if wanted?
-             (pcall system (.. "s6-rc -u change " service))
-             (not (pcall system (.. "s6-rc -d change " service)))))
+             (pcall system (.. "s6-rc -b -u change " service))
+             (not (pcall system (.. "s6-rc -b -d change " service)))))
     ))
 
 (fn run [args event-fn]

From efcfdcc21d7d410673f04744d880e5ac5f17c759 Mon Sep 17 00:00:00 2001
From: Daniel Barlow <dan@telent.net>
Date: Thu, 28 Mar 2024 20:28:47 +0000
Subject: [PATCH 6/6] think

---
 THOUGHTS.txt | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/THOUGHTS.txt b/THOUGHTS.txt
index 0bb66b1f..6606706f 100644
--- a/THOUGHTS.txt
+++ b/THOUGHTS.txt
@@ -4354,3 +4354,61 @@ just now but it does mean we can punt on specifying the device inside the
 liminix-config which is unreasonably circular.
 
 Maybe we'll just chuck a makefile in telent-nixos-config
+
+Fri Mar 22 22:14:32 GMT 2024
+
+For the service failover milestone we said
+
+a. A configuration demonstrating a service which is restarted when it crashes
+b. A failover config where service B runs iff service A is unavailable
+c. A config showing different pppd behaviour when interface is flakey (retry) vs ppp password is wrong (report error, wait for resolution)
+
+Sun Mar 24 23:41:27 GMT 2024
+
+TODO
+
+1) make liminix-rebuild bounce only affected services instead of
+  full reboot (what does it do about triggered services?)
+2) sniproxy
+
+3) see if archive still works. usb disk hotplug would be a good candidate for
+switching to triggers
+
+Mon Mar 25 19:35:47 GMT 2024
+
+to make the liminix-rebuild thing restart only affected services, it needs to
+know when the new service is not like the old one. By default it does not
+restart a service with a changed up/down/run script unless the name has
+also changed, so we need to figure out how to generate a "conversion"
+file with the services that are different
+
+pkgs/s6-rc-database/default.nix creates $out/compiled, we could add
+$out/hashes to this
+
+the other thing making this fun is that we will need to run `activate`
+(which is usually done in preinit) otherwise the new configuration's
+fhs directories won't exist.
+
+so the plan would be
+
+in liminix-rebuild, when reboot was not chosen,
+
+- run activate
+- compare  /run/s6-rc/compiled/hashes (old services) with
+  /etc/s6-rc/compiled/hashes (new services)
+
+- whenever both files have the same column 1 and different
+column 2, add that name to restart list
+
+(need to turn restarts.fnl into a lua script)
+
+s6-rc-update /etc/s6-rc/compiled/hashes restarts
+
+Tue Mar 26 23:18:53 GMT 2024
+
+activate overwrites /etc/s6-rc/compiled, which is a problem because
+s6-rc-update expects to find the old compiled database here so that
+it can know what to update
+
+Maybe config.filesystem should specify /etc/s6-rc/compiled.new
+and something in early boot could symlink /etc/s6-rc/compiled to it