I heard that context switching on Mach is really slow?

It is not. There is no real reason why it would be particularly slow: a context switch is just a matter of switching virtual address spaces and registers, which every OS has to do anyway.

A quick-and-dirty benchmark.

You can build this file with:

    gcc -pthread -o context-switch context-switch.c -lrt

In context-switch.c write:

    #include <fcntl.h>
    #include <semaphore.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/mman.h>

    /*
     * The two semaphores used for the ping-pong between the processes.
     * get_sems() points them into a MAP_SHARED file mapping, and main()
     * initialises them as process-shared with an initial value of 0.
     */
    sem_t *sem1, *sem2;

    /*
     * Counting side of the ping-pong (run by the parent process in main()).
     *
     * Each iteration waits on sem1 and then posts sem2, i.e. it answers one
     * wake-up from worker2().  Whenever the value returned by time(NULL)
     * changes, the number of iterations counted since the previous change is
     * printed on its own line and the counter is reset.  Never returns.
     */
    void worker1(void) {
        int count = 0;
        time_t prev = time(NULL);

        for (;;) {
            time_t now = time(NULL);

            /* A new second has started: report and restart the tally. */
            if (now != prev) {
                printf("%d\n", count);
                count = 0;
                prev = now;
            }
            count++;

            /* Block until the peer wakes us, then wake the peer in turn. */
            sem_wait(sem1);
            sem_post(sem2);
        }
    }

    /*
     * Echo side of the ping-pong (run by the child process in main()).
     *
     * Endlessly posts sem1 to wake worker1(), then blocks on sem2 until
     * worker1() answers.  Never returns.
     */
    void worker2(void) {
        for (;;) {
            sem_post(sem1);
            sem_wait(sem2);
        }
    }

    /* Descriptor of the file that backs the shared semaphores; set by main(). */
    int fd;

    /*
     * Map the first 4096 bytes of fd as shared, read/write memory and point
     * sem1 and sem2 at two consecutive sem_t slots at the start of that mapping.
     *
     * If the mapping cannot be created, an error is reported and the process
     * exits: continuing would leave sem1 set to MAP_FAILED and crash on the
     * first semaphore operation.
     */
    void get_sems(void) {
        void *ptr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

        if (ptr == MAP_FAILED) {
            perror("mmap");
            exit(EXIT_FAILURE);
        }
        sem1 = ptr;
        sem2 = sem1 + 1;
    }

    /*
     * Benchmark entry point.
     *
     * Creates (or truncates) /tmp/foo, sizes it to 4096 bytes, maps it via
     * get_sems() and initialises both semaphores as process-shared with value 0.
     * It then forks: the parent runs worker1() (which prints the per-second
     * count) and the child runs worker2().  Both workers loop forever, so main()
     * only returns when a setup step fails, in which case it reports the error
     * and returns EXIT_FAILURE.
     */
    int main(void) {
        fd = open("/tmp/foo", O_CREAT|O_TRUNC|O_RDWR, 0666);
        if (fd < 0) {
            perror("open");
            return EXIT_FAILURE;
        }
        /* The file must be at least as large as the region get_sems() maps. */
        if (ftruncate(fd, 4096) < 0) {
            perror("ftruncate");
            return EXIT_FAILURE;
        }

        get_sems();
        if (sem_init(sem1, 1, 0) < 0 || sem_init(sem2, 1, 0) < 0) {
            perror("sem_init");
            return EXIT_FAILURE;
        }

        pid_t pid = fork();
        if (pid < 0) {
            /* Without a child, worker1() would block in sem_wait() forever. */
            perror("fork");
            return EXIT_FAILURE;
        }

        if (pid > 0)
            worker1();
        else {
            /* The child maps the file again itself before starting. */
            get_sems();
            worker2();
        }
        return 0;
    }

Run on my current Linux system (a Core i5-10210U), this gets about 300k switches per second. Running it on Hurd-in-kvm (which would supposedly be slower) gets about 400k switches per second.