diff options
Diffstat (limited to 'linux/dev')
-rw-r--r--  linux/dev/arch/i386/kernel/irq.c | 278
-rw-r--r--  linux/dev/include/asm-i386/smp.h |   4
2 files changed, 282 insertions(+), 0 deletions(-)
diff --git a/linux/dev/arch/i386/kernel/irq.c b/linux/dev/arch/i386/kernel/irq.c index e9dfe6a..4bed353 100644 --- a/linux/dev/arch/i386/kernel/irq.c +++ b/linux/dev/arch/i386/kernel/irq.c @@ -45,12 +45,19 @@ #include <asm/bitops.h> #include <asm/irq.h> #include <asm/io.h> +#include <asm/hardirq.h> extern int linux_timer_intr (void); extern spl_t splhigh (void); extern spl_t spl0 (void); extern void form_pic_mask (void); +#if 0 +/* XXX: This is the way it's done in linux 2.2. GNU Mach currently uses intr_count. It should be made using local_{bh/irq}_count instead (through hardirq_enter/exit) for SMP support. */ +unsigned int local_bh_count[NR_CPUS]; +unsigned int local_irq_count[NR_CPUS]; +#endif + /* * XXX Move this into more suitable place... * Set if the machine has an EISA bus. @@ -407,6 +414,277 @@ reserve_mach_irqs (void) } } +#ifdef __SMP__ +unsigned char global_irq_holder = NO_PROC_ID; +unsigned volatile int global_irq_lock; +atomic_t global_irq_count; + +atomic_t global_bh_count; +atomic_t global_bh_lock; + +/* + * "global_cli()" is a special case, in that it can hold the + * interrupts disabled for a longish time, and also because + * we may be doing TLB invalidates when holding the global + * IRQ lock for historical reasons. 
Thus we may need to check + * SMP invalidate events specially by hand here (but not in + * any normal spinlocks) + */ +#if 0 +/* XXX: check how Mach handles this */ +static inline void check_smp_invalidate(int cpu) +{ + if (test_bit(cpu, &smp_invalidate_needed)) { + clear_bit(cpu, &smp_invalidate_needed); + local_flush_tlb(); + } +} +#endif + +static void show(char * str) +{ + int i; + unsigned long *stack; + int cpu = smp_processor_id(); + extern char *get_options(char *str, int *ints); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [%d %d]\n", + atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]); + printk("bh: %d [%d %d]\n", + atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]); + stack = (unsigned long *) &stack; + for (i = 40; i ; i--) { + unsigned long x = *++stack; + //if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) { + printk("<[%08lx]> ", x); + //} + } +} + +#define MAXCOUNT 100000000 + +static inline void wait_on_bh(void) +{ + int count = MAXCOUNT; + do { + if (!--count) { + show("wait_on_bh"); + count = ~0; + } + /* nothing .. wait for the other bh's to go away */ + } while (atomic_read(&global_bh_count) != 0); +} + +/* + * I had a lockup scenario where a tight loop doing + * spin_unlock()/spin_lock() on CPU#1 was racing with + * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but + * apparently the spin_unlock() information did not make it + * through to CPU#0 ... nasty, is this by design, do we have to limit + * 'memory update oscillation frequency' artificially like here? + * + * Such 'high frequency update' races can be avoided by careful design, but + * some of our major constructs like spinlocks use similar techniques, + * it would be nice to clarify this issue. Set this define to 0 if you + * want to check whether your system freezes. 
I suspect the delay done + * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but + * i thought that such things are guaranteed by design, since we use + * the 'LOCK' prefix. + */ +#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 1 + +#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND +# define SYNC_OTHER_CORES(x) udelay(x+1) +#else +/* + * We have to allow irqs to arrive between __sti and __cli + */ +# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") +#endif + +static inline void wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!atomic_read(&global_irq_count)) { + if (local_bh_count[cpu] || !atomic_read(&global_bh_count)) + break; + } + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + clear_bit(0,&global_irq_lock); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + SYNC_OTHER_CORES(cpu); + __cli(); + //check_smp_invalidate(cpu); + if (atomic_read(&global_irq_count)) + continue; + if (global_irq_lock) + continue; + if (!local_bh_count[cpu] && atomic_read(&global_bh_count)) + continue; + if (!test_and_set_bit(0,&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * bottom half handlers. We need to wait until + * no other CPU is executing any bottom half handler. + * + * Don't wait if we're already running in an interrupt + * context or are inside a bh handler. + */ +void synchronize_bh(void) +{ + if (atomic_read(&global_bh_count) && !in_interrupt()) + wait_on_bh(); +} + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. 
+ */ +void synchronize_irq(void) +{ + if (atomic_read(&global_irq_count)) { + /* Stupid approach */ + cli(); + sti(); + } +} + +static inline void get_irqlock(int cpu) +{ + if (test_and_set_bit(0,&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + //check_smp_invalidate(cpu); + } while (test_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +#define EFLAGS_IF_SHIFT 9 + +/* + * A global "cli()" while in an interrupt context + * turns into just a local cli(). Interrupts + * should use spinlocks for the (very unlikely) + * case that they ever want to protect against + * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). 
+ */ +void __global_cli(void) +{ + unsigned int flags; + + __save_flags(flags); + if (flags & (1 << EFLAGS_IF_SHIFT)) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count[cpu]) + get_irqlock(cpu); + } +} + +void __global_sti(void) +{ + int cpu = smp_processor_id(); + + if (!local_irq_count[cpu]) + release_irqlock(cpu); + __sti(); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long __global_save_flags(void) +{ + int retval; + int local_enabled; + unsigned long flags; + + __save_flags(flags); + local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count[smp_processor_id()]) { + if (local_enabled) + retval = 1; + if (global_irq_holder == (unsigned char) smp_processor_id()) + retval = 0; + } + return retval; +} + +void __global_restore_flags(unsigned long flags) +{ + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } +} + +#endif + static int (*old_clock_handler) (); static int old_clock_pri; diff --git a/linux/dev/include/asm-i386/smp.h b/linux/dev/include/asm-i386/smp.h index 96423ae..fabe01d 100644 --- a/linux/dev/include/asm-i386/smp.h +++ b/linux/dev/include/asm-i386/smp.h @@ -1,4 +1,8 @@ #ifndef _I386_SMP_H #define _I386_SMP_H +#include <machine/cpu_number.h> + +#define smp_processor_id() cpu_number() + #endif /* _I386_SMP_H */ |