#ifndef __ASM_SMP_H
#define __ASM_SMP_H

/*
 * We need the APIC definitions automatically as part of 'smp.h'
 */
#ifndef __ASSEMBLY__
#include <linux/config.h>
#include <linux/threads.h>
#include <linux/ptrace.h>
#include <asm/desc.h>
#endif

#ifdef CONFIG_X86_LOCAL_APIC
#ifndef __ASSEMBLY__
#include <asm/fixmap.h>
#include <asm/bitops.h>
#include <asm/mpspec.h>
#ifdef CONFIG_X86_IO_APIC
#include <asm/io_apic.h>
#endif
#include <asm/apic.h>
#endif
#endif

/*
 * Interrupt targeting defaults, selected by kernel configuration:
 *
 *   TARGET_CPUS        - value naming the CPUs that are targeted
 *   INT_DELIVERY_MODE  - 0 for physical delivery, 1 for logical delivery
 *
 * NOTE(review): the consumers of these macros are not visible in this
 * header (presumably the APIC / IO-APIC setup code) -- confirm there
 * before changing any value.
 */
#ifdef CONFIG_SMP
# ifdef CONFIG_MULTIQUAD
#  define TARGET_CPUS 0xf     /* all CPUs in *THIS* quad */
#  define INT_DELIVERY_MODE 0     /* physical delivery on LOCAL quad */
# else
/* Not a constant: expands to the cpu_online_map variable at each use. */
#  define TARGET_CPUS cpu_online_map
#  define INT_DELIVERY_MODE 1     /* logical delivery broadcast to all procs */
# endif
#else
/* Uniprocessor build. */
# define INT_DELIVERY_MODE 1     /* logical delivery */
# define TARGET_CPUS 0x01
/*
 * Without SMP the per-CPU data accessor simply expands to per_data(0);
 * per_data() is defined elsewhere.
 */
# define smp_per_cpu_data()	per_data(0)
#endif

/*
 * Provide defaults for clustered_apic_mode and esr_disable unless
 * clustered_apic_mode has already been defined before this point.
 * Both are (1) on CONFIG_MULTIQUAD builds and (0) otherwise.
 *
 * NOTE(review): esr_disable is only defined inside the
 * clustered_apic_mode guard, so anything that pre-defines
 * clustered_apic_mode must also supply its own esr_disable.
 */
#ifndef clustered_apic_mode
 #ifdef CONFIG_MULTIQUAD
  #define clustered_apic_mode (1)
  #define esr_disable (1)
 #else /* !CONFIG_MULTIQUAD */
  #define clustered_apic_mode (0)
  #define esr_disable (0)
 #endif /* CONFIG_MULTIQUAD */
#endif 

#ifdef CONFIG_SMP
#ifndef __ASSEMBLY__

/*
 * Private routines/data
 */
 
/*
 * Declarations only; every object and function below is defined
 * elsewhere (not visible in this header).
 */
extern void smp_alloc_memory(void);
/* NOTE(review): presumably one bit per CPU in each map -- confirm at the definitions. */
extern unsigned long phys_cpu_present_map;
/* Also used as TARGET_CPUS on SMP builds without CONFIG_MULTIQUAD. */
extern unsigned long cpu_online_map;
extern volatile unsigned long smp_invalidate_needed;
extern int pic_mode;
extern void smp_flush_tlb(void);
extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
extern void smp_send_reschedule(int cpu);
extern void smp_invalidate_rcv(void);		/* Process an NMI */
/* Function pointer taking no arguments; who installs and calls it is not visible here. */
extern void (*mtrr_hook) (void);
extern void zap_low_mappings (void);

/*
 * On x86 all CPUs are mapped 1:1 to the APIC space.
 * This simplifies scheduling and IPI sending and
 * compresses data structures.
 */
/*
 * cpu_logical_map - identity mapping; hands back the CPU number
 * it was given, unchanged.
 */
static inline int cpu_logical_map(int cpu)
{
	const int mapped = cpu;	/* 1:1, nothing to look up */

	return mapped;
}
/*
 * cpu_number_map - identity mapping; hands back the CPU number
 * it was given, unchanged.
 */
static inline int cpu_number_map(int cpu)
{
	const int mapped = cpu;	/* 1:1, nothing to look up */

	return mapped;
}

/*
 * Some lowlevel functions might want to know about
 * the real APIC ID <-> CPU # mapping.
 */
/*
 * NOTE(review): presumably the number of entries in the APIC-ID-indexed
 * tables below; the arrays are declared without bounds here, so confirm
 * at their definitions.
 */
#define MAX_APICID 256
/*
 * Lookup tables between CPU numbers and physical / logical APIC IDs,
 * one table per direction as the names indicate.  Defined elsewhere.
 */
extern volatile int physical_apicid_to_cpu[];
extern volatile int cpu_to_physical_apicid[];
extern volatile int cpu_to_logical_apicid[];
extern volatile int logical_apicid_to_cpu[];

/*
 * General functions that each host system must provide.
 */
 
extern void smp_boot_cpus(void);
extern void smp_store_cpu_info(int id);		/* Store per-CPU info (like the initial udelay numbers) */

/*
 * This function is needed by all SMP systems. It must _always_ be valid
 * from the initial startup. We map APIC_BASE very early in page_setup(),
 * so this is correct in the x86 case.
 */
/*
 * Dummy memory operand for the asm in get_TR(); its value is never
 * used there.  Defined elsewhere.
 */
extern int dummy_cpu_id;

/*
 * get_TR - read the task register selector.
 *
 * Executes STR into the 16-bit form of a general register and returns
 * the result masked with 0xffe0, i.e. only bits 5-15 are kept.
 *
 * The function is declared 'pure' and the asm is not volatile, so the
 * compiler may merge repeated reads.
 */
static inline unsigned get_TR(void) __attribute__ ((pure));
static inline unsigned get_TR(void)
{
	unsigned tr;
	/* The PAIN!  The HORROR!
	 * Technically this is wrong, wrong, wrong, but
	 * gas doesn't know about strl.  *sigh*  Please
	 * flog them with a wet noodle repeatedly.
	 * The extra parameter (dummy_cpu_id, passed as a memory
	 * input) is a dummy value to prevent gcc from assuming
	 * that the value is const across function calls.  Fun!  -ben
	 */
	__asm__ ("str %w0" : "=r" (tr) : "m" (dummy_cpu_id));
	return tr & 0xffe0;	/* Pentiums leave the high bits undefined. */
}

/*
 * Current CPU number, computed from the task register: the masked TR
 * selector from get_TR() shifted right by 5, minus
 * (__FIRST_TSS_ENTRY >> 2).
 * NOTE(review): this relies on how the per-CPU TSS selectors are laid
 * out relative to __FIRST_TSS_ENTRY, which is defined outside this
 * header -- confirm against the descriptor table definitions.
 */
#define smp_processor_id()	( (get_TR() >> 5) - (__FIRST_TSS_ENTRY >> 2) )

/*
 * Pointer to the current CPU's struct per_cpu_data.
 *
 * There is no way to tell gcc that the low bits of get_TR
 * are always 0, hence the following macro to produce
 * optimal code.  -ben
 *
 * Instead of shifting get_TR() right by 5 and then left by
 * LOG2_PER_CPU_SIZE, the two shifts are folded into a single left
 * shift by (LOG2_PER_CPU_SIZE - 5).  The result is
 *
 *	&aligned_data + (smp_processor_id() << LOG2_PER_CPU_SIZE)
 *
 * NOTE(review): that equivalence assumes __FIRST_TSS_ENTRY is a
 * multiple of 4 and LOG2_PER_CPU_SIZE >= 5; neither is defined in
 * this header (nor are aligned_data and struct per_cpu_data) --
 * confirm at their definitions.
 */
#define smp_per_cpu_data()	\
	( (struct per_cpu_data *)					\
	  ( (get_TR() << (LOG2_PER_CPU_SIZE - 5)) + (long)&aligned_data \
		- (__FIRST_TSS_ENTRY << (LOG2_PER_CPU_SIZE - 2)) ) )

/*
 * hard_smp_processor_id - read the unsigned long at APIC_BASE + APIC_ID
 * and return the field that GET_APIC_ID() extracts from it.
 */
static __inline int hard_smp_processor_id(void)
{
	/*
	 * Deliberately a plain pointer: marking this access volatile
	 * leads to bad code generation.
	 */
	unsigned long *apic_id_reg = (unsigned long *)(APIC_BASE+APIC_ID);

	return GET_APIC_ID(*apic_id_reg);
}

/*
 * logical_smp_processor_id - read the unsigned long at
 * APIC_BASE + APIC_LDR and return the field that
 * GET_APIC_LOGICAL_ID() extracts from it.
 */
static __inline int logical_smp_processor_id(void)
{
	/*
	 * Deliberately a plain pointer: marking this access volatile
	 * leads to bad code generation.
	 */
	unsigned long *apic_ldr_reg = (unsigned long *)(APIC_BASE+APIC_LDR);

	return GET_APIC_LOGICAL_ID(*apic_ldr_reg);
}

#endif /* !__ASSEMBLY__ */

/*
 * Marker value meaning "no processor".  Defined for every CONFIG_SMP
 * build, including when this header is included from assembly.
 */
#define NO_PROC_ID		0xFF		/* No processor magic marker */

/*
 *	This magic constant controls our willingness to transfer
 *	a process across CPUs. Such a transfer incurs misses on the L1
 *	cache and, on a P6 or P5 with multiple L2 caches, L2 hits. My
 *	gut feeling is that the right value will vary by board. For a
 *	board with separate L2 caches it probably also depends on the
 *	RSS, and for a board with a shared L2 cache it ought to decay
 *	fast as other processes are run.
 */
 
#define PROC_CHANGE_PENALTY	15		/* Schedule penalty */

#endif
#endif
