/*
 * MPC8xx Host Controller Interface driver for USB.
 * Brad Parker, brad@parker.boston.ma.us
 *
 * designed for the EmbeddedPlanet RPX lite board
 * (C) Copyright 2000 Embedded Planet
 * http://www.embeddedplanet.com
 *
 * The MPC850/832 processors don't provide either the OHCI or UHCI
 * interface.  They have, however, all the basics needed to be a host
 * controller.
 *
 * [except for the sending of the 1ms SOF frames; for that we need a
 *  microcode path]
 *
 * In the USB world (high speed) everything happens inside 1ms frames.  The
 * 1ms frame is the basic scheduling element.
 *
 * A controller needs to schedule frames being sent on the bus according
 * to this basic time line:
 *
 * 0                             -- ms --                                   1
 * +------------------------------------------------------------------------+
 * |SOF|   isochronous xfers | interrupt xfers   |control xfers |bulk xfers |
 * +------------------------------------------------------------------------+
 *
 * Isochronous and interrupt transfers are supposed to get 90% of the
 * available bandwidth.  Control transfers get 10%.  The rest is available for
 * bulk transfers.
 *
 * Each 1ms 'frame' is preceded by an SOF packet containing a frame number.
 *
 * Polling of devices (like keyboards) is done by sending an IN transfer
 * at each frame time.  If the device has no data it responds with NAK.
 * These are 'interrupt' transfers.
 *
 * Setup of devices is done with control transfers.
 *
 * The 8xx has no support for scheduling.  So, we use a 1ms timer to generate
 * an interrupt and run the scheduler in software.  At each SOF time we 
 * add an SOF packet to the tx list and look at the list of packets to send.
 * Packets are added to the tx list based on the priorities shown above.
 *
 * The internal scheduling structure is a "qe" or queue element.  Pending
 * urbs are bound to a qe and queued on a list by transaction type.
 * At the beginning of each frame the list of pending transactions is
 * scanned and eligible qe's are put on the current frame list.  The
 * driver keeps track of which devices are busy and won't send pending
 * transactions to devices which are in the middle of an existing transaction.
 * The busy nature of a device is separated into input and output pipes so
 * ISO IN transactions can be pending and an ISO OUT can still be sent.
 *
 * So, a qe pends on the class queues until the device it refers to is 
 * idle and then it is moved to a frame list.
 *
 * There are two frame lists, one for the current (in progress) frame
 * and one for the 'next' frame.  Transactions often span multiple
 * frames, either due to NAK's, timeouts or pacing for slow devices.
 * The use of a 'next' frame list gives us a place to put qe's which
 * need to be sent during the next frame.
 *
 * Interrupt transactions which are periodic are kept on a time ordered
 * list, sorted by ms.  A timer interrupt services the list and takes
 * qe's which are ready and puts them on the current frame list.
 *
 * Some notes on the CPM USB implementation:
 *
 * the buffer descriptors for a typical setup transaction should look
 * like this: (note that we don't really do it this way because we need
 * to allow for timeouts and retransmission)
 *
 * #1 3 bytes flags=R,L
 *	buffer -> 0x2d, 0x00, 0x01
 *
 * #2 8 bytes flags=R,L,TC,CNF,DATA0    <-- note, set the CNF bit to wait
 *	buffer -> (8 bytes of config)       for an ACK/NAK/STALL
 *
 * #3 3 bytes flags=R,L,CNF		<-- set CNF to wait for DATA1
 *	buffer -> 0x69, 0x00, 0x01      when tx bd is marked done, the
 *					rx packet is in the rx descriptor
 *					(i.e. the cpm won't mark the tx done
 *					until the DATAx packet is received)
 *
 * This should
 *	send a SETUP, a DATA0
 *	wait for an ACK (or NAK/STALL)
 *	send a IN
 *	wait for a DATA1
 *
 * ---
 *
 * An engineer from MOT claimed that the internal logic won't complete a
 * transmit descriptor until the response is received.  This implies that
 * if
 * 	the host sends IN
 * 	function responds with DATAx
 *	the host sends ACK,
 *
 * The rx descriptor will complete *before* the transmit descriptor does.
 * (or worst case, at the same time, which is why the isr services the
 *  rx side before the tx side).
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/malloc.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/usb.h>

#include <asm/io.h>
#include <asm/8xx_immap.h>
#include <asm/pgtable.h>
#include <asm/mpc8xx.h>

#if 0
#include "m8xxhci.h"
#endif

#define USB_UCODE_PATCH		/* SOF via external 1Khz signal */
#define DEBUG_CHECKS
#define CONFIG_RPXLITE_CW
#define CONFIG_RPXLITE_DW

/*#include "commproc.h"*/
extern cpm8xx_t *cpmp;          /* Pointer to comm processor */
extern uint m8xx_cpm_dpalloc(uint size);
extern uint m8xx_cpm_dpalloc_index(void);
extern void usb_destroy_configuration(struct usb_device *dev);
extern void cpm_install_handler(int vec, void (*handler)(void *),
				void *dev_id);

#define CPMVEC_TIMER4		((ushort)0x07)
#define CPMVEC_TIMER1		((ushort)0x19)
#define	CPMVEC_SCC1		((ushort)0x1e)

/*
 * CPM buffer descriptor: one entry of a receive or transmit descriptor
 * ring (struct m8xxhci_private keeps ring pointers of this type in
 * rbase/tbase).
 * NOTE(review): the BD_SC_* and BD_USB_* masks defined below presumably
 * apply to cbd_sc - confirm against the users of those masks.
 */
typedef struct cpm_buf_desc {
	ushort	cbd_sc;		/* Status and Control */
	ushort	cbd_datlen;	/* Data length in buffer */
	uint	cbd_bufaddr;	/* Buffer address in host memory */
} cbd_t;

#define BD_SC_EMPTY	((ushort)0x8000)	/* Recieve is empty */
#define BD_SC_READY	((ushort)0x8000)	/* Transmit is ready */
#define BD_SC_WRAP	((ushort)0x2000)	/* Last buffer descriptor */
#define BD_SC_INTRPT	((ushort)0x1000)	/* Interrupt on change */
#define BD_SC_LAST	((ushort)0x0800)	/* Last buffer in frame */
#define BD_SC_CM	((ushort)0x0200)	/* Continous mode */
#define BD_SC_ID	((ushort)0x0100)	/* Rec'd too many idles */
#define BD_SC_P		((ushort)0x0100)	/* xmt preamble */
#define BD_SC_BR	((ushort)0x0020)	/* Break received */
#define BD_SC_FR	((ushort)0x0010)	/* Framing error */
#define BD_SC_PR	((ushort)0x0008)	/* Parity error */
#define BD_SC_OV	((ushort)0x0002)	/* Overrun */
#define BD_SC_CD	((ushort)0x0001)	/* ?? */

#define BD_USB_TC	((ushort)0x0400)	/* transmit crc after last */
#define BD_USB_CNF	((ushort)0x0200)	/* wait for handshake */
#define BD_USB_LSP	((ushort)0x0100)	/* low speed */
#define BD_USB_DATA0	((ushort)0x0080)	/* send data0 pid */
#define BD_USB_DATA1	((ushort)0x00c0)	/* send data1 pid */
#define BD_USB_RX_PID	((ushort)0x00c0)	/* rx pid type bits */
#define BD_USB_RX_DATA0	((ushort)0x0000)	/* rx data0 pid */
#define BD_USB_RX_DATA1	((ushort)0x0040)	/* rx data1 pid */
#define BD_USB_RX_SETUP	((ushort)0x0080)	/* rx setup pid */

/* tx errors */
#define BD_USB_NAK	((ushort)0x0010)	/* NAK received */
#define BD_USB_STAL	((ushort)0x0008)	/* STALL received */
#define BD_USB_TO	((ushort)0x0004)	/* timeout */
#define BD_USB_UN	((ushort)0x0002)	/* usb underrun */

/* rx errors */
#define BD_USB_NONOCT	((ushort)0x0010)	/* non-octet aligned pkt */
#define BD_USB_AB	((ushort)0x0008)	/* frame aborted */
#define BD_USB_CRC	((ushort)0x0004)	/* crc error */

/* FCR bits */
#define FCR_LE	0x08	/* little endian */
#define FCR_BE	0x18	/* big endian */

/* USEPx bits */
#define USEP_TM_CONTROL		0x0000
#define USEP_TM_INTERRUPT	0x0100
#define USEP_TM_BULK		0x0200
#define USEP_TM_ISOCHRONOUS	0x0300
#define USEP_MF_ENABLED		0x0020
#define USEP_RTE_ENABLED	0x0010
#define USEP_THS_NORMAL		0x0000
#define USEP_THS_IGNORE		0x0004
#define USEP_RHS_NORMAL		0x0000
#define USEP_RHS_IGNORE		0x0001
		
/* USMOD bits */
#define USMOD_LSS	0x80
#define USMOD_RESUME	0x40
#define USMOD_TEST	0x04
#define USMOD_HOST	0x02
#define USMOD_EN	0x01

/* USBER bits */	
#define BER_RESET	0x0200
#define BER_IDLE	0x0100
#define BER_TXE3	0x0080
#define BER_TXE2	0x0040
#define BER_TXE1	0x0020
#define BER_TXE0	0x0010
#define BER_SOF		0x0008
#define BER_BSY		0x0004
#define BER_TXB		0x0002
#define BER_RXB		0x0001

/* USB tokens */
#define SOF	0xa5
#define OUT	0xe1
#define IN	0x69
#define SETUP	0x2d
#define DATA0	0xc3
#define DATA1	0x4b
#define ACK	0xd2

/* Rx & Tx ring sizes */

/* note: usb dictates that we need to be able to rx 64 byte frames;
 * the CPM wants to put 2 bytes of CRC at the end and requires that
 * the rx buffers be on a 4 byte boundary.  So, we add 4 bytes of
 * padding to the 64 byte min.
 */
#if 0 /* small, for debug */
#define CPM_USB_RX_PAGES	1
#define CPM_USB_RX_FRSIZE	(64+4)
#define CPM_USB_RX_FRPPG	(PAGE_SIZE / CPM_USB_RX_FRSIZE)
#define RX_RING_SIZE		(CPM_USB_RX_FRPPG * CPM_USB_RX_PAGES)
#define TX_RING_SIZE		10
#endif

#if 0 /* med, for debug */
#define CPM_USB_RX_PAGES	1
#define CPM_USB_RX_FRSIZE	(64+4)
#define CPM_USB_RX_FRPPG	(PAGE_SIZE / CPM_USB_RX_FRSIZE)
#define RX_RING_SIZE		(CPM_USB_RX_FRPPG * CPM_USB_RX_PAGES)
#define TX_RING_SIZE		64
#endif

#if 1
#define CPM_USB_RX_PAGES	8
#define CPM_USB_RX_FRSIZE	(1024)
#define CPM_USB_RX_FRPPG	(PAGE_SIZE / CPM_USB_RX_FRSIZE)
#define RX_RING_SIZE		(CPM_USB_RX_FRPPG * CPM_USB_RX_PAGES)
#define TX_RING_SIZE		40
#endif

/* this is the max size we tell the CPM */
#define MAX_RBE	(CPM_USB_RX_FRSIZE)	/* max receive buffer size (bytes) */


/*
 * MPC850 USB parameter RAM.
 *
 * NOTE(review): the meaning of the individual fields is not established
 * by the code visible here; the names suggest four endpoint parameter
 * block pointers followed by receive-side state and a frame number.
 * Confirm against the MPC850 documentation before relying on that.
 */
typedef struct usbpr {
	ushort	usb_epbptr[4];
	uint	usb_rstate;
	uint	usb_rptr;
	ushort	usb_frame_n;
	ushort	usb_rbcnt;
	uint	usb_rtemp;
} usbpr_t;

/*
 * USB endpoint parameter block.
 *
 * NOTE(review): field semantics are inferred from the names only
 * (rx/tx ring base, function codes, max receive buffer length, current
 * rx/tx descriptor pointers, transmit-side state) - confirm against the
 * MPC850 documentation.  The FCR_LE/FCR_BE values defined earlier in this
 * file are presumably meant for epb_rfcr/epb_tfcr.
 */
typedef struct epb {
	ushort	epb_rbase;
	ushort	epb_tbase;
	u_char	epb_rfcr;
	u_char	epb_tfcr;
	ushort	epb_mrblr;
	ushort	epb_rbptr;
	ushort	epb_tbptr;
	uint	epb_tstate;
	uint	epb_tptr;
	ushort	epb_tcrc;
	ushort	epb_tbcnt;
} epb_t;

/*
 * MPC850 USB registers - mapped onto SCC1 address space.
 *
 * The res* members are padding that keeps the named registers at their
 * hardware offsets; do not reorder or resize anything in this struct.
 * NOTE(review): the USMOD_*, USEP_* and BER_* masks defined earlier in
 * this file presumably apply to usb_usmod, usb_usep[] and usb_usber
 * respectively - confirm against their users.
 */
typedef struct usbregs {
	u_char	usb_usmod;
	u_char	usb_usadr;
	u_char	usb_uscom;
	char	res0;
	ushort	usb_usep[4];
	char	res1[4];
	ushort	usb_usber;
	ushort	res2;
	ushort	usb_usbmr;
	u_char	res3;
	u_char	usb_usbs;
	u_char	res4[8];
} usbregs_t;

/* bits in parallel i/o port registers that have to be cleared to
 * configure the pins for SCC1 USB use.
 */
#define PA_USB_RXD	((ushort)0x0001)
#define PA_USB_OE	((ushort)0x0002)
#define PC_USB_RXP	((ushort)0x0010)
#define PC_USB_RXN	((ushort)0x0020)
#define PC_USB_TXP	((ushort)0x0100)
#define PC_USB_TXN	((ushort)0x0200)

#define CPMVEC_USB	CPMVEC_SCC1

/*
 * Per-device driver state, reached from a usb_device through its hcpriv
 * pointer (see usb_to_m8xxhci()).
 *
 * busy[] and busy_count[] are indexed by direction: 0 for IN pipes and
 * 1 for OUT pipes (the helpers compute "usb_pipein(pipe) ? 0 : 1").
 */
struct m8xxhci_device {
	struct usb_device	*usb;		/* back pointer to the usb core device */
	int			busy[2];	/* outstanding transactions per direction */
	int			busy_count[2];	/* times the device was found busy; cleared when busy[] drops to 0 */
};
#define MAX_DEV_BUSYS	100

#define m8xxhci_to_usb(m8xxhci)	((m8xxhci)->usb)
#define usb_to_m8xxhci(usb)	((struct m8xxhci_device *)(usb)->hcpriv)

#include <linux/list.h>

/*
 * interrupt request entry
 *
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field notes below restate the existing comments and names.
 */
struct m8xxhci_irq {
	struct list_head irq_list;	/* Active interrupt list.. */
	int inuse;			/* Inuse? */
	int pipe;
	int period;
	int data_len;
	u_char *data;
	usb_device_irq completed;	/* Completion handler routine */
	void *dev_id;
	int status;
	char buffer[256];
};

/*
 * queue entry
 *
 * The driver's internal scheduling unit.  Entries live in the fixed
 * queues[][] pool of struct m8xxhci_private; allocate_qe() claims one by
 * setting bit 0 of 'inuse' and deallocate_qe() releases it again.
 */
struct m8xxhci_qe {
	int inuse;			/* Inuse? */
	int retries;
#define MAX_QE_RETRIES	3
	int busys;
#define MAX_QE_STALLED	5
#define MAX_QE_BUSYS	10
	int qtype;			/* Q_ISO/Q_INTR/Q_CTRL/Q_BULK; indexes the per-class lists */
	int qstate;			/* one of the QS_* values below; 0 after allocate/deactivate */
#define QS_SETUP	1
#define QS_SETUP2	2	
#define QS_SETUP3	3
#define QS_INTR		4
#define QS_BULK		5
#define QS_ISO		6
	unsigned int pipe;		/* pipe info given */
	u_char devnum;
	u_char endpoint;
	void *cmd;
	void *data;
	int whichdata;			/* data0/1 marker */
	int data_len;			/* size of whole xfer */
	int recv_len;			/* IN/size recv so far */
	int send_len;			/* OUT/size sent so far */
	int status;			/* result recorded by complete_qe() */
	int maxpacketsize;		/* max in/out size */
	int reschedule;			/* flag - needs reschedule */
	int shortread;			/* flag - short read */
	int iso_ptr;			/* index into urb->iso_frame_desc */
	u_char *iso_data;		/* ptr to data for current iso frame */
	u_char ph[3];			/* temp packet header */

	wait_queue_head_t wakeup;	/* wait_for_qe() sleeps here; complete_qe() wakes it */

	struct usb_device *dev;
	struct urb *urb;		/* urb bound to this qe, or 0 */

	struct m8xxhci_qe *next; /* for delay list */
	struct list_head frame_list;	/* link on a frame's heads[qtype] list */
	struct list_head qe_list;	/* link on the pending qe_list[qtype] list */

	int delta;		/* delay (in ms) till this is due */
};

#define Q_ISO		0
#define Q_INTR		1
#define Q_CTRL		2
#define Q_BULK		3
#define MAX_Q_TYPES	4

/*
 * Work scheduled for one frame.  The add_to_*_frame() helpers add a
 * qe's data_len to both counters and append the qe to heads[qe->qtype];
 * take_from_current_frame() reverses that.
 */
struct m8xxhci_frame {
	int total_bytes;			/* sum of data_len over all queued qe's */
	int bytes[MAX_Q_TYPES];			/* same sum, split by transfer class */
	struct list_head heads[MAX_Q_TYPES];	/* queued qe's, linked via qe->frame_list */
};

#define BYTES_PER_USB_FRAME	1280 /*1500*/

/*
 * cumulative percentages, for enforcing max % of frame by class
 *
 * Indexed by queue type (Q_ISO, Q_INTR, Q_CTRL, Q_BULK).  Each entry is
 * a byte budget derived from BYTES_PER_USB_FRAME; the values are
 * cumulative, so the iso and interrupt entries share the same 90% limit
 * and the control and bulk entries are both the full frame.
 */
static int frame_cumul_class_quota[MAX_Q_TYPES] = {
	(BYTES_PER_USB_FRAME * 90) / 100,	/* iso       90% */
	(BYTES_PER_USB_FRAME * 90) / 100,	/* interrupt 90% */
	BYTES_PER_USB_FRAME,			/* control   remaining 10% */
	BYTES_PER_USB_FRAME			/* bulk      remaining% */
};

/*
 * Driver-wide state.
 *
 * this doesn't really need to be a structure, since we can only have
 * one mcp usb controller, but it makes things more tidy...
 *
 * The single instance is reached through the file-scope m8xxhci_ptr.
 */
struct m8xxhci_private {
	struct usb_bus *bus;
	struct m8xxhci_device *root_hub; /* Root hub device descriptor.. */

	epb_t *epbptr[4];	/* epb ptr */
	cbd_t *rbase;		/* rx ring bd ptr */
	cbd_t *tbase;		/* tx ring bd ptr */

	int rxnext;		/* index of next rx to be filled */
	int txlast;		/* index of last tx bd fill */
	int txnext;		/* index of next available tx bd */
	int txfree;		/* count of free tx bds */
	int frame_no;		/* frame # send in next SOF */
	u_char sof_pkt[3];	/* temp buffer for sof frames */
	int need_sof;		/* 1ms interrupt could not send flag */
	int ms_count;
	int need_query;

	/* fixed pool of queue entries, one row per transfer class */
#define M8XXHCI_MAXQE	32
	struct m8xxhci_qe queues[MAX_Q_TYPES][M8XXHCI_MAXQE];
	/* pending qe's per class, linked through qe->qe_list (see enqueue_qe) */
	struct list_head qe_list[MAX_Q_TYPES];

	/* set by make_active_qe(), cleared by make_inactive_qe() */
	struct m8xxhci_qe *active_qe;

	/* per-class transmit state; make_inactive_qe() resets it to XS_IDLE */
	int xmit_state[MAX_Q_TYPES];
#define XS_IDLE		0
#define XS_SETUP	1
#define XS_IN		2
	struct m8xxhci_qe *tx_bd_qe[TX_RING_SIZE];

	int driver_state;
#define DS_INIT		0
#define DS_FIND_DEVICE	1
#define DS_READY	2
#define DS_MISSING	3

	struct list_head urb_list; /* active urb list.. */

	/* two frame work lists; current_frame/next_frame point into frames[] */
	struct m8xxhci_frame frames[2];
	struct m8xxhci_frame *current_frame;
	struct m8xxhci_frame *next_frame;

	/* stats */
	struct {
		ulong interrupts;
		ulong rxb;
		ulong txb;
		ulong bsy;
		ulong sof;
		ulong txe[4];
		ulong idle;
		ulong reset;
		ulong tx_err;
		ulong tx_nak;
		ulong tx_stal;
		ulong tx_to;
		ulong tx_un;

		ulong rx_err;
		ulong rx_crc;
		ulong rx_abort;
		ulong rx_nonoct;

		ulong retransmit;
		ulong restart_tx;
	} stats;
};

static volatile struct m8xxhci_private *m8xxhci_ptr;
static wait_queue_head_t m8xxhci_configure;
static int m8xxhci_events;
#define EV_IDLE		0x01
#define EV_RESET	0x02
#define EV_QUERY	0x04
static volatile unsigned long m8xxhci_timer_ticks;
static volatile int m8xxhci_debug = 0;
static volatile int m8xxhci_verbose = 0;

void cleanup_drivers(void);
void m8xxhci_kick_xmit(void);
void m8xxhci_flush_xmit(void);
void m8xxhci_flush_recv(void);
void m8xxhci_stop_controller(void);
int m8xxhci_start_controller(void);
static void m8xxhci_interrupt(void *);
static int m8xxhci_unlink_urb(urb_t *urb);

static inline cbd_t *next_bd(void);
static inline cbd_t *next_bd_qe(struct m8xxhci_qe *qe);
static void complete_iso_rx(struct m8xxhci_qe *qe);
static void process_done_rxbds(void);
static void process_done_txbds(void);
static void mark_device_busy(struct m8xxhci_qe *qe);
static void mark_device_idle(struct m8xxhci_qe *qe);
static int send_qe(struct m8xxhci_qe *qe);
static void enqueue_qe(struct m8xxhci_qe *qe, int qtype);
static struct m8xxhci_qe *allocate_qe(struct m8xxhci_device *dev, int qtype);
static void deallocate_qe(struct m8xxhci_qe *qe);
static void advance_qe_state(struct m8xxhci_qe *qe);
static void make_active_qe(struct m8xxhci_qe *qe);
static void make_inactive_qe(struct m8xxhci_qe *qe);
static void make_inactive_qe_idle_device(struct m8xxhci_qe *qe);
static void run_queues(void);
static int unlink_urb(urb_t *urb, int qtype);
static struct m8xxhci_device *add_local_dev(struct usb_device *usb_dev);

static void assert_resume(int disable);
static void assert_reset(int disable);

/* debug */
static void dump_tx_state(void);
static void dump_tx_bds(char *str);
static void dump_rx_bds(char *str);
static void dump_state(int stats, int rx, int tx);


/* ---- */

#if 1
/*
 * Read the 64-bit PowerPC time base.
 *
 * The upper half is read before and after the lower half and the
 * sequence is repeated until both upper reads agree, so a carry from the
 * lower into the upper word between the reads cannot produce a torn
 * value.
 *
 * The previous version had no return statement and relied on the values
 * the operand-less asm statements happened to leave in r3/r4; the
 * registers are now proper asm outputs and the result is returned
 * explicitly.
 *
 * Returns the time base as (upper << 32) | lower.
 */
//extern long long _get_TBR(void);
long long _get_TBR(void)
{
	unsigned long upper, lower, check;

	do {
		asm volatile ("mftbu   %0" : "=r" (upper));
		asm volatile ("mftb    %0" : "=r" (lower));
		asm volatile ("mftbu   %0" : "=r" (check));
	} while (check != upper);

	/* kept from the original sequence */
	asm volatile ("isync");

	return ((long long)upper << 32) | lower;
}

#else
#define _get_TBR()	(0)
#endif

/*
 * In-memory debug event log, filled by log_event() and printed by
 * dump_events().
 */
#define MAX_EVENTS	500 /*4000*//*2000*/
static int e_count;	/* index of the next slot log_event() will fill */
static int e_wrapped;	/* bumped each time log_event() wraps back to slot 0 */
static int e_level;	/* log_event() drops events whose level is above this */
static struct {
	long long e_time;	/* _get_TBR() value when the event was logged */
	char *e_str;		/* caller's string pointer (not copied) */
	int e_num;		/* caller-supplied number */
} events[MAX_EVENTS];

/* taken by dump_events() while it prints and resets the log */
static spinlock_t event_lock = SPIN_LOCK_UNLOCKED;

/*
 * Print every recorded event with printk(), oldest first, then empty
 * the log.
 *
 * Once log_event() has wrapped, the oldest surviving entry sits at
 * e_count (the slot that would be overwritten next), so printing starts
 * there and wraps around.  Previously the dump always started at slot 0,
 * which printed the newest entries before the oldest ones after a wrap.
 *
 * Runs with event_lock held and interrupts disabled.
 */
static void
dump_events(void)
{
	int i, count, first, slot;
	u_long t1, t2;
	unsigned long flags;

	spin_lock_irqsave(&event_lock, flags);

	if (e_wrapped) {
		count = MAX_EVENTS;
		/* e_count may equal MAX_EVENTS right before the next wrap */
		first = e_count % MAX_EVENTS;
	} else {
		count = e_count;
		first = 0;
	}

	for (i = 0; i < count; i++) {
		slot = (first + i) % MAX_EVENTS;

		/* split the 64-bit timestamp into two 32-bit halves */
		t1 = events[slot].e_time >> 32;
		t2 = events[slot].e_time;
		printk("%08x:%08x %s %d (0x%x)\n",
		       (int)t1, (int)t2, events[slot].e_str,
		       events[slot].e_num, events[slot].e_num);
	}
	e_count = 0;
	e_wrapped = 0;

	spin_unlock_irqrestore(&event_lock, flags);
}

/*
 * Externally visible hook for dumping the event log.  The call is
 * compiled in but switched off; change the constant to enable it.
 */
void
m8xxhci_dump_events(void)
{
	if (0) {
		dump_events();
	}
}


static void
reset_events(void)
{
	e_count = 0;
}

/*
 * Set the event log verbosity: log_event() records an event only when
 * its level does not exceed the value stored here.
 */
static void
set_event_level(int l)
{
	e_level = l;
}

/*
 * Record one debug event (timestamp, string pointer, number).
 *
 * level - verbosity of this event; dropped when above e_level
 * s     - string stored by pointer, not copied
 * n     - number stored alongside the string
 *
 * When the buffer is full the write position wraps to slot 0 and
 * e_wrapped is incremented, so old entries get overwritten.
 */
static void
log_event(int level, char *s, int n)
{
	if (level > e_level)
		return;

#if 1
	/* full: start overwriting from the beginning */
	if (e_count >= MAX_EVENTS) {
		e_wrapped++;
		e_count = 0;
	}
#endif

	if (e_count >= MAX_EVENTS) {
#if 0
		printk("EVENT LOG FULL!\n");
		dump_events();
#endif
		return;
	}

	events[e_count].e_time = _get_TBR();
	events[e_count].e_str = s;
	events[e_count].e_num = n;
	e_count++;
}

/*
Universal Serial Bus Specification Revision 1.1 158

8.3.5 Cyclic Redundancy Checks

Cyclic redundancy checks (CRCs) are used to protect all non-PID fields
in token and data packets. In this context, these fields are
considered to be protected fields. The PID is not included in the CRC
check of a packet containing a CRC. All CRCs are generated over their
respective fields in the transmitter before bit stuffing is
performed. Similarly, CRCs are decoded in the receiver after stuffed
bits have been removed. Token and data packet CRCs provide 100%
coverage for all single- and double-bit errors. A failed CRC is
considered to indicate that one or more of the protected fields is
corrupted and causes the receiver to ignore those fields, and, in most
cases, the entire packet.

For CRC generation and checking, the shift registers in the generator
and checker are seeded with an all-ones pattern. For each data bit
sent or received, the high order bit of the current remainder is XORed
with the data bit and then the remainder is shifted left one bit and
the low-order bit set to zero. If the result of that XOR is one, then
the remainder is XORed with the generator polynomial.

When the last bit of the checked field is sent, the CRC in the
generator is inverted and sent to the checker MSb first. When the last
bit of the CRC is received by the checker and no errors have occurred,
the remainder will be equal to the polynomial residual.

A CRC error exists if the computed checksum remainder at the end of a
packet reception does not match the residual.

Bit stuffing requirements must
be met for the CRC, and this includes the need to insert a zero at the
end of a CRC if the preceding six bits were all ones.

8.3.5.1 Token CRCs

A five-bit CRC field is provided for tokens and covers the ADDR and
ENDP fields of IN, SETUP, and OUT tokens or the time stamp field of an
SOF token. The generator polynomial is:

	G(X) = X 5 + X 2 + 1

The binary bit pattern that represents this polynomial is 00101B. If
all token bits are received without error, the five-bit residual at
the receiver will be 01100B.

*/

/* generator polynomial as used by the LSB-first update in do_crc() */
static unsigned int polynomial = 0x0014;

/*
 * Run the token CRC over the low 'bits' bits of 'in', least significant
 * bit first, starting from an all-ones (0x1f) remainder.
 *
 * Returns the raw remainder; the caller inverts it.  With bits <= 0 the
 * initial remainder 0x1f is returned unchanged.
 */
static int
do_crc(int in, int bits)
{
	unsigned int remainder = 0x1f;
	unsigned char feedback;
	int i;

	for (i = 0; i < bits; i++) {
		/* only bit 0 of the XOR decides whether to apply the polynomial */
		feedback = in ^ remainder;
		remainder >>= 1;
		if (feedback & 0x01)
			remainder ^= polynomial;
		in /= 2;
	}

	return remainder;
}

/*
 * Build the 16-bit token field: address in bits 0-6, endpoint from bit 7
 * up, and the inverted 5-bit CRC of those 11 bits in bits 11-15.
 *
 * Returns the packed value.
 */
static int
calc_crc5(int addr, int endpoint) 
{
	int token = addr | (endpoint << 7);
	int crc5 = ~do_crc(token, 11) & 0x1f;
	int packed = token | (crc5 << 11);

	if (0) printk("crc 0x%x, final %08x\n", crc5, packed);

	return packed;
}

/* ----- */

static spinlock_t framelist_lock = SPIN_LOCK_UNLOCKED;

/* put qe on end of list of qe's for next frame */
static void
add_to_next_frame(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	unsigned long flags;

	spin_lock_irqsave(&framelist_lock, flags);

	if (0) printk("add_to_next_frame(qe=%p)\n", qe);
	log_event(3, "add_to_next_frame qe", (int)qe);

	hp->next_frame->total_bytes += qe->data_len;
	hp->next_frame->bytes[qe->qtype] += qe->data_len;

	list_add_tail(&qe->frame_list, &hp->next_frame->heads[qe->qtype]);

	spin_unlock_irqrestore(&framelist_lock, flags);
}

static void
add_to_current_frame(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	unsigned long flags;

	spin_lock_irqsave(&framelist_lock, flags);

	if (0) printk("add_to_current_frame(qe=%p)\n", qe);
	log_event(3, "add_to_current_frame qe", (int)qe);
	if (0) log_event(3, "active_qe", (int)hp->active_qe);

	hp->current_frame->total_bytes += qe->data_len;
	hp->current_frame->bytes[qe->qtype] += qe->data_len;

	list_add_tail(&qe->frame_list, &hp->current_frame->heads[qe->qtype]);

	spin_unlock_irqrestore(&framelist_lock, flags);
}


static struct m8xxhci_qe *
take_from_current_frame(int qtype)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	unsigned long flags;
	struct list_head *list;
	struct m8xxhci_qe *qe = NULL;

	spin_lock_irqsave(&framelist_lock, flags);

	list = &hp->current_frame->heads[qtype];

	if (!list_empty(list)) {
		/* take the top of the list */
		qe = list_entry(list->next, struct m8xxhci_qe, frame_list);

		if (0) printk("take_from_current_frame(qtype=%d) "
			      "top %p, top->next %p, list %p, qe %p\n",
			      qtype, list->next, list->next->next, list, qe);
		log_event(3, "take_from_current_frame qe", (int)qe);

		list_del(&qe->frame_list);
		INIT_LIST_HEAD(&qe->frame_list);

		hp->current_frame->total_bytes -= qe->data_len;
		hp->current_frame->bytes[qe->qtype] -= qe->data_len;
	}

	spin_unlock_irqrestore(&framelist_lock, flags);

	return qe;
}

static void
dump_frame_list(char *what, struct m8xxhci_frame *f)
{
	int i;
	struct list_head *head, *l;
	struct m8xxhci_qe *qe;
	unsigned long flags;

	spin_lock_irqsave(&event_lock, flags);

	if (f == 0) {
	  printk("%s: %p; <unset>\n", what, f);
	  return;
	}

	printk("%s: %p, total_bytes %d\n", what, f, f->total_bytes);
	for (i = 0; i < MAX_Q_TYPES; i++) {
		printk("[%d] bytes %d, head %p next %p prev %p\n",
		       i, f->bytes[i],
		       &f->heads[i], f->heads[i].next, f->heads[i].prev);

		head = &f->heads[i];
		for (l = head->next; l != head; l = l->next) {
			qe = list_entry(l, struct m8xxhci_qe, qe_list);
			printk(" l %p, next %p, prev %p, qe %p\n",
			       l, l->next, l->prev, qe);
			printk("  qe->urb %p, state %d, status %d\n",
			       qe->urb, qe->qstate, qe->status);
		}
	}

	spin_unlock_irqrestore(&event_lock, flags);
}

/*
 * Debug helper: print the current and next frame pointers, then the
 * contents of both frames.
 */
static void
dump_frame_lists(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct m8xxhci_frame *cur = hp->current_frame;
	struct m8xxhci_frame *nxt = hp->next_frame;

	printk("frame lists: current %p, next %p\n", cur, nxt);

	dump_frame_list("current", hp->current_frame);
	dump_frame_list("next", hp->next_frame);
}

/* ----- */

static spinlock_t queue_lock = SPIN_LOCK_UNLOCKED;
static int queues_busy;

static spinlock_t txbd_list_lock = SPIN_LOCK_UNLOCKED;
static int txbd_list_busy;

static struct m8xxhci_qe *delay_qe_list;
static int qdbg = 0;

/*
 * Translate the transfer type encoded in a usb pipe value into the
 * driver's queue class (Q_CTRL, Q_INTR, Q_BULK or Q_ISO).
 *
 * An unrecognised type is reported (with DEBUG_CHECKS) and mapped to 0.
 */
static int
map_pipe_to_qtype(int pipe)
{
	int type = usb_pipetype(pipe);

	if (type == PIPE_CONTROL)
		return Q_CTRL;
	if (type == PIPE_INTERRUPT)
		return Q_INTR;
	if (type == PIPE_BULK)
		return Q_BULK;
	if (type == PIPE_ISOCHRONOUS)
		return Q_ISO;

#ifdef DEBUG_CHECKS
	printk("map_pipe_to_qtype(%x) -> unknown pipe type!\n", pipe);
#endif

	return 0;
}

/*
 * allocate an internal queue entry for sending frames
 *
 * Claims the first free entry in the fixed pool for the given transfer
 * class by atomically setting bit 0 of its 'inuse' word, resets its
 * bookkeeping fields and list links, and returns it.
 *
 * dev   - only used in debug/diagnostic messages
 * qtype - transfer class (Q_ISO .. Q_BULK) selecting the pool row
 *
 * Returns the qe, or NULL (after a printk) when every entry of that
 * class is in use.
 *
 * The scan is bounded before each test_and_set_bit().  The previous loop
 * evaluated test_and_set_bit() first, so with a full pool it set a bit
 * one element past the end of queues[qtype] - marking the next class's
 * first entry busy, or touching whatever follows the array for the last
 * class - and it relied on DEBUG_CHECKS to notice the overrun.
 */
static struct m8xxhci_qe *
allocate_qe(struct m8xxhci_device *dev, int qtype)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct m8xxhci_qe *qe, *end;

	if (0) printk("allocate_qe(dev=%p,qtype=%d)\n", dev, qtype);

	qe = hp->queues[qtype];
	end = &hp->queues[qtype][M8XXHCI_MAXQE];

	for (; qe < end; qe++) {
		/* non-zero means somebody else already owns this entry */
		if (test_and_set_bit(0, &qe->inuse) != 0)
			continue;

		qe->qtype = qtype;
		qe->qstate = 0;
		qe->retries = 0;
		qe->busys = 0;
		qe->recv_len = 0;
		qe->send_len = 0;
		qe->reschedule = 0;
		qe->shortread = 0;
		qe->dev = 0;
		qe->urb = 0;
		qe->iso_ptr = 0;

		INIT_LIST_HEAD(&qe->frame_list);
		INIT_LIST_HEAD(&qe->qe_list);
		init_waitqueue_head(&qe->wakeup);

		if (0) printk("allocate_qe(dev=%p,qtype=%d) -> %p\n",
			      dev, qtype, qe);
		return(qe);
	}

	printk("m8xxhci: out of qe's for dev %p\n", dev);
	return(NULL);
}

/*
 * Return a queue entry to the pool by clearing bit 0 of its 'inuse'
 * word.  The entry's other fields are left untouched.
 */
static void
deallocate_qe(struct m8xxhci_qe *qe)
{
	if (0) {
		printk("deallocate_qe(qe=%p)\n", qe);
	}

	log_event(2, "deallocate_qe qe", (int)qe);

	clear_bit(0, &qe->inuse);
}

/*
 * remove qe from pending list
 *
 * Does nothing when the qe's pending link is already empty; otherwise
 * unlinks it and leaves the link self-pointing.
 */
static void
dequeue_qe(struct m8xxhci_qe *qe)
{
	struct list_head *link = &qe->qe_list;

	if (list_empty(link))
		return;

	list_del(link);
	INIT_LIST_HEAD(link);
}

/*
 * place internal queue entry at end of queue
 *
 * The qe's pending link is reinitialised and then appended to the
 * pending list of the given transfer class.
 */
static void
enqueue_qe(struct m8xxhci_qe *qe, int qtype)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct list_head *pending = &hp->qe_list[qtype];
	struct list_head *link = &qe->qe_list;

	if (0) printk("enqueue_qe(qe=%p,qtype=%d)\n", qe, qtype);

	INIT_LIST_HEAD(link);
	list_add_tail(link, pending);
}

static void
dump_pending_qe_list(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct list_head *head, *l;
	struct m8xxhci_qe *qe;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&queue_lock, flags);

	for (i = 0; i < MAX_Q_TYPES; i++) {
		printk("qtype %d:\n", i);

		head = &hp->qe_list[i];

		if (list_empty(head))
			continue;

		for (l = head->next; l != head; l = l->next)
		{
			qe = list_entry(l, struct m8xxhci_qe, qe_list);

			printk("qe %p, next %p, urb %p, delta %d, next %p\n",
			       qe,
			       list_entry(&qe->qe_list.next,
					  struct m8xxhci_qe,
					  qe_list),
			       qe->urb, qe->delta, qe->next);
			printk("  devnum %d, state %d, reschedule %d\n",
			       qe->devnum, qe->qstate, qe->reschedule);
		}
	}

	spin_unlock_irqrestore(&queue_lock, flags);
}

/*
 * remove qe from any list it might be on and reset qe & driver state
 *
 * Under queue_lock this:
 *  - clears qe->qstate,
 *  - if qe is the active qe, makes it inactive and marks its device idle,
 *  - unlinks it from the current/next frame list if it is on one,
 *  - unlinks it from the singly linked delay list if it is on that.
 *
 * When a qe leaves the delay list its delta is added to the entry that
 * followed it.  The head-removal path always did this; the mid-list path
 * used to add the delta to the *preceding* entry instead, which is
 * inconsistent with the head case.
 * NOTE(review): this assumes each delta is relative to the entry before
 * it, as the head-removal handling implies - confirm against the code
 * that inserts into delay_qe_list.
 */
static void
deactivate_qe(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	unsigned long flags;
	struct m8xxhci_qe *q = NULL;

	spin_lock_irqsave(&queue_lock, flags);

	qe->qstate = 0;

	/* if active, reset state */
	if (hp->active_qe == qe) {
		make_inactive_qe_idle_device(qe);
	}

	/* if on current/next frame list, remove */
	if (!list_empty(&qe->frame_list)) {
		list_del(&qe->frame_list);
		INIT_LIST_HEAD(&qe->frame_list);
	}

	/* if on delay list, remove; q ends up as the successor (or NULL) */
	if (delay_qe_list == qe) {
		delay_qe_list = qe->next;
		q = delay_qe_list;
	} else {
		for (q = delay_qe_list; q; q = q->next) {
			if (q->next == qe) {
				q->next = qe->next;
				q = qe->next;
				break;
			}
		}
	}

	/* hand the removed entry's remaining delay to its successor */
	if (q) {
		q->delta += qe->delta;
	}
	
	spin_unlock_irqrestore(&queue_lock, flags);
}

/* Record qe as the driver's active queue element. */
static void
make_active_qe(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *priv;

	priv = (struct m8xxhci_private *)m8xxhci_ptr;
	priv->active_qe = qe;
}

/*
 * Clear the driver's active qe pointer and put the transmit state of
 * qe's transfer class back to XS_IDLE.
 */
static void
make_inactive_qe(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *priv;

	priv = (struct m8xxhci_private *)m8xxhci_ptr;

	priv->active_qe = 0;
	priv->xmit_state[qe->qtype] = XS_IDLE;
}

/*
 * Make qe inactive (see make_inactive_qe()) and then drop the busy mark
 * it held on its device.
 */
static void
make_inactive_qe_idle_device(struct m8xxhci_qe *qe)
{
	/* driver state first, device bookkeeping second */
	make_inactive_qe(qe);
	mark_device_idle(qe);
}

static int
complete_qe(struct m8xxhci_qe *qe, int status)
{
	struct urb *urb = qe->urb;

	log_event(1, "complete_qe qe", (int)qe);
	log_event(3, "complete_qe status", status);

	qe->status = status;

#ifdef DEBUG_CHECKS
	if (status != 0) {
//		dump_events();
		if (m8xxhci_verbose > 1 || status != -1)
			printk("complete_qe(qe=%p,status=%d)\n", qe, status);
	}
#endif

	if (urb) {
		urb->status = status == 0 ? USB_ST_NOERROR : USB_ST_TIMEOUT;

		if (urb->complete) {
			urb->actual_length = usb_pipein(urb->pipe) ?
				qe->recv_len : qe->send_len;

			if (m8xxhci_verbose && status != 0)
				printk("complete urb %p, "
				       "actual_length %d, status %d\n",
				       urb, urb->actual_length, status);

			urb->complete(urb);

			/* interrupt urbs restart themselves */
			switch (qe->qtype) {
			case Q_INTR:
				if (urb->interval) {
					qe->recv_len = 0;
					qe->send_len = 0;
					qe->retries = 0;
					qe->busys = 0;
					qe->reschedule = 1;
				} else {
					unlink_urb(urb, Q_INTR);
				}
				break;
			default:
				unlink_urb(urb, qe->qtype);
			}
		}
	}

	if (waitqueue_active(&qe->wakeup))
		wake_up(&qe->wakeup);

	return 0;
}

/*
 * abort a qe; only works if it's active or just dequeued
 *
 * The qe is taken off whatever lists deactivate_qe() knows about and is
 * then completed with status -1.
 */
static void
abort_qe(struct m8xxhci_qe *qe)
{
	log_event(1, "abort_qe qe", (int)qe);

	/* detach first so completion sees a quiescent qe */
	deactivate_qe(qe);

	complete_qe(qe, -1);
}

/*
 * Sleep (uninterruptibly) until the qe is completed or one second
 * (HZ jiffies) has passed.
 *
 * complete_qe() wakes qe->wakeup.  After waking, a qe whose status is
 * still positive is treated as timed out and aborted, which completes it
 * with status -1.
 *
 * NOTE(review): this presumes the caller set qe->status to a positive
 * value before queuing the qe - confirm against the callers.
 * NOTE(review): the qe's state is not re-checked between joining the wait
 * queue and sleeping, so a completion that happened before this call
 * looks like it would still cost the full timeout - confirm.
 */
static void
wait_for_qe(struct m8xxhci_qe *qe)
{
	DECLARE_WAITQUEUE (wait, current);

	log_event(3, "wait_for_qe qe", (int)qe);
	if (0) printk("wait_for_qe(qe=%p) urb=%p\n", qe, qe->urb);

	/* mark ourselves sleeping and join the qe's wait queue */
	set_current_state(TASK_UNINTERRUPTIBLE);
	add_wait_queue(&qe->wakeup, &wait);

//	schedule_timeout(HZ/10);
	schedule_timeout(HZ);

	remove_wait_queue(&qe->wakeup, &wait);
	set_current_state(TASK_RUNNING);

	/* status still positive: nobody completed the qe, give up on it */
	if (qe->status > 0) {
		log_event(1, "wait_for_qe timeout qe", (int)qe);
		if (1) printk("wait_for_qe(qe=%p) timeout; urb %p\n",
			      qe, qe->urb);
		abort_qe(qe);
	}

	log_event(3, "wait_for_qe done qe", (int)qe);
	/* report failures; plain aborts (-1) only when very verbose */
	if (qe->status != 0) {
		if (m8xxhci_verbose > 1 || qe->status != -1)
			printk("wait_for_qe(qe=%p) done; urb %p, status %d\n",
			       qe, qe->urb, qe->status);
	}
}

/*
 * Try to claim the pending queues for scanning.
 *
 * Returns 0 when the claim succeeded (queues_busy was zero and has been
 * raised) and -1 when somebody else already holds it.  Never blocks.
 * The hp argument is not used.
 */
static int
lock_queues(struct m8xxhci_private *hp)
{
	unsigned long flags;
	int rc = -1;

	spin_lock_irqsave(&queue_lock, flags);
	if (!queues_busy) {
		queues_busy++;
		rc = 0;
	}
	spin_unlock_irqrestore(&queue_lock, flags);

	return rc;
}

/*
 * Give up the claim taken by a successful lock_queues() by lowering
 * queues_busy under queue_lock.  The hp argument is not used.
 */
static void
unlock_queues(struct m8xxhci_private *hp)
{
	unsigned long irq_state;

	spin_lock_irqsave(&queue_lock, irq_state);
	--queues_busy;
	spin_unlock_irqrestore(&queue_lock, irq_state);
}

/*
 * Report whether the device behind qe is busy in the direction of qe's
 * pipe (index 0 for IN, 1 for OUT).
 *
 * Each time the device is found busy its busy_count for that direction
 * is incremented; once the count exceeds MAX_DEV_BUSYS the condition is
 * logged (and the event log dumped with DEBUG_CHECKS).
 *
 * Returns 1 when busy, 0 otherwise - including when the qe has no
 * device or the device has no driver-private data.
 */
static int
device_busy(struct m8xxhci_qe *qe)
{
	struct m8xxhci_device *dev;
	int io;

	if (!qe->dev)
		return 0;

	dev = usb_to_m8xxhci(qe->dev);
	if (!dev)
		return 0;

	io = usb_pipein(qe->pipe) ? 0 : 1;
	if (!dev->busy[io])
		return 0;

	if (1) {
		log_event(3, "dev is busy dev", (int)dev);
		log_event(3, "dev is busy io", io);
	}

	if (++dev->busy_count[io] > MAX_DEV_BUSYS) {
		int devnum = dev->usb->devnum;

		if (m8xxhci_verbose)
			printk("m8xxhci: EXCESSIVE BUSYS "
			       "on device %d!\n", devnum);
		log_event(1, "excessive busys", devnum);
#ifdef DEBUG_CHECKS
		dump_events();
#endif
	}

	return 1;
}

/*
 * Report whether the device behind qe counts as hung: busy in the
 * direction of qe's pipe with a busy_count above MAX_DEV_BUSYS.
 *
 * Returns 1 when hung, 0 otherwise (also when the qe has no device or
 * the device has no driver-private data).
 */
static int
device_hung(struct m8xxhci_qe *qe)
{
	struct m8xxhci_device *dev = NULL;
	int io;

	if (qe->dev)
		dev = usb_to_m8xxhci(qe->dev);

	if (dev == NULL)
		return 0;

	io = usb_pipein(qe->pipe) ? 0 : 1;

	if (!dev->busy[io])
		return 0;

	return (dev->busy_count[io] > MAX_DEV_BUSYS) ? 1 : 0;
}

/*
 * Raise the busy count of qe's device for the direction of qe's pipe
 * (index 0 for IN, 1 for OUT).  Does nothing when the qe has no device
 * or the device has no driver-private data.
 */
static void
mark_device_busy(struct m8xxhci_qe *qe)
{
	struct m8xxhci_device *dev = NULL;
	int io;

	if (qe->dev)
		dev = usb_to_m8xxhci(qe->dev);

	if (dev == NULL)
		return;

	io = usb_pipein(qe->pipe) ? 0 : 1;

	if (1) {
		log_event(3, "mark dev busy dev", (int)dev);
		log_event(3, "mark dev busy io", io);
	}

	dev->busy[io]++;
}

/*
 * Undo one mark_device_busy() for the qe's device and direction.  When
 * the busy level drops back to zero the busy_count used for hang
 * detection is cleared as well.  Does nothing when the qe has no device.
 */
static void
mark_device_idle(struct m8xxhci_qe *qe)
{
	struct m8xxhci_device *dev = qe->dev ? usb_to_m8xxhci(qe->dev) : NULL;
	int io;

	if (dev == NULL)
		return;

	io = usb_pipein(qe->pipe) ? 0 : 1;

	log_event(3, "mark dev idle dev", (int)dev);
	log_event(3, "mark dev idle io", io);

	if (--dev->busy[io] == 0)
		dev->busy_count[io] = 0;
}

/*
 * Scan the pending work lists (one per transaction class, walked in
 * index order), looking for qe's whose device is idle.  Each qe that
 * fits is removed from its pending list, its device is marked busy and
 * it is added to the current frame.
 *
 * A qe is skipped while its device is busy; if the device is hung the qe
 * is unlinked and aborted instead.  Scanning stops completely once a qe
 * would push the frame past 1400 bytes, and stops for the current class
 * once a qe would exceed that class's cumulative quota.
 *
 * Does nothing if the queues are already claimed (lock_queues() fails).
 */
static void
run_queues(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct list_head *head, *l, *next;
	struct m8xxhci_qe *qe;
	int i, j, prev_classes;

	/* if we can't lock, exit */
	if (lock_queues(hp))
		return;

	for (i = 0; i < MAX_Q_TYPES; i++) {

		/*
		 * bytes already in the frame for classes 0..i; this is what
		 * gets compared against the cumulative class quota below.
		 * (this used to index bytes[] with i rather than j, which
		 * summed the current class i+1 times and ignored the others)
		 *
		 * NOTE(review): the sum is taken once per class and is not
		 * refreshed as qe's are added below - confirm that is
		 * intended.
		 */
		prev_classes = 0;
		for (j = 0; j <= i; j++)
			prev_classes += hp->current_frame->bytes[j];

		if (0) printk("run_queues() [%d] next %p, head %p\n",
			      i, hp->qe_list[i].next, &hp->qe_list[i]);

		head = &hp->qe_list[i];

		/* scan pending qe's; 'next' is saved since qe may be unlinked */
		for (l = head->next; l != head; l = next)
		{
			qe = list_entry(l, struct m8xxhci_qe, qe_list);
			next = l->next;

			if (0) printk("top: head %p, l %p, qe %p\n",
				      &hp->qe_list[i], l, qe);
			log_event(3, "run_queues qe", (int)qe);

			/* find ones for devices which are not busy */
			if (device_busy(qe)) {
				log_event(3, "dev busy qe", (int)qe);

				/* if device is hung, abort this qe */
				if (device_hung(qe)) {
					list_del(&qe->qe_list);
					INIT_LIST_HEAD(&qe->qe_list);
					abort_qe(qe);
				}

				continue;
			}

			/* don't exceed a single frame */
			if (hp->current_frame->total_bytes +
			    qe->data_len > 1400)
			{
				log_event(2, "frame full qe", (int)qe);
				goto done;
			}

			/*
			 * keep iso and interrupt from exceeding 90%,
			 * give remaining 10% to control and anything left
			 * to bulk.
			 *
			 * pretty simplistic but it's a start...
			 */
			if ((prev_classes + qe->data_len) >
			    frame_cumul_class_quota[qe->qtype])
			{
				log_event(2, "type over quota qe", (int)qe);
				break;
			}

			/* ok, commit. mark busy and remove from list */
			mark_device_busy(qe);

			list_del(&qe->qe_list);
			INIT_LIST_HEAD(&qe->qe_list);

			if (0) log_event(3, "adding qe", (int)qe);

			/* and start the qe */
			add_to_current_frame(qe);
		}
	}

 done:
	unlock_queues(hp);
}

/*
 * Debug aid: print the active qe, then walk the delay list printing each
 * entry together with its delta.
 */
static void
dump_delay_qe_list(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct m8xxhci_qe *cur;

	printk("active_qe %p\n", hp->active_qe);
	printk("delay_qe_list %p\n", delay_qe_list);

	cur = delay_qe_list;
	while (cur) {
		printk("qe %p, delta %d\n", cur, cur->delta);
		cur = cur->next;
	}
}

/*
 * Defer a qe to the next frame: make it inactive and queue it on the
 * next frame's list.  The device is deliberately left marked busy.
 */
static void
pace_qe(struct m8xxhci_qe *qe)
{
	log_event(3, "pace qe", (int)qe);

	/* turn off active but don't mark device idle - we're still using it */
	make_inactive_qe(qe);

	add_to_next_frame(qe);
}

/*
 * Move due entries from the head of the delay list onto the current
 * frame.  An entry is due when its delta has counted down to zero or
 * below.  Processing stops at the first entry that is not due, or whose
 * device is busy but not hung.
 *
 * Always returns 0.
 */
static int
service_delay_qe_list(void)
{
	struct m8xxhci_qe *head;

	for (;;) {
		head = delay_qe_list;
		if (!head || head->delta > 0)
			break;

		/* device already has work in this frame? */
		if (device_busy(head)) {
			if (!device_hung(head))
				break;

			/*
			 * hung device: give up on this qe and look again.
			 * NOTE(review): the qe is not unlinked here, so this
			 * relies on abort_qe() changing the delay list or the
			 * busy state - confirm, otherwise this loop can spin.
			 */
			abort_qe(head);
			continue;
		}

		/* claim the device, then unlink the qe from the delay list */
		mark_device_busy(head);

		delay_qe_list = head->next;
		head->next = 0;

		log_event(3, "put delay on current qe", (int)head);

		/* entries on this list are assumed to be interrupt transfers */
		head->qstate = QS_INTR;

		add_to_current_frame(head);
	}

	return 0;
}

/*
 * Put a qe on the delay list so it comes due after its urb's interval.
 *
 * The delay list is a singly linked, time ordered list.  Each entry's
 * 'delta' is its delay relative to the entry before it (the head's delta
 * is relative to "now"), so the sum of deltas up to and including an
 * entry is that entry's total delay.  The new qe is inserted in front of
 * the first entry whose total delay exceeds the period, and the deltas of
 * the new qe and of the entry after it are adjusted so every total delay
 * is preserved.
 *
 * Also clears qe->busys.
 */
static void
reschedule_qe(struct m8xxhci_qe *qe)
{
	struct m8xxhci_qe *qe2, *prev;
	int odelta, cum;
	struct urb *urb = qe->urb;
	int period = urb->interval;

	log_event(1, "reschedule qe", (int)qe);
	log_event(4, "delay ms", period);

	if (qdbg) printk("reschedule qe %p\n", qe);

	qe->busys = 0;

	/* if list is empty, start the list */
	if (delay_qe_list == NULL){
		if (qdbg) printk("first\n");
		if (0) log_event(1, "first", period);
		delay_qe_list = qe;
		qe->next = 0;
		qe->delta = period;
	} else {
		/* find where to put this in time order */
		/* cum is the total delay of qe2; stop at the first later entry */
		for (qe2 = delay_qe_list, prev = 0, cum = 0;
		     qe2; qe2 = qe2->next)
		{
		  	cum += qe2->delta;
			if (cum > period)
				break;
			prev = qe2;
		}

		if (qdbg) printk("after qe2 %p, prev %p\n", qe2, prev);

		/* link in front of qe2 (if there is one) */
		if (qe2) {
			if (prev)
				prev->next = qe;
			else
				delay_qe_list = qe;
			qe->next = qe2;

			/*
			 * split qe2's old delta: qe2 keeps the part beyond
			 * our period, we take the rest
			 */
			odelta = qe2->delta;
			qe2->delta = cum - period;
			qe->delta = odelta - qe2->delta;

			if (0) log_event(1, "after, delta", qe->delta);
		} else {
			/* ran off the end; the list was non-empty so prev is set */
			prev->next = qe;
			qe->next = 0;

			qe->delta = period - cum;
			if (0) log_event(1, "end, delta", qe->delta);
		}
	}
}

/*
 * React to a NAK on a qe, according to its queue type:
 *  - interrupt: make the qe inactive, idle the device and put the qe
 *    back on the delay list
 *  - iso: retry at the next frame
 *  - anything else: log it, and pace slow devices to the next frame
 */
static void
nak_qe(struct m8xxhci_qe *qe)
{
	if (qe->qtype == Q_INTR) {
		/* reset xmit machine and try again next time */
		make_inactive_qe_idle_device(qe);
		reschedule_qe(qe);
		return;
	}

	if (qe->qtype == Q_ISO) {
		/* nak on an iso IN; retry IN at next frame */
		pace_qe(qe);
		return;
	}

	/* effectively reschedule for next frame */
	log_event(1, "nak, delay qe", (int)qe);

	/* pace slow devices, one IN per 1ms frame */
	if (usb_pipeslow(qe->pipe))
		pace_qe(qe);
}

/*
 * Start the next pending qe from the current frame, trying the
 * transaction classes in index order.
 *
 * Nothing is done while the tx bd list is locked or while a qe is
 * already active.  Classes whose xmit state is not idle are skipped.
 * For each qe taken from the frame, the send_qe() result decides:
 *   -1  the qe can never be sent; abort it and stop
 *    1  a send is in progress; stop
 *    2  no room this time; move it to the next frame and stop
 * any other result moves on to the next qe.
 */
static void
pick_next_thing_to_send(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct m8xxhci_qe *qe;
	int cls, rc;

	/* tx bd list is locked */
	if (txbd_list_busy)
		return;

	/* already working on a qe */
	if (hp->active_qe != 0) {
		log_event(3, "run_frame active_qe", (int)hp->active_qe);
		return;
	}

	/* minimalist scheduler */
	for (cls = 0; cls < 4; cls++) {
		if (hp->xmit_state[cls] != XS_IDLE) {
			log_event(3, "run_frame not idle", cls);
			continue;
		}

		while ((qe = take_from_current_frame(cls)) != 0) {
			log_event(3, "run_frame qe", (int)qe);

			rc = send_qe(qe);
			if (rc == -1) {
				abort_qe(qe);
				return;
			}
			if (rc == 1)
				return;
			if (rc == 2) {
				add_to_next_frame(qe);
				return;
			}
		}
	}
}

/*
 * Swap the current and next frame pointers.  On the very first call,
 * when no current frame is set yet, point them at frames[0] and
 * frames[1] instead.
 */
static void
switch_frames(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct m8xxhci_frame *old = hp->current_frame;

	if (old == 0) {
		hp->current_frame = &hp->frames[0];
		hp->next_frame = &hp->frames[1];
	} else {
		hp->current_frame = hp->next_frame;
		hp->next_frame = old;
	}
}

/*
 * Fill the current frame: first the delay-list entries that have come
 * due, then whatever fits from the pending queues.
 */
static void
schedule_current_frame(void)
{
	/* add any ready interrupt transactions */
	service_delay_qe_list();

	/* add pending transactions from the per-type queues */
	run_queues();
}

/*
 * Per-frame (1ms) work: age the head of the delay list by one tick,
 * swap frames, schedule work into the new current frame and try to
 * start a transmission.
 */
static void
start_of_frame(void)
{
	/* only the head needs aging; later deltas are relative to it */
	if (delay_qe_list != 0)
		--delay_qe_list->delta;

	/* switch active frames */
	switch_frames();

	/* jam as much as we can into this frame */
	schedule_current_frame();

	/* try and start something */
	pick_next_thing_to_send();
}

/* ---- */

/*
 * Take the tx bd at txnext.  The slot's qe binding is cleared, txnext
 * moves on (back to 0 after the bd carrying the WRAP bit), all status
 * bits except WRAP are cleared and txfree is decremented.
 *
 * Returns the bd.  No check is made here that a bd is actually free.
 */
static inline cbd_t *
next_bd(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	int slot = hp->txnext;
	cbd_t *bd = hp->tbase + slot;

	hp->tx_bd_qe[slot] = 0;

	if (bd->cbd_sc & BD_SC_WRAP)
		hp->txnext = 0;
	else
		hp->txnext = slot + 1;

	bd->cbd_sc &= BD_SC_WRAP;

	hp->txfree--;

	return bd;
}

/*
 * Take the next tx bd (see next_bd()) and bind it to a qe by recording
 * the qe in tx_bd_qe[] at the bd's ring index.
 *
 * Returns the bd.
 */
static inline cbd_t *
next_bd_qe(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	int slot = hp->txnext;	/* sample before next_bd() advances it */
	cbd_t *bd = next_bd();

	if (!bd)
		return bd;

	hp->tx_bd_qe[slot] = qe;
	return bd;
}

/*
 * Hand the rx bd at rxnext back for reuse and step rxnext to the
 * following bd (back to 0 after the bd carrying the WRAP bit).  The bd
 * gets a zero length and is marked EMPTY with interrupt enabled; only
 * its WRAP bit is preserved.
 */
static void
advance_rx_bd(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	cbd_t *bd = hp->rbase + hp->rxnext;

	if (bd->cbd_sc & BD_SC_WRAP)
		hp->rxnext = 0;
	else
		hp->rxnext++;

	bd->cbd_datlen = 0;
	bd->cbd_sc &= BD_SC_WRAP;
	bd->cbd_sc |= BD_SC_EMPTY | BD_SC_INTRPT;
}

/*
 * Retire the tx bd at txlast: clear its qe binding, advance txlast
 * (back to 0 after the bd carrying the WRAP bit), fold its error bits
 * into the statistics, bump txfree and reset the bd.
 */
static inline void
advance_tx_bd(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	cbd_t *bdp;

	bdp = hp->tbase + hp->txlast;
	hp->tx_bd_qe[ hp->txlast ] = 0;

	hp->txlast++;
	if (bdp->cbd_sc & BD_SC_WRAP)
		hp->txlast = 0;

	/* collect stats */
	if ((bdp->cbd_sc & (BD_USB_NAK|BD_USB_STAL|BD_USB_TO|BD_USB_UN)))
		hp->stats.tx_err++;

	if (bdp->cbd_sc & BD_USB_NAK)
		hp->stats.tx_nak++;
	if (bdp->cbd_sc & BD_USB_STAL)
		hp->stats.tx_stal++;
	if (bdp->cbd_sc & BD_USB_TO)
		hp->stats.tx_to++;
	if (bdp->cbd_sc & BD_USB_UN)
		hp->stats.tx_un++;

	hp->txfree++;

	/*
	 * wipe the bd, keeping only WRAP (currently enabled; change the
	 * '#if 1' to '#if 0' to leave sent bds intact for inspection)
	 */
#if 1
	bdp->cbd_sc &= BD_SC_WRAP;
	bdp->cbd_datlen = 0;
	bdp->cbd_bufaddr = 0;
#endif
}

/* return the current count of free tx bds (hp->txfree) */
static inline int
free_bds(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	return hp->txfree;
}


/*
 * Move a queue element (pending transaction) to its next state after a
 * successful transmit.
 *
 * The retry count is cleared first.  Then, by current state:
 *  QS_SETUP   -> QS_SETUP2
 *  QS_SETUP2  stays while data remains, else -> QS_SETUP3
 *  QS_ISO     IN: stays in QS_ISO (advanced by the rx code);
 *             OUT: record the finished packet and load the next one,
 *             or finish when all packets are done
 *  QS_BULK    stays while data remains, else finishes
 *  QS_SETUP3, QS_INTR  finish
 *
 * Slow devices (and iso) are paced to the next frame between steps.
 * Finishing clears the state, makes the qe inactive and its device
 * idle, completes the qe with status 0 and, if requested, puts the qe
 * back on the delay list.
 */
static void
advance_qe_state(struct m8xxhci_qe *qe)
{
	struct urb *urb = qe->urb;

	qe->retries = 0;

	switch (qe->qstate) {
	case QS_SETUP:
		qe->qstate = QS_SETUP2;

		if (usb_pipeslow(qe->pipe)) {
			pace_qe(qe);
			return;
		}
		break;
	case QS_SETUP2:
		/* more data to move: stay in this state */
		if (qe->data_len > 0) {
			/* give the slow device time to setup after SETUP */
			if (usb_pipeslow(qe->pipe)) {
				pace_qe(qe);
				return;
			}
			break;
		}

		qe->qstate = QS_SETUP3;

		if (usb_pipeslow(qe->pipe)) {
			pace_qe(qe);
			return;
		}
		break;
	case QS_ISO:
		/* don't advance for IN's, we do that in rx code */
		if (usb_pipein(urb->pipe)) {
			if (qe->iso_ptr < urb->number_of_packets) {
				pace_qe(qe);
			}
			break;
		}

		log_event(3, "ISO; frame done", (int)qe);

		/* iso OUT: record the result for the packet just sent */
		urb->iso_frame_desc[qe->iso_ptr].status = 0;
		urb->iso_frame_desc[qe->iso_ptr].actual_length = qe->send_len;

		qe->send_len = 0;

		qe->iso_ptr++;

		log_event(3, "ISO; number_of_packets", urb->number_of_packets);
		log_event(3, "ISO; iso_ptr", qe->iso_ptr);

		if (qe->iso_ptr == urb->number_of_packets) {
			log_event(3, "ISO; all done", (int)qe);
			goto finish_qe;
		}

		/* set up the next packet and send it in the next frame */
		qe->data_len = urb->iso_frame_desc[qe->iso_ptr].length;
		qe->retries = 0;
		qe->busys = 0;

		pace_qe(qe);

		break;
	case QS_BULK:
		if (qe->data_len > 0)
			break;

		if (0) printk("BULK done; send_len %d, recv_len %d\n",
			      qe->send_len, qe->recv_len);

		/* fall through */
	case QS_SETUP3:
	case QS_INTR:
	finish_qe:
		qe->qstate = 0;

		make_inactive_qe_idle_device(qe);

		complete_qe(qe, 0);

		/* periodic transfer: queue it up for its next interval */
		if (qe->reschedule) {
			qe->reschedule = 0;
			reschedule_qe(qe);
		}

		break;
	}
}

/*
 * Make the endpoint 0 tx bd pointer (epb_tbptr) agree with the software
 * txlast index.  The register is written only when it differs.
 */
static void
advance_hw_tx_ptr(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile epb_t *epb = hp->epbptr[0];
	ushort wanted;

	/* where the ring pointer should be for bd number txlast */
	wanted = epb->epb_tbase + (hp->txlast * sizeof(cbd_t));
	if (epb->epb_tbptr == wanted)
		return;

	epb->epb_tbptr = wanted;
}


/*
 * Keep transmitting.  If a qe is active it is sent again (for further
 * packets or a retry); the send_qe() result decides what happens next:
 *   1  sending, nothing more to do now
 *  -1  abort the qe
 *   2  no time left in the frame, pace the qe to the next one
 * In every case but the first, or when no qe is active, the next pending
 * qe is picked.
 */
static void
continue_xmit(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct m8xxhci_qe *qe = hp->active_qe;
	int rc;

	if (qe) {
		rc = send_qe(qe);

		/* we're sending... */
		if (rc == 1)
			return;

		if (rc == -1)
			abort_qe(qe);
		else if (rc == 2)
			pace_qe(qe);	/* no time in frame */
	}

	/* nothing on the active_qe... */
	pick_next_thing_to_send();
}


/*
 * Run through completed tx bd's (from txlast up to txnext), matching
 * them up with qe's.
 *
 * Bds not bound to a qe are simply retired.  For a qe, all of its
 * consecutive bds are examined and retired: timeout/underrun bits are
 * collected in 'retry', nak/stall bits in 'nak', and for data packets
 * that went out cleanly the qe's data_len/send_len are updated.  The
 * outcome then drives the qe: retry (aborting after MAX_QE_RETRIES),
 * abort on stall, nak_qe() on nak, or advance_qe_state() when all of
 * its bds are done.  Finally the hardware tx pointer is resynced.
 *
 * Returns early if a qe's bd is still READY and no error has been seen
 * for that qe yet (the transaction is still in flight).
 */
static void
process_done_txbds(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct m8xxhci_qe *qe;
	cbd_t *bdp;
	int i, retry, nak, alldone, count, status, dbg = 0;

	log_event(3, "process_done_txbds; active_qe", (int)hp->active_qe);

	if (dbg) printk("process_done_txbds() txlast %d, txnext %d\n",
		      hp->txlast, hp->txnext);

	if (dbg) dump_tx_bds("tx bds:");

	while (hp->txlast != hp->txnext) {
		bdp = hp->tbase + hp->txlast;

		if (dbg) printk("txlast %d, txnext %d, sc %04x\n",
				hp->txlast, hp->txnext, bdp->cbd_sc);

		/* oldest outstanding bd is still READY; nothing to do yet */
		if ((bdp->cbd_sc & BD_SC_READY))
			break;

		/* find the qe */
		qe = hp->tx_bd_qe[ hp->txlast ];

		if (dbg) printk("txlast %d, qe %p\n", hp->txlast, qe);

		/*
		 * if it's a SETUP, follow all the tx bd's
		 * if it's an IN, just one tx bd
		 * if it's an OUT, one tx bd + 'n' more for data
		 */
		if (!qe) {
			/* bd not bound to a qe; just retire it */
			advance_tx_bd();
			continue;
		}

		alldone = 1;
		retry = 0;
		nak = 0;
		count = 0;

		/* clean up the bd's for this qe */
		for (i = 0; i < TX_RING_SIZE; i++) {
			if (qe != hp->tx_bd_qe[ hp->txlast ])
				break;

			if (dbg) printk("found tx bd, sc 0x%x\n", bdp->cbd_sc);
			count++;

			status = bdp->cbd_sc;
			log_event(3, "index/sc",
				  (hp->txlast << 16) | status);

			/* note errors */
			retry |= status & (BD_USB_TO | BD_USB_UN);
			nak |= status & (BD_USB_NAK | BD_USB_STAL);

			/* if not done and no errors, keep waiting */
			if ((status & BD_SC_READY)) {
				alldone = 0;
				if (retry == 0 && nak == 0) {
					log_event(3, "qe not done ok", (int)qe);
					return;
				}
				log_event(3, "qe not done err", (int)qe);
			}

			/* if data out & ok, advance send */
			if ((status & (BD_USB_DATA0|BD_USB_DATA1)) &&
			    (qe->qstate == QS_SETUP2 ||
			     qe->qstate == QS_BULK ||
			     qe->qstate == QS_ISO) &&
			    nak == 0 && retry == 0)
			{
				qe->data_len -= bdp->cbd_datlen;
				qe->send_len += bdp->cbd_datlen;
			}

			advance_tx_bd();
			bdp = hp->tbase + hp->txlast;
		}

		log_event(3, "bds scanned ", count);

		if (dbg) printk("retry 0x%x\n", retry);

		if (nak & BD_USB_NAK)
			log_event(3, "nak", nak);
		if (nak & BD_USB_STAL)
			log_event(3, "stall", nak);

#if 1
		/* if we get a timeout on a slow interrupt transaction,
		   pretend it's a nak so we delay and retry later */
		if (retry &&
		    usb_pipeslow(qe->pipe) &&
		    qe->qtype == Q_INTR)
		{
			retry = 0;
			nak = BD_USB_NAK;
		}
#endif

		/* if error, retry transaction */
		if (retry) {
			log_event(3, "retry qe", (int)qe);
			if (dbg) printk("qe %p, retry #%d, state %d\n",
					qe, qe->retries, qe->qstate);
			hp->stats.retransmit++;
			if (++(qe->retries) > MAX_QE_RETRIES) {
				abort_qe(qe);
			}

			/*
			 * NOTE(review): qe is still read below after a
			 * possible abort_qe() above - confirm the qe stays
			 * valid across abort_qe().
			 */

			/* if we see timeouts on iso IN's which don't */
			/* get data, add this: */
			if (qe->qstate == QS_ISO) {
				printk("iso IN timeout!\n");
#if 0 /* XXX */
				nak_qe(qe);
#endif
			}
		} else {
			/* if short, we tried to read too much and we're done */
			/* if stalled and short, spec says no status phase */
			if (qe->shortread) {
				if ((nak & BD_USB_STAL)) {
					if (qe->qstate == QS_SETUP2)
						qe->qstate = QS_SETUP3;
				}

				/* finish up on short only or short+nak */
				qe->data_len = 0;
				nak = 0;
				alldone = 1;
			}

			/* if nak, resend IN's from where we left off */
			if (nak) {	
				process_done_rxbds();

				/* if stall, abort else retry later */
				if ((nak & BD_USB_STAL)) {
					if (dbg) printk("stall, abort qe %p\n",
							qe);
					abort_qe(qe);
				} else {
					nak_qe(qe);
				}
			} else {
				/* if ok, and done, advance qe state */
				if (dbg) printk("tx ok qe %p\n", qe);

				log_event(3, "tx ok qe", (int)qe);
				log_event(3, "tx ok alldone", alldone);

				if (alldone) {
					/* in case any rx's snuck in */
					process_done_rxbds();
					advance_qe_state(qe);
				}
			}
		}

		/* if we got a short read or tx timeout of some flavor
		 * we will have cleaned up the bd's and left a gap
		 * between where the hw thinks the next tx is and where
		 * we think it is.  so, we need to rectify this situation...
		 */
		advance_hw_tx_ptr();

#ifdef DEBUG_CHECKS
		/* sanity check */
		if (hp->active_qe && qe != hp->active_qe) {
			printk("woa! qe %p != active_qe %p\n",
			       qe, hp->active_qe);
		}
#endif
	}

	if (dbg) printk("process_done_txbds() exit\n");

	log_event(3, "process_done_txbds; done, active_qe",
		  (int)hp->active_qe);
}

/*
 * Append received data to a qe's buffer.
 *
 * qe    the transaction the data belongs to
 * d01   non-zero for a DATA1 packet, zero for DATA0 (used for logging only)
 * data  the received bytes
 * len   number of bytes; nothing is copied when len <= 0
 *
 * The destination is qe->iso_data in the iso state and qe->data
 * otherwise; the copy lands at offset recv_len.  Afterwards recv_len
 * grows and data_len shrinks by len.
 *
 * NOTE(review): len is not checked against the space left in the
 * destination here - confirm the callers bound it.
 */
static void
process_data(struct m8xxhci_qe *qe, int d01, u_char *data, int len)
{
	u_char *dst;

	if (d01)
		log_event(2, "process_data1 len", len);
	else
		log_event(2, "process_data0 len", len);

	if (qe->qstate == QS_ISO)
		dst = qe->iso_data;
	else
		dst = qe->data;

	if (len <= 0 || !dst)
		return;

	memcpy(dst + qe->recv_len, data, len);
	qe->recv_len += len;

	/* ask for less next time, in case we get a NAK and retry */
	qe->data_len -= len;

	log_event(1, "total len", qe->recv_len);
}

/*
 * An iso receive has filled up: finish it right here (the caller runs
 * at interrupt level).  The qe is made inactive and its device idle;
 * if the urb has a completion handler it is called and the urb is then
 * unlinked.  A null qe is ignored.
 */
static void
complete_iso_rx(struct m8xxhci_qe *qe)
{
	struct urb *urb;

	if (!qe)
		return;

	make_inactive_qe_idle_device(qe);

	urb = qe->urb;
	if (!urb->complete)
		return;

	urb->complete(urb);
	unlink_urb(urb, qe->qtype);
}

/*
 * got an iso rx; advance iso urb state
 *
 * we do this because there is no in,datax,ack for iso rx and we can't
 * advance the state in the tx completion like we do everything else
 *
 * The packet just received is recorded in the urb's frame descriptor
 * (status 0, actual_length = recv_len) and the qe moves on to the next
 * packet.  When the last packet has been received the urb is completed
 * via complete_iso_rx().
 */
static void
process_iso_rx(struct m8xxhci_qe *qe)
{
	struct urb *urb = qe->urb;

	if (0) printk("process_iso_rx() [%d/%d] recv_len %d\n",
		      qe->iso_ptr, urb->number_of_packets, qe->recv_len);
	log_event(3, "process_iso_rx iso_ptr", 
		  (qe->iso_ptr << 16) | urb->number_of_packets);
	log_event(3, "recv_len", qe->recv_len);

	urb->iso_frame_desc[qe->iso_ptr].status = 0;
	urb->iso_frame_desc[qe->iso_ptr].actual_length = qe->recv_len;

	/* start the next packet with clean receive state */
	qe->recv_len = 0;
	qe->shortread = 0;

	qe->iso_ptr++;

	if (qe->iso_ptr == urb->number_of_packets) {
#if 0
		dump_events();
#endif
		complete_iso_rx(qe);
		return;
	}

	/* more packets to go; load the next packet's length */
	qe->data_len = urb->iso_frame_desc[qe->iso_ptr].length;
	qe->retries = 0;
	qe->busys = 0;
}

/*
 * Run through completed rx bd's, starting at rxnext and stopping at the
 * first bd still marked EMPTY.
 *
 * For each filled bd: receive errors are counted and the bd is then
 * treated as carrying no data; otherwise, if a qe is active, DATA0/DATA1
 * payloads are copied into it, a packet shorter than maxpacketsize is
 * flagged as a short read, the urb's actual_length is updated and iso
 * transfers are advanced.  Every processed bd is handed back to the
 * ring.
 */
static void
process_done_rxbds(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	cbd_t *bdp;
	int status, bl, got_data;
	u_char *bp;
	struct m8xxhci_qe *qe;
	struct urb *urb;

	while (1) {
		log_event(3, "process_done_rxbds rxnext", hp->rxnext);
		bdp = hp->rbase + hp->rxnext;

		status = bdp->cbd_sc;
		bp = __va(bdp->cbd_bufaddr);
		/* presumably drops the 2 trailing CRC bytes - TODO confirm */
		bl = bdp->cbd_datlen - 2;

		if (0) printk("status %x, bp %p, bl %d, active_qe %p\n",
			      status, bp, bl, hp->active_qe);

		if ((status & BD_SC_EMPTY))
			break;

		/* any of the rx error bits set? */
		if ((status & 0x1e)) {
			hp->stats.rx_err++;

			if ((status & BD_USB_CRC))
				hp->stats.rx_crc++;
			if ((status & BD_USB_AB))
				hp->stats.rx_abort++;
			if ((status & BD_USB_NONOCT))
				hp->stats.rx_nonoct++;

#if 0
			dump_state(1, 1, 1);
			dump_events();
#endif

			log_event(1, "rx err sc", status);
			printk("rx err sc (status 0x%x)\n", status);

			/* pretend we got no data to force a retry */
			bl = 0;
			status &= ~BD_USB_RX_PID;
		}

		if ((qe = hp->active_qe)) {
			/* copy the data */
			got_data = 0;

			switch (status & BD_USB_RX_PID) {
			case BD_USB_RX_DATA0:
				process_data(qe, 0, bp, bl);
				got_data = 1;
				break;
			case BD_USB_RX_DATA1:
				process_data(qe, 1, bp, bl);
				got_data = 1;
				break;
			}

			/* function may be signaling read is done */
			if (got_data && bl < qe->maxpacketsize) {
				log_event(3, "short read qe", (int)qe);
				qe->shortread = 1;
			}

			/* match rx bd to urb and update it */
			if (got_data && (urb = qe->urb)) {
				if (0) printk("found urb %p, recv_len %d\n",
					      urb, qe->recv_len);

				urb->actual_length = qe->recv_len;

				/*
				 * don't complete urbs here - do it in the
				 * xmit isr, as MOT assured me the xmit
				 * won't complete till after the ack...
				 */

				/*
				 * unless, of course, it's an iso and there
				 * is no ack...
				 */
				if (qe->qstate == QS_ISO) {
					process_iso_rx(qe);
				}
			}
		}

		advance_rx_bd();
	}
}

/*
 * Handle a BSY event: clear USMOD_EN in the usb mode register, flush
 * the receive and transmit sides, then set USMOD_EN again.
 */
static void
process_bsy(void)
{
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	volatile usbregs_t *usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];

	log_event(1, "process_bsy", 0);

	/* hack */
	usbregs->usb_usmod &= ~USMOD_EN;

	m8xxhci_flush_recv();
	m8xxhci_flush_xmit();

	usbregs->usb_usmod |= USMOD_EN;
}

/*
 * Handle a transmit error on endpoint 0.
 *
 * ber  the event bits sampled by the interrupt handler
 *
 * Completed tx bds are processed here unless BER_TXB is also set in ber
 * (the interrupt handler has then already processed them).  The error
 * is counted and the tx endpoint is restarted by issuing a CPM command
 * and busy-waiting until the command register's flag bit clears.
 */
void
m8xxhci_tx_err(int ber)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile cpm8xx_t *cp = cpmp;

	log_event(3, "m8xxhci_tx_err ber", ber);
	
	if ((ber & BER_TXB) == 0) {
		process_done_txbds();
	}
	
	hp->stats.txe[0]++;

	/* restart tx endpoint */
	cp->cp_cpcr = 0x2f01;
	mb();
	
	/* spin until the CPM has taken the command */
	while (cp->cp_cpcr & 0x1);

	hp->stats.restart_tx++;

	log_event(3, "m8xxhci_tx_err done", 0);
}

/*
 * USB controller interrupt handler.
 *
 * hci_p  the driver's private state (struct m8xxhci_private *)
 *
 * The event register is sampled and written back (acknowledging the
 * events), then each event bit is handled in turn: rx bds first, then
 * tx bds, busy, SOF, tx errors, idle and reset.  Idle and reset events
 * mask themselves off in usbmr, post an event bit in m8xxhci_events and
 * wake any waiter on m8xxhci_configure.  If the tx side was serviced,
 * transmission is continued at the end.
 */
static void
m8xxhci_interrupt(void *hci_p)
{
	volatile struct	m8xxhci_private	*hp = (struct m8xxhci_private *)hci_p;
	volatile	immap_t		*immap;
	volatile	usbpr_t		*usbprmap;
	volatile	usbregs_t	*usbregs;
	ushort ber;
	int serviced_tx = 0;

	hp->stats.interrupts++;

	/* get ptr to 8xx internal registers */
	immap = (immap_t *)IMAP_ADDR;

	/* usb param ram */
	usbprmap = (usbpr_t *)immap->im_cpm.cp_dparam;

	/* usb control registers */
	usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];

	/* sample and reset the ber */
	ber = usbregs->usb_usber;
	usbregs->usb_usber = ber;

	/* note: rx bd's must be processed before tx bds */
	/* (we depend on this) */
	if (ber & BER_RXB) {
		log_event(3, "RXB interrupt, ber", ber);
		hp->stats.rxb++;
		process_done_rxbds();
	}

	if (ber & BER_TXB) {
		hp->stats.txb++;
		process_done_txbds();
		serviced_tx = 1;
	}

	if (ber & BER_BSY) {
		printk("BSY INTERRUPT ber 0x%x\n", ber);
		log_event(1, "BSY INTERRUPT ber", ber);
		hp->stats.bsy++;
		process_bsy();
	}

	if (ber & BER_SOF) {
		hp->stats.sof++;
#ifdef USB_UCODE_PATCH
		//log_event(3, "SOF interrupt, ber", ber);

		if (0) printk("SOF INTERRUPT ber 0x%x, frame %u\n",
			      ber,hp->frame_no);

		/* frame number is 11 bits wide */
		hp->frame_no++;
		if (hp->frame_no > 0x7ff)
		  hp->frame_no = 0;

		/* frame number in the low 11 bits, inverted crc above it */
		usbprmap->usb_frame_n =
			(((~do_crc(hp->frame_no, 11)) & 0x1f) << 11) |
				hp->frame_no;
#endif
	}

	if (ber & BER_TXE0) {
		log_event(3, "TXE0 interrupt, ber", ber);
		m8xxhci_tx_err(ber);
		serviced_tx = 1;
	}

	/* errors on the other endpoints are only counted */
	if (ber & BER_TXE1) {
		hp->stats.txe[1]++;
	}

	if (ber & BER_TXE2) {
		hp->stats.txe[2]++;
	}

	if (ber & BER_TXE3) {
		hp->stats.txe[3]++;
	}

	if (ber & BER_IDLE) {
		hp->stats.idle++;

		/* one-shot: only act (and mask off) if idle was enabled */
		if ((usbregs->usb_usbmr & BER_IDLE)) {
			usbregs->usb_usbmr &= ~BER_IDLE;
			log_event(1, "usbmr turn idle off", usbregs->usb_usbmr);

			m8xxhci_events |= EV_IDLE;

			if (waitqueue_active(&m8xxhci_configure)) {
				wake_up(&m8xxhci_configure);
			}
		}
	}

	if (ber & BER_RESET) {
		hp->stats.reset++;

		/* one-shot: only act (and mask off) if reset was enabled */
		if ((usbregs->usb_usbmr & BER_RESET)) {
			usbregs->usb_usbmr &= ~BER_RESET;
			log_event(1, "usbmr turn reset off", usbregs->usb_usbmr);

			m8xxhci_events |= EV_RESET;

			if (waitqueue_active(&m8xxhci_configure)) {
				wake_up(&m8xxhci_configure);
			}
		}
	}

	/* if we serviced the tx bd's, look at adding more */
	if (serviced_tx) {
		continue_xmit();
	}
}

/*
 * Mark the tx bd ring as in use by incrementing txbd_list_busy under
 * txbd_list_lock.  This does not wait; code that must not touch the
 * ring checks txbd_list_busy.  The hp argument is not used.
 */
static void
lock_tx_ring(struct m8xxhci_private *hp)
{
	unsigned long flags;
	spin_lock_irqsave(&txbd_list_lock, flags);
	txbd_list_busy++;
	spin_unlock_irqrestore(&txbd_list_lock, flags);
}

/*
 * Undo one lock_tx_ring(): decrement txbd_list_busy under
 * txbd_list_lock.  The hp argument is not used.
 */
static void
unlock_tx_ring(struct m8xxhci_private *hp)
{
	unsigned long flags;
	spin_lock_irqsave(&txbd_list_lock, flags);
	txbd_list_busy--;
	spin_unlock_irqrestore(&txbd_list_lock, flags);
}

#ifndef USB_UCODE_PATCH
/*
 * Queue an SOF packet on the tx ring.  Does NOT lock the ring.
 *
 * Nothing is queued when fewer than 3 bds are free.  The packet is the
 * SOF pid followed by a 16 bit value, low byte first: the 11 bit frame
 * number with the inverted 5 bit crc above it.  The frame number is
 * advanced (wrapping after 0x7ff) before the packet is handed over.
 */
static void
add_sof_to_tx_ring(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile cbd_t *bd;
	int token;

	/* always leave 2 bds for a control message */
	if (free_bds() < 3)
		return;

	token = (((~do_crc(hp->frame_no, 11)) & 0x1f) << 11) | hp->frame_no;

	hp->frame_no++;
	if (hp->frame_no > 0x7ff)
		hp->frame_no = 0;

	hp->sof_pkt[0] = SOF;
	hp->sof_pkt[1] = token & 0xff;
	hp->sof_pkt[2] = token >> 8;

	/* push the packet out to memory before the bd is made ready */
	flush_dcache_range((int)hp->sof_pkt, (int)hp->sof_pkt+3);

	bd = next_bd();
	bd->cbd_datlen = 3;
	bd->cbd_bufaddr = __pa(hp->sof_pkt);
	bd->cbd_sc |= BD_SC_READY | BD_SC_LAST | BD_SC_INTRPT;
}
#endif

/* debug aid: dump the delay list, the pending qe lists and the frame lists */
void
m8xxhci_dump(void)
{
	dump_delay_qe_list();
	dump_pending_qe_list();
	dump_frame_lists();
}

/* set once the queue dump below has been printed, so it happens only once */
static int dumpthem;

/*
 * Poll two bits of the board register at RPX_CSR_ADDR and dump debug
 * state when they read as zero:
 *   0x20 clear - dump the qe lists (first time only)
 *   0x40 clear - dump stats, bd rings and the event log (every call)
 */
static void
check_switches(void)
{
	volatile uint *csr = (volatile uint *)RPX_CSR_ADDR;

	if ((*csr & 0x20) == 0 && dumpthem == 0) {
		dumpthem = 1;
		m8xxhci_dump();
	}

	if ((*csr & 0x40) == 0) {
		dump_state(1, 1, 1);
		dump_events();
	}
}

#ifndef USB_UCODE_PATCH
static spinlock_t need_sof_lock = SPIN_LOCK_UNLOCKED;
#endif

/*
 * Timer 4 interrupt handler; called every 1 ms to drive the frame
 * schedule (and, without the microcode patch, to generate SOF packets).
 *
 * context  not used
 *
 * Without USB_UCODE_PATCH an SOF is queued on the tx ring each tick.  If
 * the ring is in use, the missed SOF is counted in hp->need_sof and the
 * handler returns without doing the rest of the per-frame work.
 */
static void
m8xxhci_timer_interrupt(void *context)
{
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;

	m8xxhci_timer_ticks++;

	/* reset the interrupt */
	immap->im_cpmtimer.cpmt_ter4 = 0xffff;

#if 1
	check_switches();
#endif

#ifndef USB_UCODE_PATCH
	/* we assume interrupts are disabled */
	spin_lock(&txbd_list_lock);

	if (txbd_list_busy == 0) {
		add_sof_to_tx_ring();
		m8xxhci_kick_xmit(0);
	} else {
		/* ring is in use; note that an SOF is owed and bail out */
		spin_unlock(&txbd_list_lock);
		spin_lock(&need_sof_lock);
		hp->need_sof++;
		spin_unlock(&need_sof_lock);
		return;
	}

	spin_unlock(&txbd_list_lock);
#endif /* USB_UCODE_PATCH */

	/* request a port query every 100 ticks (this used to be 500) */
	if (++(hp->ms_count) == 100/*500*/) {
		hp->ms_count = 0;
		hp->need_query = 1;

		m8xxhci_events |= EV_QUERY;

		if (waitqueue_active(&m8xxhci_configure)) {
			wake_up(&m8xxhci_configure);
		}
	}

	/* start the new frame: schedule work and try to send */
	start_of_frame();
}

/*
   fill tx bd's

   unsigned long flags;

   spin_lock_irqsave(&txbd_list_lock, flags);
   txbd_list_busy++;
   spin_unlock_irqrestore(&txbd_list_lock, flags);

   take from queue(s)
   fill in txbd(s)

   spin_lock_irqsave(&txbd_list_lock, flags);
   txbd_list_busy--;
   spin_unlock_irqrestore(&txbd_list_lock, flags);

   spin_lock_irqsave(&txbd_sof_lock, flags);
   if (hp->need_sof) {
---> note; the sof should really be placed at the first descriptor
       add_sof_to_tx_ring();
       hp->need_sof--;
   }
   spin_unlock_irqrestore(&txbd_sof_lock, flags);

   m8xxhci_kick_xmit(0);
*/

static int tmr_count;
static int tmr_bytes_per_count;

/*
 * Program CPM timer 4 to interrupt every 1 ms and install
 * m8xxhci_timer_interrupt() as its handler.
 *
 * With USB_UCODE_PATCH defined, a baud rate generator (BRG1, or BRG3 if
 * selected) is also programmed with the same 1 ms count and routed to a
 * pin.  Before the timer is started, the code waits for a falling edge
 * on the BRG1 pin so the timer and the BRG signal run in step.
 *
 * Side effects visible to the rest of the file: sets tmr_count and
 * tmr_bytes_per_count, and defines the CPMTIMER_*, MIN_BYTES_LEFT,
 * USE_BRGx_FOR_SOF, FIX_SMC1_BRG1 and PA_DR7/PB_DR28 macros.
 */
static void
m8xxhci_timer_start(void)
{
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	int count;

	if (m8xxhci_verbose)
		printk("m8xxhci_timer_start()\n");

#define CPMTIMER_TMR_ORI 	0x0010	/* output reference interrupt enable */
#define CPMTIMER_TMR_FRR 	0x0008	/* free run/restart */
#define CPMTIMER_TMR_ICLK_INT16	0x0004	/* source internal clock/16 */
#define CPMTIMER_TMR_ICLK_INT	0x0002	/* source internal clock */

	/* guess the timer freq based on the process freq */
	{
		bd_t *bd = (bd_t *)__res;

		/*
		 * timer ticks per 1 ms with the clock/16 source
		 * (assumes bi_intfreq is in MHz - TODO confirm)
		 */
		count = (bd->bi_intfreq * 1000000) / (16 * 1000);

		if (m8xxhci_verbose)
			printk("intfreq %d, busfreq %d, count %d\n",
			       bd->bi_intfreq, bd->bi_busfreq, count);
	}

	/* timer counts per byte time, rounded up (BYTES_PER_USB_FRAME per frame) */
	tmr_count = count;
	tmr_bytes_per_count =
		(count + BYTES_PER_USB_FRAME-1) / BYTES_PER_USB_FRAME;
#define MIN_BYTES_LEFT 64
//#define MIN_BYTES_LEFT 100
//#define MIN_BYTES_LEFT 300


#ifdef USB_UCODE_PATCH
//#define USE_BRG3_FOR_SOF
#define USE_BRG1_FOR_SOF
#define FIX_SMC1_BRG1
#endif

#define CPMTIMER_TGCR_CAS4 	0x8000	/* cascade timer */
#define CPMTIMER_TGCR_FRZ4 	0x4000	/* freeze timer */
#define CPMTIMER_TGCR_STP4 	0x2000	/* stop timer */
#define CPMTIMER_TGCR_RST4 	0x1000	/* restart timer */

	/* reset timer4 */
	immap->im_cpmtimer.cpmt_tgcr &= ~(CPMTIMER_TGCR_CAS4 |
					  CPMTIMER_TGCR_FRZ4 |
					  CPMTIMER_TGCR_STP4);

	immap->im_cpmtimer.cpmt_tmr4 =
		CPMTIMER_TMR_ORI | CPMTIMER_TMR_FRR | CPMTIMER_TMR_ICLK_INT16;

	/* (GCLK2[48Mhz] / 16) / 3000 = 1ms */
	immap->im_cpmtimer.cpmt_trr4 = count;
	immap->im_cpmtimer.cpmt_tcr4 = 0;
	immap->im_cpmtimer.cpmt_tcn4 = 0;
	immap->im_cpmtimer.cpmt_ter4 = 0xffff;

	/* set up interrupt handler */
	cpm_install_handler(CPMVEC_TIMER4, m8xxhci_timer_interrupt, (void *)0);

#ifdef USE_BRG1_FOR_SOF
	if (m8xxhci_verbose)
		printk("m8xxhci: USING BRG1 FOR SOF!\n");

#ifdef FIX_SMC1_BRG1
	{
	  volatile immap_t *imp = (immap_t *)IMAP_ADDR;
	  volatile cpm8xx_t *commproc = (cpm8xx_t *)&imp->im_cpm;

	  /* move smc1 from brg1 to brg2 */
	  commproc->cp_brgc2 = commproc->cp_brgc1;
	  commproc->cp_simode &= ~0xf000;
	  commproc->cp_simode |= 0x1000;
	}
#endif

	{
		bd_t *bd = (bd_t *)__res;
		volatile cpm8xx_t *cp = cpmp;

		/* 1ms clock */
		count = (bd->bi_intfreq * 1000000) / (16 * 1000);

		if (m8xxhci_verbose)
			printk("intfreq %d, count %d\n",
			       bd->bi_intfreq, count);

		/* enable, use internal clock synth */
		cp->cp_brgc1 = 0x00010000 | (count << 1) | 1;
	}

	/* make BRG1 (PA7) an output */
#define PA_DR7 0x0100
	immap->im_ioport.iop_padir |= PA_DR7;
	immap->im_ioport.iop_papar |= PA_DR7;
	immap->im_ioport.iop_paodr &= ~PA_DR7;
#endif

#ifdef USE_BRG3_FOR_SOF
	if (m8xxhci_verbose)
		printk("USING BRG3 FOR SOF!\n");

	{
		/* board info comes from __res, as in the blocks above */
		bd_t *bd = (bd_t *)__res;
		volatile cpm8xx_t *cp = cpmp;

		/* 1ms clock */
		count = (bd->bi_intfreq * 1000000) / (16 * 1000);

		if (m8xxhci_verbose)
			printk("intfreq %d, count %d\n",
			       bd->bi_intfreq, count);

		/* enable, use internal clock synth */
		cp->cp_brgc3 = 0x00010000 | (count << 1) | 1;
	}

	/* make BRG3 an output */
#define PB_DR28 0x0008
	immap->im_cpm.cp_pbdir &= ~PB_DR28;
	immap->im_cpm.cp_pbpar |= PB_DR28;
#endif

	/* sync up internal timer with edge of BRG signal */

	/*
	 * wait for the pin to go high and then low again, i.e. a falling
	 * edge of BRG1
	 *
	 * NOTE(review): PA_DR7 is only defined above when USE_BRG1_FOR_SOF
	 * is set, and this wait is not conditional - confirm how the other
	 * configurations are meant to build and behave.
	 */
	while ((immap->im_ioport.iop_padat & PA_DR7) == 0)
		;

	while ((immap->im_ioport.iop_padat & PA_DR7))
		;

	/* then set the correct count for 1ms */
	immap->im_cpmtimer.cpmt_trr4 = count;
	immap->im_cpmtimer.cpmt_ter4 = 0xffff;

	/* and start timer */
	immap->im_cpmtimer.cpmt_tgcr |= CPMTIMER_TGCR_RST4;
}

/*
 * Debug aid: print every tx bd in the ring under the heading 'str'.
 * Each line shows the bd's address and its two 32-bit words; when the
 * second word (the buffer address) is non-zero, the first 4 bytes of the
 * buffer are printed as well.
 */
static void
dump_tx_bds(char *str)
{
	int i;
	volatile struct	m8xxhci_private	*hp = m8xxhci_ptr;
	u_char *p;
	printk("%s\n", str);
	for (i = 0; i < TX_RING_SIZE; i++) {
		printk("%p %08x/%08x ",
		       (uint *)(hp->tbase+i),
		       ((uint *)(hp->tbase+i))[0],
		       ((uint *)(hp->tbase+i))[1]);
		/* second word is the buffer's physical address */
		p = (u_char *)((uint *)(hp->tbase+i))[1];
		if (p) {
			p = (u_char *)__va(p);
			printk("%02x %02x %02x %02x",
			       p[0], p[1], p[2], p[3]);
		}
		printk("\n");
	}
}

/* debug aid: print endpoint 0's tx state, tx bd pointer and tx data pointer */
static void
dump_tx_state(void)
{
	volatile struct	m8xxhci_private	*hp = m8xxhci_ptr;
	volatile epb_t *epb = hp->epbptr[0];
	printk("ep0: tstate %x, tbptr %x tptr %x\n",
	       epb->epb_tstate, epb->epb_tbptr, epb->epb_tptr);
}

/*
 * Debug aid: print every rx bd in the ring under the heading 'str'.
 * Each bd's address and two 32-bit words are shown, followed by a hex
 * dump of the start of its buffer in rows of 8 bytes.  The dump length
 * is taken from the low 12 bits of the first word and capped at 16.
 */
static void
dump_rx_bds(char *str)
{
	int i;
	volatile struct	m8xxhci_private	*hp = m8xxhci_ptr;
	printk("%s\n", str);
	for (i = 0; i < RX_RING_SIZE; i++) {
		int len;
		u_char *p;

		printk("%p %08x/%08x\n",
		       (uint *)(hp->rbase+i),
		       ((uint *)(hp->rbase+i))[0],
		       ((uint *)(hp->rbase+i))[1]);
		
		len = ((uint *)(hp->rbase+i))[0];
		len &= 0x0fff;
		if (len > 16) len = 16;
		/* second word is the buffer's physical address */
		p = (u_char *)((uint *)(hp->rbase+i))[1];
		if (len > 0 && p) {
			p = (u_char *)__va(p);
			/* whole rows only: a partial last row still prints 8 bytes */
			while (len > 0) {
				printk("    %02x %02x %02x %02x %02x %02x %02x %02x\n",
				       p[0], p[1], p[2], p[3],
				       p[4], p[5], p[6], p[7]);
				p += 8;
				len -= 8;
			}
		}
	}
}

/*
 * Debug aid: print the receive state, pointer and byte count fields
 * from the usb parameter ram.  The port pin dump below is compiled in
 * but disabled.
 */
static void
dump_rx_state(void)
{
	volatile usbpr_t	*usbprmap =
		(usbpr_t *)((immap_t *)IMAP_ADDR)->im_cpm.cp_dparam;

	printk("rstate 0x%x, rptr %08x, rbcnt 0x%08x\n",
	       usbprmap->usb_rstate, 
	       usbprmap->usb_rptr, 
	       usbprmap->usb_rbcnt);

	if (0) {
		volatile immap_t *immap = (immap_t *)IMAP_ADDR;
		printk("padat 0x%04x (masked 0x%04x)\n",
		       immap->im_ioport.iop_padat,
		       immap->im_ioport.iop_padat & (PA_USB_RXD | PA_USB_OE));
		printk("pcdat 0x%04x (masked 0x%04x)\n",
		       immap->im_ioport.iop_pcdat,
		       immap->im_ioport.iop_pcdat & (PC_USB_RXP | PC_USB_RXN));
	}
}

/*
 * Debug aid: dump selected parts of the driver state.
 *
 *   stats - non-zero prints the interrupt/error counters and the tx
 *           ring bookkeeping (txfree/txlast/txnext)
 *   rx    - non-zero prints the receive state and the rx descriptors
 *   tx    - non-zero prints the transmit state and, with the tx ring
 *           locked, the tx descriptors
 */
static void
dump_state(int stats, int rx, int tx)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;

	if (0)
		printk("kick, txbd_list_busy %d\n", txbd_list_busy);

	if (stats) {
		printk("int %lu: idle %lu, rst %lu, bsy %lu, rxb %lu, txb %lu\n",
		       hp->stats.interrupts, hp->stats.idle, hp->stats.reset,
		       hp->stats.bsy, hp->stats.rxb, hp->stats.txb);

		printk("txe0 %lu, nak %lu, stal %lu, to %lu, un %lu\n",
		       hp->stats.txe[0],
		       hp->stats.tx_nak,
		       hp->stats.tx_stal,
		       hp->stats.tx_to,
		       hp->stats.tx_un);

		printk("rexmit     %lu\n", hp->stats.retransmit);
		printk("restart tx %lu\n", hp->stats.restart_tx);

		printk("txfree %d, txlast %d, txnext %d\n",
		       hp->txfree, hp->txlast, hp->txnext);
	}

	if (rx) {
		dump_rx_state();
		dump_rx_bds("rx bds: ");
	}

	if (tx) {
		dump_tx_state();

		/* hold the ring lock while walking the descriptors */
		lock_tx_ring(hp);
		dump_tx_bds("tx bds: ");
		unlock_tx_ring(hp);
	}
}

/*
 * Debug aid: print the root hub bookkeeping pointers and every
 * non-NULL entry of the root device's children[] array.
 *
 * hp->root_hub is cleared on disconnect, and its ->usb pointer is
 * handed to usb_disconnect() by address, so either may be NULL when
 * this is called.  Both are checked before being dereferenced.
 */
static void
dump_root_hub(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	int i;

	if (!hp->root_hub) {
		printk("hp->root_hub %p\n", hp->root_hub);
		return;
	}

	printk("hp->root_hub %p, hp->root_hub->usb %p\n",
	       hp->root_hub, hp->root_hub->usb);

	/* nothing more to show without a usb_device */
	if (!hp->root_hub->usb)
		return;

	for (i = 0; i < USB_MAXCHILDREN; i++) {
		if (hp->root_hub->usb->children[i])
			printk("children[%d]=%p\n",
			       i, hp->root_hub->usb->children[i]);
	}
}

/*
 * De-allocate all resources: free the bus structure hanging off the
 * private block, then the private block itself.
 *
 * m8xxhci - driver private structure to release; must not be used by
 *           the caller afterwards.
 */
static void
release_m8xxhci(struct m8xxhci_private *m8xxhci)
{
	/* the bus pointer lives inside m8xxhci, so it has to go first */
	kfree(m8xxhci->bus);
	kfree(m8xxhci);
}

/*
 * Our one and only USB port has gone away: disconnect every child of
 * the root device, then the root device itself, and clear the root hub
 * pointers held by the driver and by the bus.
 *
 * The root hub may never have been set up: m8xxhci_connect_device()
 * returns early on allocation failure without assigning hp->root_hub.
 * Both hp->root_hub and its ->usb pointer are therefore checked before
 * they are dereferenced.
 */
static void
m8xxhci_disconnect_device(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	int i;

#ifdef DEBUG_CHECKS
	if (m8xxhci_debug) {
		printk("m8xxhci_disconnect_device()\n");
		if (hp->root_hub && hp->root_hub->usb)
			dump_root_hub();
	}
#endif

	if (hp->root_hub && hp->root_hub->usb) {
		/* lose all the children */
		for (i = 0; i < USB_MAXCHILDREN; i++)
			if (hp->root_hub->usb->children[i])
				usb_disconnect(&hp->root_hub->usb->children[i]);

		/* lose the device itself */
		usb_disconnect(&hp->root_hub->usb);
	}

	hp->root_hub = 0;
	hp->bus->root_hub = 0;
}

/*
 * Something is connected to our one and only USB port.
 *
 * Allocates the usb_device that forms the root of the tree plus the
 * driver-local device record, records it as the root hub of both the
 * driver and the bus, assigns it an address, waits 200ms and then
 * hands it to the USB core for enumeration.  On allocation failure an
 * error is logged and nothing is registered.
 */
static void
m8xxhci_connect_device(void)
{
	struct m8xxhci_private	*hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct usb_device	*udev;
	struct m8xxhci_device	*local;

	log_event(1, "m8xxhci_connect_device", 0);
	if (m8xxhci_verbose)
		printk("m8xxhci_connect_device()\n");

	/* root of the device tree */
	udev = usb_alloc_dev(NULL, hp->bus);
	if (udev == NULL) {
		err("m8xxhci: couldn't allocate usb_device");
		return;
	}

	/* driver-private companion structure */
	local = add_local_dev(udev);
	if (local == NULL) {
		usb_free_dev(udev);
		err("m8xxhci: couldn't allocate internal device");
		return;
	}

	/* remember the directly connected device (most likely a hub) */
	hp->root_hub = usb_to_m8xxhci(udev);

	udev->bus = hp->bus;
	hp->bus->root_hub = udev;

	/* (the call to usb_init_root_hub(udev) is disabled) */

	udev->maxchild = USB_MAXCHILDREN;

	/* give it a device number */
	usb_connect(udev);

	/* let the device power up */
	wait_ms(200);

	/* enumerate it */
	usb_new_device(udev);
}

/*
 * Probe the port by issuing an 8-byte control request (the bytes in
 * cmd[]) to endpoint 0 and waiting for the queue element to finish.
 *
 * The request is addressed to the root device's devnum when a root hub
 * is registered and that number is non-negative, otherwise to 0.  With
 * no root hub a zeroed usb_device on the stack stands in for it.
 *
 * Returns the final qe->status, or -1 when no queue element could be
 * allocated.  The caller treats 0 as "device answered".
 */
static int
m8xxhci_query_device(void)
{
	static unsigned char cmd[8] = { 0x80, 0x06, 0, 1, 0, 0, 8, 0 };
	static unsigned char data[64];
	struct usb_device dd, *dev;
	struct m8xxhci_qe *qe;
	unsigned long flags;
	int devnum, endpoint, status;

	log_event(2, "m8xxhci_query_device", 0);
	if (0)
		printk("m8xxhci_query_device()\n");

	memset((char *)&dd, 0, sizeof(dd));

	/* query the root hub's only child, or a blank stand-in */
	dev = m8xxhci_ptr->root_hub ?
		m8xxhci_to_usb(m8xxhci_ptr->root_hub) : &dd;

	if (0)
		printk("m8xxhci_query_device() root_hub %p, dev %p, dd %p\n",
		       m8xxhci_ptr->root_hub, dev, &dd);

	endpoint = 0;
	devnum = (dev->devnum >= 0) ? dev->devnum : 0;

	if (0)
		printk("hub devnum %d, using devnum %d\n", dev->devnum, devnum);

	/* build queue element */
	qe = allocate_qe((struct m8xxhci_device *)0, Q_CTRL);
	if (!qe)
		return -1;

	log_event(1, "query qe", (int)qe);

	qe->dev = dev;
	qe->pipe = usb_rcvdefctrl(dev);
	qe->devnum = devnum;
	qe->endpoint = endpoint;
	qe->cmd = cmd;
	qe->data = data;
	qe->data_len = 8;
	qe->qstate = QS_SETUP;
	qe->status = 1;
	qe->urb = 0;

	/* hand it to the control queue */
	spin_lock_irqsave(&queue_lock, flags);
	enqueue_qe(qe, Q_CTRL);
	spin_unlock_irqrestore(&queue_lock, flags);

#if 0
	/* start working */
	run_queues();
#endif

	wait_for_qe(qe);

	/* grab the result before the qe is given back */
	status = qe->status;

	log_event(1, "m8xxhci_query_device done status", status);
	if (0)
		printk("m8xxhci_query_device() done status %d\n", status);

	deallocate_qe(qe);

	return status;
}

/*
 * Handle the event bits in 'what' on behalf of the control thread.
 *
 * Only EV_QUERY is acted on.  It advances the driver state machine:
 *
 *   DS_FIND_DEVICE - reset the bus and probe; on success go to
 *                    DS_READY and connect the device
 *   DS_READY       - probe; on failure go to DS_MISSING
 *   DS_MISSING     - probe again; on success return to DS_READY,
 *                    otherwise disconnect and go back to
 *                    DS_FIND_DEVICE
 */
static void
m8xxhci_event(int what)
{
	struct m8xxhci_private	*hp = (struct m8xxhci_private *)m8xxhci_ptr;

	log_event(2, "m8xxhci_event what", what);
	if (0)
		printk("m8xxhci_event() what=%d\n", what);

	if (what & EV_QUERY) {
		switch (hp->driver_state) {
		case DS_FIND_DEVICE:
			assert_reset(0);
			if (m8xxhci_query_device() != 0)
				break;

			hp->driver_state = DS_READY;
			log_event(1, "device found", 0);
			printk("m8xxhci: device found!\n");
			m8xxhci_connect_device();
			break;

		case DS_READY:
			if (m8xxhci_query_device() == 0)
				break;

			log_event(1, "device missing", 0);
			hp->driver_state = DS_MISSING;
			break;

		case DS_MISSING:
			if (m8xxhci_query_device() == 0) {
				log_event(1, "device found again", 0);
				hp->driver_state = DS_READY;
				break;
			}

			/* two failed probes in a row: give up on it */
			log_event(1, "device lost", 0);
			printk("m8xxhci: device lost\n");
#if 0
			dump_events();
#endif
			m8xxhci_disconnect_device();
			hp->driver_state = DS_FIND_DEVICE;
			break;
		}
	}

#if 0
	/* only notice going idle */
	if (!(what & EV_IDLE))
		return;

	m8xxhci_connect_device();

	/* allow USB RESET condition interrupts again */
	if (0)
	{
		volatile immap_t		*immap;
		volatile usbregs_t	*usbregs;

		immap = (immap_t *)IMAP_ADDR;
		usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];

		usbregs->usb_usber = BER_RESET | BER_IDLE;
		usbregs->usb_usbmr |= BER_RESET | BER_IDLE;
		mb();
		if (1) printk("usbmr reset 0x%x\n", usbregs->usb_usbmr);
	}
#endif
}

/*
 * Kernel control thread.  Detaches from user-level resources, names
 * itself, then loops forever: whenever m8xxhci_events is non-zero the
 * bits are consumed and passed to m8xxhci_event(), otherwise the
 * thread sleeps on m8xxhci_configure.
 *
 * The loop has no exit, so the tear-down code after it is never
 * reached as written.
 */
static int
m8xxhci_thread(void *__hub)
{
	volatile	immap_t		*immap;
	volatile	usbregs_t	*usbregs;
	volatile struct	m8xxhci_private	*hp = m8xxhci_ptr;

	immap = (immap_t *)IMAP_ADDR;
	usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];

	if (m8xxhci_verbose)
		printk("m8xxhci: control thread starting\n");

	/* no user-level access needed, so drop those resources */
	lock_kernel();
	exit_files(current);
	daemonize();
	unlock_kernel();

	/* give the thread a recognisable name */
	strcpy(current->comm, "m8xxhci-control");

	while (1) {
		int what = m8xxhci_events;

		if (!what) {
			/* nothing pending: wait to be woken */
			interruptible_sleep_on(&m8xxhci_configure);
			continue;
		}

		m8xxhci_events = 0;
		m8xxhci_event(what);
	}

	cleanup_drivers();

	m8xxhci_stop_controller();
	release_m8xxhci((struct	m8xxhci_private	*)hp);
	MOD_DEC_USE_COUNT;

	if (m8xxhci_verbose)
		printk("m8xxhci: control thread exiting\n");

	return 0;
}

#if TEST_THREAD /*XXX hack */

/*
 * Test hack: queue a single marker packet on the tx ring.  The packet
 * reuses hp->sof_pkt as its buffer, with the first three bytes set to
 * 0x99 0x02 0x01 and a descriptor length of 20.  Nothing is queued
 * unless at least three descriptors are free.
 */
static void
add_pkt_to_tx_ring(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile cbd_t *bd;

	/* always leave 2 bds for a control message */
	if (free_bds() < 3)
		return;

	hp->sof_pkt[0] = 0x99;
	hp->sof_pkt[1] = 0x02;
	hp->sof_pkt[2] = 0x01;

	/* push the marker bytes out of the data cache */
	flush_dcache_range(hp->sof_pkt, hp->sof_pkt+3);

	bd = next_bd();
	bd->cbd_datlen = 20;
	bd->cbd_bufaddr = __pa(hp->sof_pkt);
	bd->cbd_sc |= BD_SC_READY | BD_SC_LAST | BD_SC_INTRPT;
}

/*
 * Test hack: busy-wait until fewer than 'i' bytes remain in the current
 * frame (as reported by time_left_in_frame()), queue one marker packet
 * and kick the transmitter, then spin until the frame tick counter
 * advances.
 *
 * i - threshold, in bytes left in the frame, below which the marker
 *     is sent
 */
static void
send_marker(int i)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	int bytes;
	unsigned long ticks;

	while (1) {
		time_left_in_frame(&bytes);
		ticks = m8xxhci_timer_ticks;

		if (bytes < i) {
			lock_tx_ring(hp);
			add_pkt_to_tx_ring();
			/* m8xxhci_kick_xmit() takes no arguments */
			m8xxhci_kick_xmit();
			unlock_tx_ring(hp);

			/* wait for the next frame tick before returning */
			while (ticks == m8xxhci_timer_ticks)
				;

			break;
		}
	}
}

/*
 * Test hack: kernel thread that sends marker packets forever.  After
 * waiting for one frame tick it sweeps the threshold from 1300 down to
 * 60 in steps of 10, sending 100 markers at each setting, and then
 * starts over.  Never returns in practice.
 */
static int
test_thread(void *x)
{
	unsigned long start_tick;
	int threshold, rep;

	if (m8xxhci_verbose)
		printk("m8xxhci: test thread starting\n");

	lock_kernel();
	exit_files(current);
	daemonize();
	unlock_kernel();
	strcpy(current->comm, "test");

	/* line up with a frame boundary */
	start_tick = m8xxhci_timer_ticks;
	while (start_tick == m8xxhci_timer_ticks)
		;

	while (1) {
		for (threshold = 1300; threshold > 50; threshold -= 10) {
			for (rep = 0; rep < 100; rep++)
				send_marker(threshold);
		}
	}

	return 0;
}
#endif


/*
 * Allocate CPM memory on a 32 byte boundary.
 *
 * Asks m8xx_cpm_dpalloc() for 'size' + 32 bytes and returns the
 * resulting index rounded up to the next multiple of 32.
 */
static int
cpm_32b_dpalloc(int size)
{
	int raw = m8xx_cpm_dpalloc(size + 32);

	return (raw + 31) & ~0x1f;
}

/*
 * Allocate CPM memory on an 8 byte boundary.
 *
 * Asks m8xx_cpm_dpalloc() for 'size' + 8 bytes and returns the
 * resulting index rounded up to the next multiple of 8.
 */
static int
cpm_8b_dpalloc(int size)
{
	int raw = m8xx_cpm_dpalloc(size + 8);

	return (raw + 7) & ~0x7;
}

/*
 * Return the tx descriptor ring to its empty state: every descriptor
 * gets a zero status word and buffer address and loses its owning qe,
 * the final descriptor is flagged to wrap, and the software indices
 * are rewound with the whole ring marked free.
 */
static void
reset_tx_ring(void)
{
	volatile struct m8xxhci_private *hp = m8xxhci_ptr;
	volatile cbd_t *ring = hp->tbase;
	int slot;

	for (slot = 0; slot < TX_RING_SIZE; slot++) {
		hp->tx_bd_qe[slot] = 0;
		ring[slot].cbd_sc = 0;
		ring[slot].cbd_bufaddr = 0;
	}

	/* the last descriptor wraps back to the first */
	ring[TX_RING_SIZE - 1].cbd_sc |= BD_SC_WRAP;

	hp->txnext = 0;
	hp->txlast = 0;
	hp->txfree = TX_RING_SIZE;
}

/*
 * Re-arm the rx descriptor ring: every descriptor is marked empty with
 * interrupt-on-completion and a zero length, the final descriptor is
 * flagged to wrap, and the software receive index is rewound.
 */
static void
reset_rx_ring(void)
{
	volatile	struct m8xxhci_private *hp = m8xxhci_ptr;
	volatile	cbd_t		*ring = hp->rbase;
	int slot;

	for (slot = 0; slot < RX_RING_SIZE; slot++) {
		ring[slot].cbd_sc = BD_SC_EMPTY | BD_SC_INTRPT;
		ring[slot].cbd_datlen = 0;
	}

	/* the last descriptor wraps back to the first */
	ring[RX_RING_SIZE - 1].cbd_sc |= BD_SC_WRAP;

	hp->rxnext = 0;
}

/*
 * Discard any pending receive state: point the rx descriptor pointer
 * of the parameter block in hp->epbptr[0] back at the ring base, then
 * re-arm all rx descriptors.
 */
void
m8xxhci_flush_recv(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile epb_t *ep0 = hp->epbptr[0];

	ep0->epb_rbptr = ep0->epb_rbase;

	reset_rx_ring();
}

/*
 * Abort everything queued for transmit on endpoint 0: stop the tx
 * endpoint via a CPM command, flush the USB fifo, rewind the tx
 * descriptor pointer to the ring base, clear the ring and restart the
 * tx endpoint.
 *
 * Both CPM commands are followed by an unbounded busy-wait on bit 0 of
 * the command register.
 */
void
m8xxhci_flush_xmit(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile cpm8xx_t *cp = cpmp;
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	volatile usbregs_t *usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];
	volatile epb_t *epb;

	if (0) printk("m8xxhci_flush_xmit()\n");

	/* stop tx endpoint */
	cp->cp_cpcr = 0x1f01 | (0 << 2);
	mb();
	/* spin until bit 0 of the command register reads back as zero */
	while (cp->cp_cpcr & 0x1);
	
	/* flush fifo */
	eieio();
	usbregs->usb_uscom = 0x40 | 0;
	mb();
	
	/* reset ring */
	epb = hp->epbptr[0];
	epb->epb_tbptr = epb->epb_tbase;
	
	reset_tx_ring();
	
	/* restart tx endpoint */
	cp->cp_cpcr = 0x2f01 | (0 << 2);
	mb();
	/* spin until bit 0 of the command register reads back as zero */
	while (cp->cp_cpcr & 0x1);

	if (0) printk("m8xxhci_flush_xmit() done\n");
}

/*
 * Nudge the transmitter by writing 0x80 to the USB command register,
 * bracketed by I/O ordering barriers.
 * NOTE(review): 0x80 is presumably the "start" command for endpoint 0 -
 * confirm against the MPC8xx USB controller documentation.
 */
void
m8xxhci_kick_xmit(void)
{
	volatile immap_t	*immap = (immap_t *)IMAP_ADDR;
	volatile usbregs_t	*usbregs =
		(usbregs_t *)&immap->im_cpm.cp_scc[0];

	eieio();
	usbregs->usb_uscom = 0x80 | 0;
	mb();
}

/* protects every access to the per-controller urb list */
static spinlock_t urblist_lock = SPIN_LOCK_UNLOCKED;

/*
 * Append 'urb' to the tail of the controller's urb list, under
 * urblist_lock with interrupts saved.
 */
static void add_urb_list(struct urb *urb)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	unsigned long irq_state;

	spin_lock_irqsave(&urblist_lock, irq_state);
	list_add_tail(&urb->urb_list, &hp->urb_list);
	spin_unlock_irqrestore(&urblist_lock, irq_state);
}

/*
 * Take 'urb' off whatever list its urb_list node is linked into, under
 * urblist_lock with interrupts saved.  The node is re-initialised
 * afterwards, so calling this again for the same urb is harmless.
 */
static void remove_urb_list(struct urb *urb)
{
	unsigned long irq_state;

	spin_lock_irqsave(&urblist_lock, irq_state);

	if (!list_empty(&urb->urb_list)) {
		list_del(&urb->urb_list);
		/* leave the node self-linked so list_empty() holds next time */
		INIT_LIST_HEAD(&urb->urb_list);
	}

	spin_unlock_irqrestore(&urblist_lock, irq_state);
}
 

/*
 * Estimate how many bytes can still be sent in the current frame.
 *
 * The remaining timer counts (tmr_count minus the current value of CPM
 * timer 4) are divided by tmr_bytes_per_count; the result is stored in
 * *bytes and logged.
 *
 * Background from the original author:
 *   12Mhz bus, byte time = 1/12Mhz * 8 = 667ns
 *   1 frame = 1ms or 1,000,000ns
 *   1 frame = 1,000,000 / 667 = 1499 bytes
 *   but 1280 is the right number... (preamble? bit stuffing?)
 *
 * Returns 1 when at least MIN_BYTES_LEFT bytes remain, 0 otherwise.
 * Be careful! if we crash into the SOF send, the transmit will lock
 * up, hence the safety margin.
 */
static int
time_left_in_frame(int *bytes)
{
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	int ticks_remaining;
	int room;

	ticks_remaining = tmr_count - immap->im_cpmtimer.cpmt_tcn4;
	room = ticks_remaining / tmr_bytes_per_count;

	log_event(3, "bytes_left", room);

	*bytes = room;

	return (room < MIN_BYTES_LEFT) ? 0 : 1;
}

/*
   Send the next frame for a queue element, depending on its state.
   If the qe is a SETUP, multiple frames are actually sent.

   SETUP
	[setup stage]
	->	setup	3 bytes
	->	data0	8 bytes
					<- ack
	[optional data stage]
	->	out/in	3 bytes					
	->	datax	n bytes					
					<- ack
	[status stage]
	->	out/in	3 bytes					
	->	data1	0 bytes					
					<- ack

   example:
     get descriptor
   	-> setup(3), data0(8) 0x80, 0x06, 0, 1, 0, 0, 8, 0
					<- ack
        -> in(3)
					<- data1(8)
	-> ack
	-> out(3), data1(0)
					<- ack

     set address
   	-> setup(3), data0(8) 0x80, 0x05, 0, 1, 0, 0, 0, 0
					<- ack
	-> in(3)
					<- data1(0)
	-> ack

   Return value:
	 0	the descriptors queued complete the work for this state
	 1	descriptors were queued but more stages/data remain
	 2	nothing could be sent right now (no time left in the frame,
		no free descriptors, or another qe is active); try again
	-1	another qe has been active for more than MAX_QE_BUSYS
		attempts

   The whole ring update runs under txbd_list_lock with interrupts
   saved.  The first descriptor of each batch is built without
   BD_SC_READY and only marked ready once the whole batch is in place.
*/
static int
send_qe(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile cbd_t	*bdp, *first_bdp;
	int bytes, len, data_bd_count, ret, maxsze;
	unsigned char token, *data;
	unsigned long flags;
	struct urb *urb = qe->urb;
	iso_packet_descriptor_t *ipd;


	/* max packet size for this pipe; 8 when no device, and never below 8 */
	maxsze = qe->dev ? usb_maxpacket(qe->dev, qe->pipe,
					 usb_pipeout(qe->pipe)) : 8;

	if (maxsze < 8)
		maxsze = 8;

	qe->maxpacketsize = maxsze;

	if (/*qe->urb == 0*/1) {
		log_event(1, "send_qe qe", (int)qe);
		log_event(3, "qstate", qe->qstate);
	}

	spin_lock_irqsave(&txbd_list_lock, flags);

	/* time check */
	if (!time_left_in_frame(&bytes)) {
		spin_unlock_irqrestore(&txbd_list_lock, flags);
		return 2;
	}

	/* paranoid check */
	if (hp->active_qe && hp->active_qe != qe) {
		log_event(1, "send_qe busy with", (int)hp->active_qe);

#if 1
		if (hp->active_qe->busys == MAX_QE_STALLED) {
			log_event(1, "stalled? kicking tx!", 0);

			m8xxhci_kick_xmit();
		}
#endif
		ret = 2;

		/* count the collision; give up once the limit is passed */
		if (++(hp->active_qe->busys) > MAX_QE_BUSYS) {
			ret = -1;
		}

		spin_unlock_irqrestore(&txbd_list_lock, flags);
		return ret;
	}

	make_active_qe(qe);

//	spin_unlock_irqrestore(&txbd_list_lock, flags);

	/* setup for building tx bds */
	ret = 0;
	first_bdp = 0;

	/* lock the tx bd ring */
//	lock_tx_ring(hp);

	switch (qe->qstate) {
	case QS_SETUP:
		qe->whichdata = 0;
		log_event(2, "SETUP; len", qe->data_len);
		log_event(2, "SETUP; devnum/endpoint",
			  (qe->devnum << 16) | qe->endpoint);
		if (usb_pipeslow(qe->pipe))
			log_event(2, "SETUP; slow", usb_pipeslow(qe->pipe));
		else
			log_event(3, "SETUP; slow", usb_pipeslow(qe->pipe));

		if (free_bds() < 2) {
			log_event(1, "SETUP; no bds! qe", (int)qe);
			printk("SETUP; no bds!\n");
#ifdef DEBUG_CHECKS
//			dump_state(1,0,1);
			dump_events();
#endif
			ret = 2;
			break;
		}

		/* setup stage transaction (SETUP + DATA0) */
		bytes = calc_crc5(qe->devnum, qe->endpoint);
		qe->ph[0] = SETUP;
		qe->ph[1] = bytes & 0xff;
		qe->ph[2] = bytes >> 8;

		bdp = next_bd_qe(qe);
		bdp->cbd_datlen = 3;
		bdp->cbd_bufaddr = __pa(qe->ph);
		bdp->cbd_sc |= BD_SC_LAST;

		if (usb_pipeslow(qe->pipe))
			bdp->cbd_sc |= BD_USB_LSP;

		/* don't set ready in BD */
		first_bdp = bdp;

		bdp = next_bd_qe(qe);
		bdp->cbd_datlen = 8;
		bdp->cbd_bufaddr = __pa(qe->cmd);
		bdp->cbd_sc |=
			BD_SC_READY | BD_SC_LAST |
			BD_USB_DATA0 |
			BD_USB_TC | BD_USB_CNF | BD_SC_INTRPT;

		flush_dcache_range((int)qe->cmd, (int)qe->cmd+8);

		/* more to do - send these and stop adding bd's for now */
		ret = 1;
		hp->xmit_state[qe->qtype] = XS_SETUP;
		break;

	case QS_ISO:
		log_event(3, "ISO; devnum/endpoint",
			  (qe->devnum << 16) | qe->endpoint);

		maxsze = 1024;

		ipd = &urb->iso_frame_desc[qe->iso_ptr];
		qe->iso_data = qe->data + ipd->offset;

		token = usb_pipeout(qe->pipe) ? OUT : IN;
		len = ipd->length;
		data = qe->data + ipd->offset + ipd->actual_length;

		/* if no space in frame, wait... */
		if (len > bytes) {
			ret = 2;
			break;
		}

		log_event(3, "ISO; len", len);
		log_event(3, "ISO; ipd offset", ipd->offset);
		log_event(3, "ISO; idp actual_len", ipd->actual_length);
		log_event(3, "ISO; iso_ptr", qe->iso_ptr);

		bytes = calc_crc5(qe->devnum, qe->endpoint);
		qe->ph[0] = token;
		qe->ph[1] = bytes & 0xff;
		qe->ph[2] = bytes >> 8;

		first_bdp = 0;

		while (len > 0) {
			if (len < maxsze)
				maxsze = len;

			/* data stage (OUT+DATAx or IN) */
			bdp = next_bd_qe(qe);
			bdp->cbd_datlen = 3;
			bdp->cbd_bufaddr = __pa(qe->ph);

			/* don't set ready in first BD */
			if (first_bdp == 0) {
				first_bdp = bdp;
				bdp->cbd_sc |= BD_SC_LAST;
			} else {
				bdp->cbd_sc |= BD_SC_READY | BD_SC_LAST;
			}

			switch (token) {
			case OUT:
				/* follow OUT with DATAx */
				log_event(3, "ISO; add OUT len", maxsze);
				if (0) printk("ISO; add OUT len %d\n", maxsze);

				bdp = next_bd_qe(qe);
				bdp->cbd_datlen = maxsze;
				bdp->cbd_bufaddr = __pa(data);
				bdp->cbd_sc |=
					BD_SC_READY | BD_SC_LAST |
					BD_USB_DATA0 | BD_USB_TC;
				break;
			case IN:
				log_event(3, "ISO; add IN len", maxsze);
				break;
			}

			data += maxsze;
			len -= maxsze;
		}

		/* set interrupt on last bd */
		if (first_bdp) {
			bdp->cbd_sc |= BD_SC_INTRPT;

			/* more to do - stop sending bd's */
			ret = 1;
			hp->xmit_state[qe->qtype] = XS_IN;
			break;
		}
		break;

	case QS_BULK:
		log_event(3, "BULK; devnum/endpoint",
			  (qe->devnum << 16) | qe->endpoint);
		qe->whichdata = 1;
		/* fall through */

	case QS_SETUP2:
		/* calc how many bd's we need to send this transaction */
		data_bd_count = (qe->data_len + maxsze - 1) / maxsze;

		if (0) printk("SETUP2; bd_count %d, data_len %d\n",
			      data_bd_count, qe->data_len);

		if (free_bds() < 4 + data_bd_count) {
			/* requeue, we don't have enough bd's  */
			log_event(1, "SETUP2; no bds! qe", (int)qe);
#ifdef DEBUG_CHECKS
			if (1) {
				printk("m8xxhci: SETUP2/BULK; no bds! "
				       "bd_count %d, data_len %d\n",
				       data_bd_count, qe->data_len);
//				dump_state(1, 1, 1);
			}
#endif
			ret = 2;
			break;
		}

		/*
		 * If direction is "send", change the frame from SETUP (0x2D)
		 * to OUT (0xE1). Else change it from SETUP to IN (0x69)
		 */
		token = usb_pipeout(qe->pipe) ? OUT : IN;
		len = qe->data_len;
		data = qe->data + qe->send_len;

		/* if we're resending, we need to fix up the data0/1 marker */
		if (qe->send_len > 0) {
			qe->whichdata = (qe->data_len / maxsze) & 1;
			if (qe->qstate == QS_SETUP2)
				qe->whichdata ^= 1;
		}

#if 1
		/* experiment - trim down to available bytes left in frame */
		if (len > bytes) {
			bytes = (bytes * 9) / 10;
			len = (bytes / maxsze) * maxsze;
			log_event(3, "trim size to", len);
			if (len == 0) {
				ret = 2;
				break;
			}
		}
#endif

		log_event(3, qe->qstate == QS_SETUP2 ? 
			  "SETUP2; send_len" : "BULK; send_len",
			  qe->send_len);

		log_event(3, qe->qstate == QS_SETUP2 ? 
			  "SETUP2; len" : "BULK; len", len);

		bytes = calc_crc5(qe->devnum, qe->endpoint);
		qe->ph[0] = token;
		qe->ph[1] = bytes & 0xff;
		qe->ph[2] = bytes >> 8;

		first_bdp = 0;

		while (len > 0) {
			if (len < maxsze)
				maxsze = len;

			/* alternate DATA0/DATA1 for every packet */
			qe->whichdata ^= 1;

			/* data stage (OUT+DATAx or IN) */
			bdp = next_bd_qe(qe);
			bdp->cbd_datlen = 3;
			bdp->cbd_bufaddr = __pa(qe->ph);

			/* don't set ready in first BD */
			if (first_bdp == 0) {
				first_bdp = bdp;
				bdp->cbd_sc |= BD_SC_LAST;
			} else {
				bdp->cbd_sc |= BD_SC_READY | BD_SC_LAST;
			}

			if (usb_pipeslow(qe->pipe))
				bdp->cbd_sc |= BD_USB_LSP;

			switch (token) {
			case OUT:
				/* follow OUT with DATAx */
				log_event(3, qe->qstate == QS_SETUP2 ? 
					  "SETUP2; add OUT len" :
					  "BULK; add OUT len",
					  maxsze);

				bdp = next_bd_qe(qe);
				bdp->cbd_datlen = maxsze;
				bdp->cbd_bufaddr = __pa(data);
				bdp->cbd_sc |=
					BD_SC_READY | BD_SC_LAST |
					(qe->whichdata ? BD_USB_DATA1 :
							 BD_USB_DATA0) |
					BD_USB_TC | BD_USB_CNF;
				break;
			case IN:
				log_event(3, qe->qstate == QS_SETUP2 ? 
					  "SETUP2; add IN len" :
					  "BULK; add IN len",
					  maxsze);

				bdp->cbd_sc |= BD_USB_CNF;

				/* slow pipes get a single IN per call */
				if (usb_pipeslow(qe->pipe))
					len = 0;

				break;
			}

			data += maxsze;
			len -= maxsze;
		}

		/* set interrupt on last bd */
		if (first_bdp) {
			bdp->cbd_sc |= BD_SC_INTRPT;

			/* more to do - stop sending bd's */
			ret = 1;
			break;
		}

		/* fall through if no bd's (i.e. len == 0) */
		qe->qstate = QS_SETUP3;

	case QS_SETUP3:
		/* status stage transaction (IN or OUT w/zero-len data) */
		token = usb_pipeout(qe->pipe) ? IN : OUT;
		log_event(3, "SETUP3; token", token);

		if (free_bds() < 2) {
			if (1) printk("m8xxhci: SETUP3; no bds!\n");
			ret = 2;
			break;
		}

		bytes = calc_crc5(qe->devnum, qe->endpoint);
		qe->ph[0] = token;
		qe->ph[1] = bytes & 0xff;
		qe->ph[2] = bytes >> 8;

		bdp = next_bd_qe(qe);
		bdp->cbd_datlen = 3;
		bdp->cbd_bufaddr = __pa(qe->ph);
		bdp->cbd_sc |= BD_SC_LAST;
		/* don't set ready in BD */
		first_bdp = bdp;

		if (usb_pipeslow(qe->pipe))
			bdp->cbd_sc |= BD_USB_LSP;

		switch (token) {
		case OUT:
			/* send STATUS stage empty DATA1 packet */
			bdp = next_bd_qe(qe);
			bdp->cbd_datlen = 0;
			bdp->cbd_bufaddr = 0;
			bdp->cbd_sc |=
				BD_SC_READY | BD_SC_LAST |
				BD_USB_DATA1 | BD_USB_TC |
				BD_USB_CNF | BD_SC_INTRPT;
			break;
		case IN:
			/* get STATUS stage empty DATA1 packet */
			bdp->cbd_sc |= BD_USB_CNF | BD_SC_INTRPT;
			break;
		}

		/* done */
		ret = 0;
		break;

	case QS_INTR:
		log_event(2, "IN; devnum", qe->devnum);
		token = IN;
		qe->whichdata = 0;

		if (free_bds() < 2) {
			if (1) printk("m8xxhci: IN; no bds!\n");
			log_event(1, "IN; no bds! qe", (int)qe);
#ifdef DEBUG_CHECKS
			dump_events();
#endif
			ret = 2;
			break;
		}

		bytes = calc_crc5(qe->devnum, qe->endpoint);
		qe->ph[0] = token;
		qe->ph[1] = bytes & 0xff;
		qe->ph[2] = bytes >> 8;

		bdp = next_bd_qe(qe);
		bdp->cbd_datlen = 3;
		bdp->cbd_bufaddr = __pa(qe->ph);
		bdp->cbd_sc |= BD_SC_LAST | BD_USB_CNF | BD_SC_INTRPT;
		/* don't set ready in BD */
		first_bdp = bdp;

		if (usb_pipeslow(qe->pipe))
			bdp->cbd_sc |= BD_USB_LSP;

		/* done */
		ret = 0;
		hp->xmit_state[qe->qtype] = XS_IN;
		break;
	}

	/* now allow the whole shebang to go by setting the first BD ready */
	if (first_bdp)
		first_bdp->cbd_sc |= BD_SC_READY;

	/* unlock the tx ring */
//	unlock_tx_ring(hp);

	flush_dcache_range((int)qe->ph, (int)qe->ph+3);

#ifndef USB_UCODE_PATCH
	/* check if we need an SOF */
	{
		/*
		 * This lock nests inside txbd_list_lock, so it needs its
		 * own saved-state word.  Reusing 'flags' would overwrite
		 * the interrupt state saved by the outer lock and leave
		 * interrupts disabled after the final unlock.
		 */
		unsigned long sof_flags;

		spin_lock_irqsave(&need_sof_lock, sof_flags);
		if (hp->need_sof) {
			/* note; the sof should really be put in the first descriptor */
			add_sof_to_tx_ring();
			hp->need_sof--;
		}
		spin_unlock_irqrestore(&need_sof_lock, sof_flags);
	}
#endif

	m8xxhci_kick_xmit();

	/* if we changed our mind, turn off active qe */
	if (ret == 2) {
		make_inactive_qe(qe);
	}

	spin_unlock_irqrestore(&txbd_list_lock, flags);

	log_event(3, "send_qe done", 0);

	return ret;
}

/*
 * Wrap 'urb' in a queue element of type 'qtype' and place it on the
 * matching queue.
 *
 * The qe copies the urb's device, pipe, setup packet, buffer and
 * length.  Its initial state follows from qtype; interrupt urbs are
 * additionally put on the controller's urb list, and iso urbs take
 * their length from the first frame descriptor.
 *
 * Returns -EINPROGRESS once queued, or -1 when no queue element could
 * be allocated.
 */
static int submit_urb(urb_t *urb, int qtype)
{
	struct m8xxhci_device *dev = usb_to_m8xxhci(urb->dev);
	struct m8xxhci_qe *qe;
	unsigned long flags;
	int devnum, endpoint;

	log_event(2, "submit_urb urb", (int)urb);
	log_event(3, "submit_urb qtype", (int)qtype);

	/* the "pipe" thing contains the destination in bits 8--18 */
	devnum = usb_pipedevice(urb->pipe);
	endpoint = usb_pipeendpoint(urb->pipe);

	/* build queue element */
	qe = allocate_qe(dev, qtype);
	if (!qe)
		return -1;

#if 0
	if (qtype == Q_BULK || qtype == Q_ISO) {
		printk("submit_urb(urb=%p,type=%d) "
		       "qe=%p, devnum %d, endpoint %d, pipein %d\n",
		       urb, qtype, qe, devnum, endpoint,
		       usb_pipein(urb->pipe));
	}
#endif

	log_event(1, "urb qe", (int)qe);
	log_event(1, "urb dev", (int)urb->dev);

	/* mirror the urb into the qe */
	qe->dev = urb->dev;
	qe->pipe = urb->pipe;
	qe->devnum = devnum;
	qe->endpoint = endpoint;
	qe->cmd = urb->setup_packet;
	qe->data = urb->transfer_buffer;
	qe->data_len = urb->transfer_buffer_length;
	qe->status = 1;
	qe->urb = urb;

	/* starting state depends on the transfer type */
	switch (qtype) {
	case Q_CTRL:
		qe->qstate = QS_SETUP;
		break;

	case Q_INTR:
		qe->qstate = QS_INTR;
		add_urb_list(urb);
		break;

	case Q_BULK:
		qe->qstate = QS_BULK;
		break;

	case Q_ISO:
		qe->qstate = QS_ISO;
		qe->data_len = urb->iso_frame_desc[0].length;
		break;
	}

	urb->hcpriv = qe;
	urb->actual_length = 0;
	urb->status = USB_ST_URB_PENDING;

	/* place qe on queue */
	spin_lock_irqsave(&queue_lock, flags);
	enqueue_qe(qe, qtype);
	spin_unlock_irqrestore(&queue_lock, flags);

#if 0
	/* start working */
	run_queues();
#endif

	return -EINPROGRESS;
}

/*
 * Forcibly remove a qe.
 *
 * Under queue_lock: the active qe has its references in the tx ring's
 * owner table cleared; any other qe is assumed to be sitting on a
 * queue and is dequeued.  Either way the qe is then deactivated and
 * given back to the allocator.
 */
static void
unlink_qe(struct m8xxhci_qe *qe)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	unsigned long flags;
	int slot;

	spin_lock_irqsave(&queue_lock, flags);

	if (qe == hp->active_qe) {
		/* we're active, clean up any tx ring ptrs */
		for (slot = 0; slot < TX_RING_SIZE; slot++) {
			if (hp->tx_bd_qe[slot] == qe)
				hp->tx_bd_qe[slot] = 0;
		}
	} else {
		/* not active, so it must be on a queue */
		dequeue_qe(qe);
	}

	deactivate_qe(qe);
	deallocate_qe(qe);

	spin_unlock_irqrestore(&queue_lock, flags);
}

/*
 * Detach 'urb' from the controller.  When the urb still owns a queue
 * element (urb->hcpriv), that qe is unlinked; interrupt urbs are also
 * taken off the controller's urb list; hcpriv is then cleared.
 *
 * Always returns 0.
 */
static int
unlink_urb(urb_t *urb, int qtype)
{
	struct m8xxhci_qe *qe;

#if 0
	if (urb->status != 0) {
		dump_events();
	}
#endif

	log_event(2, "unlink_urb urb", (int)urb);

	if (m8xxhci_debug && urb->status != 0)
		printk("unlink_urb(urb=%p,qtype=%d) status %d\n",
		       urb, qtype, urb->status);

	/* if we're connected to a qe, unlink it */
	qe = (struct m8xxhci_qe *)urb->hcpriv;
	if (qe) {
		unlink_qe(qe);

		if (qtype == Q_INTR)
			remove_urb_list(urb);

		urb->hcpriv = 0;
	}

	if (m8xxhci_debug && urb->status != 0)
		printk("unlink_urb(urb=%p) done\n", urb);

	return 0;
}


/* ------------- */

/*
 * Pulse the RESUME bit in the USB mode register for 100us while
 * holding the tx ring lock.
 *
 * disable - when non-zero, the controller enable bit (USMOD_EN) is
 *           cleared before the pulse and set again afterwards.
 */
static void
assert_resume(int disable)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	volatile usbregs_t *usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];

	lock_tx_ring(hp);

	if (disable) {
		usbregs->usb_usmod &= ~USMOD_EN;
	}

	/* raise RESUME, hold it, then drop it again */
	usbregs->usb_usmod |= USMOD_RESUME;
	udelay(100); /* 100us */
	usbregs->usb_usmod &= ~USMOD_RESUME;

	if (disable) {
		usbregs->usb_usmod |= USMOD_EN;
	}

	unlock_tx_ring(hp);
}

/*
 * Drive a reset condition on the bus while holding the tx ring lock.
 *
 * The TXP/TXN pins (port C) and the OE pin (port A) are switched to
 * general-purpose outputs and driven low for 200us, then handed back
 * to their dedicated function, followed by another 100us of settling
 * time.
 *
 * disable - meant to bracket the sequence with clearing/setting
 *           USMOD_EN, but the parameter is overwritten with 0 on
 *           entry, so those two branches never run as written.
 */
static void
assert_reset(int disable)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	volatile usbregs_t *usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];

	lock_tx_ring(hp);

/* NOTE(review): forces the "disable" path off regardless of the caller */
disable = 0;
	if (disable) {
		usbregs->usb_usmod &= ~USMOD_EN;
	}

	/* assert reset */
	immap->im_ioport.iop_pcdir |= (PC_USB_TXP | PC_USB_TXN);
	immap->im_ioport.iop_pcpar &= ~(PC_USB_TXP | PC_USB_TXN);
	immap->im_ioport.iop_pcdat &= ~(PC_USB_TXP | PC_USB_TXN);
	/*
	 * NOTE(review): this clears every bit of the port C data register,
	 * not just the USB pins cleared on the line above - confirm no
	 * other port C output depends on its previous value.
	 */
	immap->im_ioport.iop_pcdat = 0;

	immap->im_ioport.iop_padir |= PA_USB_OE;
	immap->im_ioport.iop_papar &= ~PA_USB_OE;
	immap->im_ioport.iop_padat &= ~PA_USB_OE;

	udelay(200); /* 200us */

	/* give TXP/TXN back to their dedicated function */
	immap->im_ioport.iop_pcdir |= (PC_USB_TXP | PC_USB_TXN);
	immap->im_ioport.iop_pcpar |= (PC_USB_TXP | PC_USB_TXN);

	/* and OE as well */
	immap->im_ioport.iop_padir &= ~PA_USB_OE;
	immap->im_ioport.iop_papar |= PA_USB_OE;

	if (disable) {
		usbregs->usb_usmod |= USMOD_EN;
	}

	udelay(100); /* 100us */

	unlock_tx_ring(hp);
}

/*
 * Diagnostic: with the controller disabled and the tx ring locked,
 * print the port A / port C data registers (raw and masked to the USB
 * receive pins), drive the OE pin high as a general-purpose output for
 * 1ms, print the registers again, then return OE to its dedicated
 * function.
 *
 * Always returns 0.
 * NOTE(review): USMOD_EN is cleared on entry but not set again before
 * returning - confirm this is intended.
 */
static int
check_bus(void)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	volatile immap_t *immap = (immap_t *)IMAP_ADDR;
	volatile usbregs_t *usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];
	int disable = 1;

	lock_tx_ring(hp);

	if (disable) {
		usbregs->usb_usmod &= ~USMOD_EN;
	}

	/* pin levels before touching OE */
	printk("padat 0x%04x (masked RXD 0x%04x)\n",
	       immap->im_ioport.iop_padat,
	       immap->im_ioport.iop_padat & PA_USB_RXD);
	printk("pcdat 0x%04x (masked RXP+RXN 0x%04x)\n",
	       immap->im_ioport.iop_pcdat,
	       immap->im_ioport.iop_pcdat & (PC_USB_RXP | PC_USB_RXN));

	/* take OE over as a general-purpose output and drive it high */
	immap->im_ioport.iop_padir |= PA_USB_OE;
	immap->im_ioport.iop_papar &= ~PA_USB_OE;

	immap->im_ioport.iop_padat |= PA_USB_OE;

	wait_ms(1);

	/* pin levels with OE driven high */
	printk("padat 0x%04x (masked RXD 0x%04x)\n",
	       immap->im_ioport.iop_padat,
	       immap->im_ioport.iop_padat & PA_USB_RXD);
	printk("pcdat 0x%04x (masked RXP+RXN 0x%04x)\n",
	       immap->im_ioport.iop_pcdat,
	       immap->im_ioport.iop_pcdat & (PC_USB_RXP | PC_USB_RXN));

	/* hand OE back to its dedicated function */
	immap->im_ioport.iop_padir &= ~PA_USB_OE;
	immap->im_ioport.iop_papar |= PA_USB_OE;

	/* NOTE(review): writes the whole port A data register, not just OE */
	immap->im_ioport.iop_padat = 0;

	unlock_tx_ring(hp);

	return 0;
}
	

/*
 * Route a clock source to the USB controller.
 *
 * Exactly one of the USE_* macros below selects the source at compile
 * time (USE_PA5_CLK3 is the one currently defined).  Each variant
 * configures the relevant pin or baud rate generator and then programs
 * the low byte of the serial interface clock route register (SICR)
 * with the matching selector.
 *
 * Always returns 0.
 */
int
m8xxhci_setup_usb_clock(void)
{
	volatile	cpm8xx_t	*cp;
	volatile	immap_t		*immap;

	/* get ptr to 8xx internal registers */
	immap = (immap_t *)IMAP_ADDR;
	
	/* Get pointer to Communication Processor */
	cp = cpmp;

	/* clock source selection: enable exactly one of these */
#define USE_PA5_CLK3
//#define USE_PA7_CLK1
//#define USE_BRG3
//#define USE_BRG4

#ifdef USE_PA7_CLK1
#define PA_DR7	((ushort)0x0100)
	printk("m8xxhci: USING CLK1 for SOF timer!\n");

	/* we assume a 48Mhz system clock connected to CLK1 via PA7 */
	immap->im_ioport.iop_padir &= ~PA_DR7;
	immap->im_ioport.iop_papar |= PA_DR7;
	
	/* control bits in SICR to route CLK1 to USB (R1CS) */
#define SICR_USB_MASK	((uint)0x000000ff)
#define SICR_USB_CLKRT	((uint)0x00000020) /* CLK1 */
	
	/* configure Serial Interface clock routing */
	cp->cp_sicr &= ~SICR_USB_MASK;
	cp->cp_sicr |= SICR_USB_CLKRT;
#endif

#ifdef USE_PA5_CLK3
#define PA_DR5	((ushort)0x0400)
	printk("m8xxhci: USING CLK3 for SOF timer!\n");

	/* we assume a 48Mhz system clock connected to CLK3 via PA5 */
	immap->im_ioport.iop_padir &= ~PA_DR5;
	immap->im_ioport.iop_papar |= PA_DR5;
	
	/* control bits in SICR to route CLK3 to USB (R1CS) */
#define SICR_USB_MASK	((uint)0x000000ff)
#define SICR_USB_CLKRT	((uint)0x00000030)
	
	/* configure Serial Interface clock routing */
	cp->cp_sicr &= ~SICR_USB_MASK;
	cp->cp_sicr |= SICR_USB_CLKRT;
#endif
	
#ifdef USE_BRG3
	if (m8xxhci_verbose)
		printk("m8xxhci: USING BRG3 for SOF patch!\n");

	/* we assume a 48Mhz system clock */
	cp->cp_brgc3 = 0x00010000;

#define PB_DR28 0x0008
	immap->im_cpm.cp_pbdir &= ~PB_DR28;
	immap->im_cpm.cp_pbpar |= PB_DR28;
	
	/* control bits in SICR to route BRG3 to USB (R1CS) */
#define SICR_USB_MASK	((uint)0x000000ff)
#define SICR_USB_CLKRT	((uint)0x00000010) /* brg3 */
	
	/* configure Serial Interface clock routing */
	cp->cp_sicr &= ~SICR_USB_MASK;
	cp->cp_sicr |= SICR_USB_CLKRT;
#endif

#ifdef USE_BRG4
	if (m8xxhci_verbose)
		printk("m8xxhci: USING BRG4 for SOF patch!\n");

	/* we assume a 48Mhz system clock */
	printk("cp_brgc4 0x%x before\n", cp->cp_brgc4);
	cp->cp_brgc4 = 0x00010000;
	printk("cp_brgc4 0x%x\n", cp->cp_brgc4);

	/* control bits in SICR to route BRG4 to USB (R1CS) */
#define SICR_USB_MASK	((uint)0x000000ff)
#define SICR_USB_CLKRT	((uint)0x00000018)
	
	/* configure Serial Interface clock routing */
	cp->cp_sicr &= ~SICR_USB_MASK;
	cp->cp_sicr |= SICR_USB_CLKRT;
#endif

	return 0;
}

/*
 * Configure the port A and port C pins used by the USB controller:
 * RXD and OE on port A, RXP/RXN and TXP/TXN on port C, plus the SOF
 * pin when USB_UCODE_PATCH is defined.  With DISABLE_SEND set, OE is
 * instead driven high as a general-purpose output and TXP/TXN are left
 * as plain inputs.
 *
 * Always returns 0.
 */
int
m8xxhci_setup_usb_pins(void)
{
	volatile	immap_t		*immap;

	/* get ptr to 8xx internal registers */
	immap = (immap_t *)IMAP_ADDR;
	
	/* select USBRXD & USBOE* */
	immap->im_ioport.iop_padir &= ~(PA_USB_RXD | PA_USB_OE);
	immap->im_ioport.iop_papar |= (PA_USB_RXD | PA_USB_OE);
	immap->im_ioport.iop_paodr &= ~PA_USB_OE;
	/*
	 * NOTE(review): both writes below replace the whole port A data
	 * register; the second one overrides the first and leaves only
	 * the OE bit set.
	 */
	immap->im_ioport.iop_padat = 0;
immap->im_ioport.iop_padat = PA_USB_OE;

	/* select USBRXP & USBRXN */
	immap->im_ioport.iop_pcdir &= ~(PC_USB_RXP | PC_USB_RXN);
	immap->im_ioport.iop_pcpar &= ~(PC_USB_RXP | PC_USB_RXN);
	immap->im_ioport.iop_pcso |= (PC_USB_RXP | PC_USB_RXN);

#ifdef USB_UCODE_PATCH
#define PC_USB_SOF	0x0001 /* bit 15, dreq0? */
	immap->im_ioport.iop_pcpar &= ~PC_USB_SOF;
	immap->im_ioport.iop_pcdir &= ~PC_USB_SOF;
	immap->im_ioport.iop_pcso |= PC_USB_SOF;
	immap->im_ioport.iop_pcint |= PC_USB_SOF;
#endif

	/* select USBTXP and USBTXN */
#if DISABLE_SEND
	/* disable send side */
	immap->im_ioport.iop_padir |= PA_USB_OE;
	immap->im_ioport.iop_papar &= ~PA_USB_OE;
	immap->im_ioport.iop_padat |= PA_USB_OE;

	immap->im_ioport.iop_pcdir &= ~(PC_USB_TXP | PC_USB_TXN);
	immap->im_ioport.iop_pcpar &= ~(PC_USB_TXP | PC_USB_TXN);
#else
	immap->im_ioport.iop_pcdir |= (PC_USB_TXP | PC_USB_TXN);
	immap->im_ioport.iop_pcpar |= (PC_USB_TXP | PC_USB_TXN);
	/* NOTE(review): replaces the whole port C data register */
	immap->im_ioport.iop_pcdat = 0;
#endif

	return 0;
}

/*
 * Board-level USB enable.  On CONFIG_RPXLITE boards this sets the
 * "high speed" and "power enable" bits in the board control/status
 * register at RPX_CSR_ADDR (plus the extra clock/header bits on rev CW
 * and the BRG1-to-PC15 bit on rev DW), then clears the "USB disable"
 * bit.  On other configurations nothing is done.
 *
 * Always returns 0.
 */
int
m8xxhci_setup_board_specific(void)
{
#ifdef CONFIG_RPXLITE_AW
	/* no overrides for rev AW boards */
#endif

#ifdef CONFIG_RPXLITE_CW
/* CSR bits moved on rev CW boards */
#undef BCSR0_USBDISABLE
#undef BCSR0_USBHISPEED
#undef BCSR0_USBPWREN
#define BCSR0_USBDISABLE	((uint)0x00008000)
#define BCSR0_USBHISPEED	((uint)0x00004000)
#define BCSR0_USBPWREN		((uint)0x00002000)
#define BCSR0_ENUSBCLK		((uint)0x00001000)
#define BCSR0_ENPA5HDR		((uint)0x00000800)
#endif

#ifdef CONFIG_RPXLITE_DW
/* This bit added for DW boards */
#define BCSR0_BRG1TOPC15	((uint)0x00000400)
#endif

#if defined(CONFIG_RPXLITE)
	/* set the configuration to enable USB */
	*((volatile uint *)RPX_CSR_ADDR) |=
		(BCSR0_USBHISPEED | BCSR0_USBPWREN)
#ifdef CONFIG_RPXLITE_CW
		| (BCSR0_ENUSBCLK | BCSR0_ENPA5HDR)
#endif
#ifdef CONFIG_RPXLITE_DW
		| (BCSR0_BRG1TOPC15)
#endif
		;

	/* and make sure the disable bit is off */
	*((volatile uint *)RPX_CSR_ADDR) &= ~BCSR0_USBDISABLE;

	if (0) printk("RPX_CSR %08x\n", *((volatile uint *)RPX_CSR_ADDR));
#endif
	
	return 0;
}

void
m8xxhci_stop_controller(void)
{
	volatile immap_t	*immap;
	volatile usbregs_t	*usbregs;

	if (m8xxhci_verbose)
		printk("m8xxhci_stop_controller()\n");

	/* get ptr to 8xx internal registers */
	immap = (immap_t *)IMAP_ADDR;

	/* usb control registers */
	usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];

	usbregs->usb_usmod = USMOD_HOST | USMOD_TEST;
}

int
m8xxhci_start_controller(void)
{
	volatile	struct m8xxhci_private *hp = m8xxhci_ptr;
	volatile	cpm8xx_t	*cp;
	volatile	immap_t		*immap;
	volatile	usbpr_t		*usbprmap;
	volatile	usbregs_t	*usbregs;
	volatile	cbd_t		*bdp;
	volatile	epb_t		*epb;
	unsigned long	mem_addr;
	pte_t		*pte;
	int		i, j, k, index, count;
	
	if (m8xxhci_verbose) printk("m8xxhci_start_controller()\n");
	
	/* get ptr to 8xx internal registers */
	immap = (immap_t *)IMAP_ADDR;
	
	/* usb param ram */
	usbprmap = (usbpr_t *)immap->im_cpm.cp_dparam;
	
	/* usb control registers */
	usbregs = (usbregs_t *)&immap->im_cpm.cp_scc[0];
	
	/* get pointer to Communication Processor */
	cp = cpmp;
	
	if (0) printk("hp %p, immap %p, usbprmap %p, usbregs %p, cp %p\n",
		      hp, immap, usbprmap, usbregs, cp);

	{
	  extern unsigned int _get_IMMR(void);
	  unsigned int immr_reg, partnum, masknum, bad;

	  immr_reg = _get_IMMR();

	  partnum = (immr_reg & 0xff00) >> 8;
	  masknum = (immr_reg & 0x00ff);

	  bad = 1;

	  switch (partnum) {
	  case 0x24:
	    printk("m8xxhci: MPC823B mask %d\n", masknum);
	    bad = 0;
	    break;
	  case 0x21:
	    printk("m8xxhci: MPC823A mask %d\n", masknum);
	    break;
	  case 0x20:
	    printk("m8xxhci: MPC850 mask %d\n", masknum);
	    break;
	  case 0x00:
	    printk("m8xxhci: MPC860? mask %d\n", masknum);
	    break;
	  default:
	    printk("m8xxhci: unknown partnum 0x%04x, masknum 0x%04x\n",
		   partnum, masknum);
	    break;
	  }

	  if (bad)
		  return -1;
	}

	/* set up USB section of chip */
	m8xxhci_setup_usb_clock();
	m8xxhci_setup_usb_pins();

	if (m8xxhci_verbose) 
		printk("ring sizes: rx %d, tx %d\n",
		       (int)RX_RING_SIZE, TX_RING_SIZE);
	
	/* set up EPxPTR's */

	/* these addresses need to be a on 32 byte boundary */
	index = cpm_32b_dpalloc(sizeof(epb_t));
	usbprmap->usb_epbptr[0] = index;
	hp->epbptr[0] = (epb_t *)&cp->cp_dpmem[index];
	epb = hp->epbptr[0];
		
	if (0) printk("endpoint 0 0x%x, epb %p\n", index, epb);
	if (0) printk("rstate %p\n", &usbprmap->usb_rstate);
		
	/* alloc rx bd ring */
	index = cpm_8b_dpalloc(sizeof(cbd_t) * RX_RING_SIZE);
	epb->epb_rbase = index;
	hp->rbase = (cbd_t *)&cp->cp_dpmem[index];
		
	/* alloc tx bd ring */
	index = cpm_8b_dpalloc(sizeof(cbd_t) * TX_RING_SIZE);
	epb->epb_tbase = index;
	hp->tbase = (cbd_t *)&cp->cp_dpmem[index];
		
	/* reset tx bd ring entries */
	reset_tx_ring();

	if (0) printk("set up tx ring @ %p\n", bdp);

	/* set rx bd ring entries */
	bdp = hp->rbase;
	count = 0;
	if (0) printk("set up rx ring @ %p\n", bdp);
	for (j = 0; j < CPM_USB_RX_PAGES; j++) {
			
		/* allocate a page */
		mem_addr = __get_free_page(GFP_KERNEL);
			
		/* make it uncached */
		pte = va_to_pte(mem_addr);
		pte_val(*pte) |= _PAGE_NO_CACHE;
		flush_tlb_page(current->mm->mmap, mem_addr);
			
		/* initialize the BD for every fragment in the page */
		for (k = 0; k < CPM_USB_RX_FRPPG; k++) {
			bdp->cbd_sc = BD_SC_EMPTY | BD_SC_INTRPT;
			bdp->cbd_datlen = 0;
			bdp->cbd_bufaddr = __pa(mem_addr);
			mem_addr += CPM_USB_RX_FRSIZE;
			bdp++;
			/* allow for small ring (and wasted space) */
			if (++count >= RX_RING_SIZE)
				goto done;
		}
	}
		
	/* set the last buffer to wrap */
 done:
	bdp--;
	bdp->cbd_sc |= BD_SC_WRAP;
		
	epb->epb_rfcr = FCR_BE;
	epb->epb_tfcr = FCR_BE;

	epb->epb_mrblr = MAX_RBE;
		
	epb->epb_rbptr = epb->epb_rbase;
	epb->epb_tbptr = epb->epb_tbase;
	if (0) printk("tbptr %08x\n", epb->epb_tbptr);

	epb->epb_tstate = 0;

	if (0) printk("usep%d @ %p\n", i, &usbregs->usb_usep[0]);

	usbregs->usb_usep[0] = USEP_TM_CONTROL | USEP_MF_ENABLED;

	usbprmap->usb_rstate = 0;
	usbprmap->usb_frame_n = 0;

#ifdef USB_UCODE_PATCH
	usbprmap->usb_frame_n =
		(((~do_crc(hp->frame_no, 11)) & 0x1f) << 11) | hp->frame_no;
#endif
	
	/* set 12Mbps endpoint mode & disable usb */
	usbregs->usb_usmod = USMOD_HOST | USMOD_TEST;
	
	/* set address */
	usbregs->usb_usadr = 0;
	
	/* clear USCOM */
	usbregs->usb_uscom = 0;
	
	/* reset event register & interrupt mask */
	usbregs->usb_usber = 0xffff;
	usbregs->usb_usbmr = 0xffff;
	
	/* install our interrupt handler */
	if (0) printk("m8xxhci_init() install int handler\n");
	
	cpm_install_handler(CPMVEC_USB, m8xxhci_interrupt, (void *)hp);

	/* turn on board specific bits */
	m8xxhci_setup_board_specific();

	/* wait for powerup */
	wait_ms(200);

	assert_reset(0);
	
#if 0
	verify_patch(immap);
#endif

	/* enable USB controller */
	if (m8xxhci_verbose) printk("m8xxhci_init() enable USB controller\n");
#ifdef M8XXHCI_LOOP
	printk("m8xxhci_init() LOOPBACK!\n");
	usbregs->usb_usmod = USMOD_HOST | USMOD_TEST | USMOD_EN;
#else
	usbregs->usb_usmod = USMOD_HOST;
	usbregs->usb_usmod |= USMOD_EN;
#endif

	set_event_level(3/*1*/);
	log_event(1, "controller enabled", 0);

	/* start the SOF timer */
	m8xxhci_timer_start();
	
	if (m8xxhci_verbose)
		printk("usb bus: %sidle\n", usbregs->usb_usbs ? "" : "NOT ");

#if 0
	check_bus();
#endif

	return 0;
}

static struct m8xxhci_device *
add_local_dev(struct usb_device *usb_dev)
{
	struct m8xxhci_device *dev;

	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev) {
		return 0;
	}

	memset(dev, 0, sizeof(*dev));

	usb_dev->hcpriv = dev;
	dev->usb = usb_dev;

	return dev;
}

static void
free_local_dev(struct usb_device *usb_dev)
{
	struct m8xxhci_device *dev;

	dev = usb_to_m8xxhci(usb_dev);
	if (dev) {
		kfree(dev);
	}

	usb_dev->hcpriv = 0;
}

/*
 * Only the USB core should call m8xxhci_alloc_dev and m8xxhci_free_dev
 */
static int m8xxhci_alloc_dev(struct usb_device *usb_dev)
{
	struct m8xxhci_device *dev;

	if (m8xxhci_verbose)
		printk("m8xxhci_alloc_dev(usb_dev=%p)\n", usb_dev);

#if 0
	printk("dev->bus %p, dev->parent (hub) %p\n", dev->bus, dev->parent);
	dump_root_hub();
#endif

	dev = add_local_dev(usb_dev);
	if (!dev) {
		err("m8xxhci: couldn't allocate internal device");
		return -1;
	}

	return 0;
}

/*
 * m8xxhci_free_dev()
 *
 * Walks the controller's urb_list and calls m8xxhci_unlink_urb() on
 * every urb whose ->dev is usb_dev, then releases the driver-private
 * device state with free_local_dev().  The walk is done between
 * spin_lock_irqsave() and spin_unlock_irqrestore() on urblist_lock.
 *
 * Always returns 0.
 */
static int m8xxhci_free_dev(struct usb_device *usb_dev)
{
	struct m8xxhci_private *hp = (struct m8xxhci_private *)m8xxhci_ptr;
	struct list_head *head, *pos, *following;
	unsigned long flags;
	urb_t *candidate;

#if 0
	dump_events();
#endif

	if (m8xxhci_debug) {
		printk("m8xxhci_free_dev(usb_dev=%p)\n", usb_dev);
	}

	/* scan URB list, remove any that are still active for this device */
	spin_lock_irqsave(&urblist_lock, flags);

	head = &hp->urb_list;
	if (0) printk("head %p, next %p\n", head, head->next);

	pos = head->next;
	while (pos != head) {
		/* pick up the successor before the urb is (possibly) unlinked */
		following = pos->next;

		candidate = list_entry(pos, urb_t, urb_list);

		if (0) {
			printk("head %p, l %p, next %p\n", head, pos, following);
			printk("urb %p, urb->dev %p, dev %p\n",
			       candidate, candidate->dev, usb_dev);
		}

		if (candidate->dev == usb_dev) {
			if (0) printk("unlink urb %p\n", candidate);
			m8xxhci_unlink_urb(candidate);
		}

		/* an entry that pointed at itself would loop forever: bail out */
		if (pos == following) {
			printk("urb not on list!\n");
			break;
		}

		pos = following;
	}

	spin_unlock_irqrestore(&urblist_lock, flags);

	free_local_dev(usb_dev);

	if (m8xxhci_debug) {
		printk("m8xxhci_free_dev(dev=%p) done\n", usb_dev);
	}

	return 0;
}

/*
 * m8xxhci_get_current_frame_number()
 *
 * Returns the current frame number for a USB bus/controller, taken from
 * the frame_no member of the controller's private data.  The call is
 * always logged; dev is used for that log line only.
 */
static int m8xxhci_get_current_frame_number(struct usb_device *dev)
{
	struct m8xxhci_private *ctlr = (struct m8xxhci_private *)m8xxhci_ptr;
	int frame;

	printk("m8xxhci_get_current_frame_number(dev=%p)\n", dev);

	frame = ctlr->frame_no;

	return frame;
}

/*
 * m8xxhci_submit_urb()
 *
 * Validates the urb and passes it to submit_urb() together with the
 * queue type derived from its pipe.
 *
 * Returns -EINVAL for a null urb, -ENODEV when the urb has no device or
 * the device has no bus, and 0 when submit_urb() reports -EINPROGRESS.
 * Any other submit_urb() result is stored in urb->status and returned
 * unchanged.
 */
static int m8xxhci_submit_urb(urb_t *urb)
{
	int rc;

	if (0) printk("m8xxhci_submit_urb(urb=%p)\n", urb);

	if (urb == NULL) {
		return -EINVAL;
	}

	if (urb->dev == NULL || urb->dev->bus == NULL) {
		return -ENODEV;
	}

	rc = submit_urb(urb, map_pipe_to_qtype(urb->pipe));
	if (rc != -EINPROGRESS) {
		/* not queued: record the outcome in the urb as well */
		urb->status = rc;
		return rc;
	}

	/* queued and still in flight counts as success for the caller */
	return 0;
}

/*
 * m8xxhci_unlink_urb()
 *
 * Removes an urb from the driver's queues via unlink_urb() and, if the
 * urb is still marked -EINPROGRESS afterwards, marks it -ENOENT and runs
 * its completion handler.
 *
 * The status is written BEFORE the handler is called: the handler then
 * sees that the urb was killed rather than a stale -EINPROGRESS, and the
 * urb is not touched again after the handler returns (the handler may
 * have freed or resubmitted it).
 *
 * Returns -EINVAL for a null urb, -ENODEV when the urb has no device or
 * the device has no bus, otherwise the result of unlink_urb().
 */
static int
m8xxhci_unlink_urb(urb_t *urb)
{
	int ret;

	/* only the pointer value is logged, so this is safe for a null urb */
	log_event(2, "m8xxhci_unlink_urb urb", (int)urb);

	if (0) printk("m8xxhci_unlink_urb(urb=%p)\n", urb);

#if 0
	if (urb->status != 0) {
		m8xxhci_dump();
		dump_events();
	}
#endif

	if (!urb)
		return -EINVAL;

	if (!urb->dev || !urb->dev->bus)
		return -ENODEV;

	ret = unlink_urb(urb, map_pipe_to_qtype(urb->pipe));
	
	if (urb->status == -EINPROGRESS) {
		/* publish the final status first, then notify the owner */
		urb->status = -ENOENT;

		if (urb->complete)
			urb->complete(urb);
	}

	return ret;
}

/*
 * Host controller entry points, passed to usb_alloc_bus() in
 * m8xxhci_setup().  This is a positional initializer, so the order of
 * the entries must match the member order of struct usb_operations
 * (declared outside this file) -- do not reorder.
 */
struct usb_operations m8xxhci_device_operations = {
	m8xxhci_alloc_dev,			/* attach per-device private state */
	m8xxhci_free_dev,			/* unlink the device's urbs, free that state */
	m8xxhci_get_current_frame_number,	/* report hp->frame_no */
	m8xxhci_submit_urb,			/* queue an urb */
	m8xxhci_unlink_urb			/* dequeue an urb */
};

/*
 * m8xxhci_setup()
 *
 * Allocates and initialises the controller's private state, allocates
 * and registers the usb bus, and starts the controller.
 *
 * Returns 0 on success and -1 on any failure.  On failure everything
 * acquired here is released again: the bus is deregistered and freed,
 * the private state is freed and m8xxhci_ptr is reset to null, so no
 * registered bus or global pointer refers to state that never came up.
 */
static int __init m8xxhci_setup(void)
{
	struct m8xxhci_private *hp;
	struct usb_bus *bus;
	int i;

	if (m8xxhci_verbose) printk("m8xxhci_setup()\n");

	/* allocate controller private storage */
	hp = (struct m8xxhci_private *)kmalloc(sizeof(*hp), GFP_KERNEL);
	if (hp == 0)
		return -1;

	m8xxhci_ptr = hp;
	memset((char *)hp, 0, sizeof(struct m8xxhci_private));

	/* every queue type starts idle with empty lists in both frames */
	for (i = 0; i < MAX_Q_TYPES; i++) {
		hp->xmit_state[i] = XS_IDLE;
		INIT_LIST_HEAD(&hp->qe_list[i]);
		INIT_LIST_HEAD(&hp->frames[0].heads[i]);
		INIT_LIST_HEAD(&hp->frames[1].heads[i]);
	}

	INIT_LIST_HEAD(&hp->urb_list);

	init_waitqueue_head(&m8xxhci_configure);

	hp->driver_state = DS_INIT;

	/* alloc bus */
	bus = usb_alloc_bus(&m8xxhci_device_operations);
	if (bus == NULL) {
		printk("m8xxhci: couldn't allocate usb bus\n");
		goto fail_free_hp;
	}

	hp->bus = bus;
	bus->hcpriv = (void *)m8xxhci_ptr;

	usb_register_bus(hp->bus);

	/* Start controller */
	if (m8xxhci_start_controller()) {
		goto fail_unregister;
	}

	hp->driver_state = DS_FIND_DEVICE;
	
	return 0;

fail_unregister:
	/* undo usb_register_bus()/usb_alloc_bus() */
	usb_deregister_bus(bus);
	usb_free_bus(bus);
fail_free_hp:
	/* clear the global before the memory it points at goes away */
	m8xxhci_ptr = NULL;
	kfree(hp);
	return -1;
}

/*
 * m8xxhci_init()
 *
 * Module entry point.  Checks bit 0x10 of the register at RPX_CSR_ADDR
 * (reported in the log as a dip switch); when it is clear the driver
 * stays inactive and 0 is returned.  Otherwise the controller is set up
 * through m8xxhci_setup() and the m8xxhci_thread kernel thread is
 * started.
 *
 * Returns 0 on success (or when disabled), -1 if m8xxhci_setup() fails,
 * and the negative kernel_thread() result if the thread cannot be
 * created.
 */
static int __init m8xxhci_init(void)
{
	int thread_pid;

	if (m8xxhci_verbose) {
		printk("m8xxhci: dip switches %x\n",
		       (*((volatile uint *)RPX_CSR_ADDR) & 0xf0));
	}

	if ((*((volatile uint *)RPX_CSR_ADDR) & 0x10) == 0) {
		printk("m8xxhci_init() disabled via dip switches\n");
		return 0;
	}

	printk("m8xxhci: initializing controller\n");

	if (m8xxhci_setup() != 0) {
		printk("m8xxhci_init() initializing failed\n");
		return -1;
	}

	MOD_INC_USE_COUNT;

#if 0
	/* simulate going idle */
	m8xxhci_events = EV_IDLE;
#endif

	thread_pid = kernel_thread(m8xxhci_thread, NULL,
				   CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
	if (thread_pid < 0) {
		/* no thread: give back the use count taken above */
		MOD_DEC_USE_COUNT;
		return thread_pid;
	}

#if TEST_THREAD /* hack */
	thread_pid = kernel_thread(test_thread, NULL,
				   CLONE_FS | CLONE_FILES | CLONE_SIGHAND);

#endif

	printk("m8xxhci: initializing done\n");

	return 0;
}

/*
 * m8xxhci_cleanup()
 *
 * Module exit hook; its only action is to call
 * m8xxhci_disconnect_device().
 */
static void __exit
m8xxhci_cleanup(void)
{
	m8xxhci_disconnect_device();
}

/* register m8xxhci_init()/m8xxhci_cleanup() as the module entry and exit hooks */
module_init(m8xxhci_init);
module_exit(m8xxhci_cleanup);

/* module information */
MODULE_AUTHOR("Brad Parker");
MODULE_DESCRIPTION("USB PPC8xx Host Controller Interface driver");

