diff options
| author | Chris Metcalf <cmetcalf@tilera.com> | 2012-04-04 16:58:27 -0400 | 
|---|---|---|
| committer | Chris Metcalf <cmetcalf@tilera.com> | 2012-07-11 16:04:54 -0400 | 
| commit | 6369798037c0e915fc3e3844083f2aeecb924c9d (patch) | |
| tree | 35e01293dc57d820fc0c89f110e52d4fad3a703d | |
| parent | 44e56967100f22a21abade38821018ba03d0a39f (diff) | |
arch/tile: common DMA code for the GXIO IORPC subsystem
The dma_queue support is used by both the mPipe (networking)
and Trio (PCI) hardware shims on tilegx.  This common code is
selected when either of those drivers is built.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
| -rw-r--r-- | arch/tile/gxio/Kconfig | 6 | ||||
| -rw-r--r-- | arch/tile/gxio/Makefile | 1 | ||||
| -rw-r--r-- | arch/tile/gxio/dma_queue.c | 176 | ||||
| -rw-r--r-- | arch/tile/include/gxio/dma_queue.h | 161 | 
4 files changed, 344 insertions, 0 deletions
| diff --git a/arch/tile/gxio/Kconfig b/arch/tile/gxio/Kconfig index 8eff47fe1236..ecd076c8cfd5 100644 --- a/arch/tile/gxio/Kconfig +++ b/arch/tile/gxio/Kconfig @@ -3,3 +3,9 @@  config TILE_GXIO  	bool  	depends on TILEGX + +# Support direct access to the common I/O DMA facility within the +# TILE-Gx mPIPE and Trio hardware from kernel space. +config TILE_GXIO_DMA +	bool +	select TILE_GXIO diff --git a/arch/tile/gxio/Makefile b/arch/tile/gxio/Makefile index db1ee2863d8e..97ab468fb8c5 100644 --- a/arch/tile/gxio/Makefile +++ b/arch/tile/gxio/Makefile @@ -3,3 +3,4 @@  #  obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o +obj-$(CONFIG_TILE_GXIO_DMA) += dma_queue.o diff --git a/arch/tile/gxio/dma_queue.c b/arch/tile/gxio/dma_queue.c new file mode 100644 index 000000000000..baa60357f8ba --- /dev/null +++ b/arch/tile/gxio/dma_queue.c @@ -0,0 +1,176 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + *   This program is free software; you can redistribute it and/or + *   modify it under the terms of the GNU General Public License + *   as published by the Free Software Foundation, version 2. + * + *   This program is distributed in the hope that it will be useful, but + *   WITHOUT ANY WARRANTY; without even the implied warranty of + *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + *   NON INFRINGEMENT.  See the GNU General Public License for + *   more details. + */ + +#include <linux/io.h> +#include <linux/atomic.h> +#include <linux/module.h> +#include <gxio/dma_queue.h> + +/* Wait for a memory read to complete. */ +#define wait_for_value(val)                             \ +  __asm__ __volatile__("move %0, %0" :: "r"(val)) + +/* The index is in the low 16 bits. */ +#define DMA_QUEUE_INDEX_MASK ((1 << 16) - 1) + +/* + * The hardware descriptor-ring type. + * This matches the types used by mpipe (MPIPE_EDMA_POST_REGION_VAL_t) + * and trio (TRIO_PUSH_DMA_REGION_VAL_t or TRIO_PULL_DMA_REGION_VAL_t). 
+ * See those types for more documentation on the individual fields. + */ +typedef union { +	struct { +#ifndef __BIG_ENDIAN__ +		uint64_t ring_idx:16; +		uint64_t count:16; +		uint64_t gen:1; +		uint64_t __reserved:31; +#else +		uint64_t __reserved:31; +		uint64_t gen:1; +		uint64_t count:16; +		uint64_t ring_idx:16; +#endif +	}; +	uint64_t word; +} __gxio_ring_t; + +void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, +			   void *post_region_addr, unsigned int num_entries) +{ +	/* +	 * Limit 65536 entry rings to 65535 credits because we only have a +	 * 16 bit completion counter. +	 */ +	int64_t credits = (num_entries < 65536) ? num_entries : 65535; + +	memset(dma_queue, 0, sizeof(*dma_queue)); + +	dma_queue->post_region_addr = post_region_addr; +	dma_queue->hw_complete_count = 0; +	dma_queue->credits_and_next_index = credits << DMA_QUEUE_CREDIT_SHIFT; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_init); + +void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue) +{ +	__gxio_ring_t val; +	uint64_t count; +	uint64_t delta; +	uint64_t new_count; + +	/* +	 * Read the 64-bit completion count without touching the cache, so +	 * we later avoid having to evict any sharers of this cache line +	 * when we update it below. +	 */ +	uint64_t orig_hw_complete_count = +		cmpxchg(&dma_queue->hw_complete_count, +			-1, -1); + +	/* Make sure the load completes before we access the hardware. */ +	wait_for_value(orig_hw_complete_count); + +	/* Read the 16-bit count of how many packets the hardware has completed. */ +	val.word = __gxio_mmio_read(dma_queue->post_region_addr); +	count = val.count; + +	/* +	 * Calculate the number of completions since we last updated the +	 * 64-bit counter.  It's safe to ignore the high bits because the +	 * maximum credit value is 65535. +	 */ +	delta = (count - orig_hw_complete_count) & 0xffff; +	if (delta == 0) +		return; + +	/* +	 * Try to write back the count, advanced by delta.  
If we race with +	 * another thread, this might fail, in which case we return +	 * immediately on the assumption that some credits are (or at least +	 * were) available. +	 */ +	new_count = orig_hw_complete_count + delta; +	if (cmpxchg(&dma_queue->hw_complete_count, +		    orig_hw_complete_count, +		    new_count) != orig_hw_complete_count) +		return; + +	/* +	 * We succeeded in advancing the completion count; add back the +	 * corresponding number of egress credits. +	 */ +	__insn_fetchadd(&dma_queue->credits_and_next_index, +			(delta << DMA_QUEUE_CREDIT_SHIFT)); +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_update_credits); + +/* + * A separate 'blocked' method for put() so that backtraces and + * profiles will clearly indicate that we're wasting time spinning on + * egress availability rather than actually posting commands. + */ +int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, +					  int64_t modifier) +{ +	int backoff = 16; +	int64_t old; + +	do { +		int i; +		/* Back off to avoid spamming memory networks. */ +		for (i = backoff; i > 0; i--) +			__insn_mfspr(SPR_PASS); + +		/* Check credits again. */ +		__gxio_dma_queue_update_credits(dma_queue); +		old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, +					 modifier); + +		/* Calculate bounded exponential backoff for next iteration. 
*/ +		if (backoff < 256) +			backoff *= 2; +	} while (old + modifier < 0); + +	return old; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_wait_for_credits); + +int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, +				     unsigned int num, int wait) +{ +	return __gxio_dma_queue_reserve(dma_queue, num, wait != 0, true); +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_reserve_aux); + +int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, +				 int64_t completion_slot, int update) +{ +	if (update) { +		if (ACCESS_ONCE(dma_queue->hw_complete_count) > +		    completion_slot) +			return 1; + +		__gxio_dma_queue_update_credits(dma_queue); +	} + +	return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete); diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h new file mode 100644 index 000000000000..00654feb7db0 --- /dev/null +++ b/arch/tile/include/gxio/dma_queue.h @@ -0,0 +1,161 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + *   This program is free software; you can redistribute it and/or + *   modify it under the terms of the GNU General Public License + *   as published by the Free Software Foundation, version 2. + * + *   This program is distributed in the hope that it will be useful, but + *   WITHOUT ANY WARRANTY; without even the implied warranty of + *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + *   NON INFRINGEMENT.  See the GNU General Public License for + *   more details. + */ + +#ifndef _GXIO_DMA_QUEUE_H_ +#define _GXIO_DMA_QUEUE_H_ + +/* + * DMA queue management APIs shared between TRIO and mPIPE. + */ + +#include "common.h" + +/* The credit counter lives in the high 32 bits. */ +#define DMA_QUEUE_CREDIT_SHIFT 32 + +/* + * State object that tracks a DMA queue's head and tail indices, as + * well as the number of commands posted and completed.  
The + * structure is accessed via a thread-safe, lock-free algorithm. + */ +typedef struct { +	/* +	 * Address of a MPIPE_EDMA_POST_REGION_VAL_t, +	 * TRIO_PUSH_DMA_REGION_VAL_t, or TRIO_PULL_DMA_REGION_VAL_t +	 * register.  These registers have identical encodings and provide +	 * information about how many commands have been processed. +	 */ +	void *post_region_addr; + +	/* +	 * A lazily-updated count of how many edescs the hardware has +	 * completed. +	 */ +	uint64_t hw_complete_count __attribute__ ((aligned(64))); + +	/* +	 * High 32 bits are a count of available egress command credits, +	 * low 24 bits are the next egress "slot". +	 */ +	int64_t credits_and_next_index; + +} __gxio_dma_queue_t; + +/* Initialize a dma queue. */ +extern void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, +				  void *post_region_addr, +				  unsigned int num_entries); + +/* + * Update the "credits_and_next_index" and "hw_complete_count" fields + * based on pending hardware completions.  Note that some other thread + * may have already done this and, importantly, may still be in the + * process of updating "credits_and_next_index". + */ +extern void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue); + +/* Wait for credits to become available. */ +extern int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, +						 int64_t modifier); + +/* Reserve slots in the queue, optionally waiting for slots to become + * available, and optionally returning a "completion_slot" suitable for + * direct comparison to "hw_complete_count". Returns GXIO_ERR_DMA_CREDITS if no credits are available and "wait" is false. + */ +static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, +					       unsigned int num, bool wait, +					       bool completion) +{ +	uint64_t slot; + +	/* +	 * Try to reserve 'num' egress command slots.  We do this by +	 * constructing a constant that subtracts N credits and adds N to +	 * the index, and using fetchaddgez to only apply it if the credits +	 * count doesn't go negative. 
+	 */ +	int64_t modifier = (((int64_t)(-num)) << DMA_QUEUE_CREDIT_SHIFT) | num; +	int64_t old = +		__insn_fetchaddgez(&dma_queue->credits_and_next_index, +				   modifier); + +	if (unlikely(old + modifier < 0)) { +		/* +		 * We're out of credits.  Try once to get more by checking for +		 * completed egress commands.  If that fails, wait or fail. +		 */ +		__gxio_dma_queue_update_credits(dma_queue); +		old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, +					 modifier); +		if (old + modifier < 0) { +			if (wait) +				old = __gxio_dma_queue_wait_for_credits +					(dma_queue, modifier); +			else +				return GXIO_ERR_DMA_CREDITS; +		} +	} + +	/* The bottom 24 bits of old encode the "slot". */ +	slot = (old & 0xffffff); + +	if (completion) { +		/* +		 * A "completion_slot" is a "slot" which can be compared to +		 * "hw_complete_count" at any time in the future.  To convert +		 * "slot" into a "completion_slot", we access "hw_complete_count" +		 * once (knowing that we have reserved a slot, and thus, it will +		 * be "basically" accurate), and combine its high 40 bits with +		 * the 24 bit "slot", and handle "wrapping" by adding "1 << 24" +		 * if the result is LESS than "hw_complete_count". +		 */ +		uint64_t complete; +		complete = ACCESS_ONCE(dma_queue->hw_complete_count); +		slot |= (complete & 0xffffffffff000000); +		if (slot < complete) +			slot += 0x1000000; +	} + +	/* +	 * If any of our slots mod 256 were equivalent to 0, go ahead and +	 * collect some egress credits, and update "hw_complete_count", and +	 * make sure the index doesn't overflow into the credits. +	 */ +	if (unlikely(((old + num) & 0xff) < num)) { +		__gxio_dma_queue_update_credits(dma_queue); + +		/* Make sure the index doesn't overflow into the credits: zero the byte holding bits 24-31, just above the 24-bit slot. 
*/ +#ifdef __BIG_ENDIAN__ +		*(((uint8_t *)&dma_queue->credits_and_next_index) + 4) = 0; +#else +		*(((uint8_t *)&dma_queue->credits_and_next_index) + 3) = 0; +#endif +	} + +	return slot; +} + +/* Non-inlinable wrapper: "__gxio_dma_queue_reserve(dma_queue, num, wait != 0, true)". */ +extern int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, +					    unsigned int num, int wait); + +/* Check whether a particular "completion slot" has completed. If "update" is nonzero and the cached "hw_complete_count" does not yet cover the slot, the count is refreshed once via __gxio_dma_queue_update_credits() before rechecking. + * + * Note that this function requires a "completion slot", and thus + * cannot be used with the result of any "reserve_fast" function. + */ +extern int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, +					int64_t completion_slot, int update); + +#endif /* !_GXIO_DMA_QUEUE_H_ */ | 
