VF61 Linux SPI DMA support

The spi-fsl-dspi driver does not support DMA, so I made some driver modifications to add DMA support.

After testing the code I got the following result: DMA transmit is OK, but roughly 50% of received packets are corrupted (parts of the buffer contain data from the previous message). The issue is on the DMA side: not all data is copied from the device FIFO into memory. In the RX callback, RXCTR = 0 indicates that the data has been read from the FIFO, and the SPIx_RXFRn registers show the new, correct values.
Do you have any idea what could cause the bad DMA transfers?
The code is below.

/*
 * drivers/spi/spi-fsl-dspi.c
 *
 * Copyright 2013 Freescale Semiconductor, Inc.
 *
 * Freescale DSPI driver
 * This file contains a driver for the Freescale DSPI
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 */

#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/sched.h>
#include <linux/spi/spi.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/spi/spi_bitbang.h>
#include <linux/time.h>
#include <linux/mutex.h>

#define DRIVER_NAME "fsl-dspi"

#define DMA


#define TRAN_STATE_RX_VOID		0x01
#define TRAN_STATE_TX_VOID		0x02
#define TRAN_STATE_WORD_ODD_NUM	0x04

#define DSPI_FIFO_SIZE			4

#define SPI_MCR		0x00
#define SPI_MCR_MASTER		(1 << 31)
#define SPI_MCR_PCSIS		(0x3F << 16)
#define SPI_MCR_CLR_TXF	(1 << 11)
#define SPI_MCR_CLR_RXF	(1 << 10)

#define SPI_TCR			0x08

#define SPI_CTAR(x)		(0x0c + (((x) & 0x3) * 4))
#define SPI_CTAR_FMSZ(x)	(((x) & 0x0000000f) << 27)
#define SPI_CTAR_CPOL(x)	((x) << 26)
#define SPI_CTAR_CPHA(x)	((x) << 25)
#define SPI_CTAR_LSBFE(x)	((x) << 24)
#define SPI_CTAR_PCSSCK(x)	(((x) & 0x00000003) << 22)
#define SPI_CTAR_PASC(x)	(((x) & 0x00000003) << 20)
#define SPI_CTAR_PDT(x)	(((x) & 0x00000003) << 18)
#define SPI_CTAR_PBR(x)	(((x) & 0x00000003) << 16)
#define SPI_CTAR_CSSCK(x)	(((x) & 0x0000000f) << 12)
#define SPI_CTAR_ASC(x)	(((x) & 0x0000000f) << 8)
#define SPI_CTAR_DT(x)		(((x) & 0x0000000f) << 4)
#define SPI_CTAR_BR(x)		((x) & 0x0000000f)
#define SPI_CTAR_SCALE_BITS	0xf

#define SPI_CTAR0_SLAVE	0x0c

#define SPI_SR			0x2c
#define SPI_SR_EOQF		0x10000000

#define SPI_RSER_TFFFE		0x02000000
#define SPI_RSER_TFFFD		0x01000000
#define SPI_RSER_RFDFE		0x00020000
#define SPI_RSER_RFDFD		0x00010000

#define SPI_RSER		0x30
#define SPI_RSER_EOQFE		0x10000000

#define SPI_PUSHR		0x34
#define SPI_PUSHR_CONT		(1 << 31)
#define SPI_PUSHR_CTAS(x)	(((x) & 0x00000003) << 28)
#define SPI_PUSHR_EOQ		(1 << 27)
#define SPI_PUSHR_CTCNT	(1 << 26)
#define SPI_PUSHR_PCS(x)	(((1 << x) & 0x0000003f) << 16)
#define SPI_PUSHR_TXDATA(x)	((x) & 0x0000ffff)

#define SPI_PUSHR_SLAVE	0x34

#define SPI_POPR		0x38
#define SPI_POPR_RXDATA(x)	((x) & 0x0000ffff)

#define SPI_TXFR0		0x3c
#define SPI_TXFR1		0x40
#define SPI_TXFR2		0x44
#define SPI_TXFR3		0x48
#define SPI_RXFR0		0x7c
#define SPI_RXFR1		0x80
#define SPI_RXFR2		0x84
#define SPI_RXFR3		0x88

#define SPI_FRAME_BITS(bits)	SPI_CTAR_FMSZ((bits) - 1)
#define SPI_FRAME_BITS_MASK	SPI_CTAR_FMSZ(0xf)
#define SPI_FRAME_BITS_16	SPI_CTAR_FMSZ(0xf)
#define SPI_FRAME_BITS_8	SPI_CTAR_FMSZ(0x7)

#define SPI_CS_INIT		0x01
#define SPI_CS_ASSERT	0x02
#define SPI_CS_DROP		0x04

#define DSPI_DMA_BUFSIZE	4096

struct mutex	mlock;

struct chip_data {
	u32 mcr_val;
	u32 ctar_val;
	u16 void_write_data;
};

struct fsl_dspi {
	struct spi_master	*master;
	struct platform_device	*pdev;
	struct regmap		*regmap;
	int			irq;
	struct clk		*clk;
	struct spi_transfer	*cur_transfer;
	struct spi_message	*cur_msg;
	struct chip_data	*cur_chip;
	size_t			len;
	void			*tx;
	void			*tx_end;
	void			*rx;
	void			*rx_end;
	char			dataflags;
	u8			cs;
	u16			void_write_data;
	u32			cs_change;
	wait_queue_head_t	waitq;
	u32			waitflags;
	struct fsl_dspi_dma 	*dma;
};	


/*For DMA Support*/
struct fsl_dspi_dma {
	struct dma_chan			*chan_tx;
	struct dma_chan			*chan_rx;
	u32				*dma_tx_buf;
	u32				*dma_rx_buf; //*u16
	dma_addr_t			phybase;
	dma_addr_t 			tx_mem_addr;
	dma_addr_t 			rx_mem_addr;
	struct completion		cmd_tx_complete;
	struct completion		cmd_rx_complete;
	struct dma_async_tx_descriptor *tx_desc;
	struct dma_async_tx_descriptor *rx_desc;

};

static int dspi_tx_dma(struct fsl_dspi *dspi);

static inline int is_double_byte_mode(struct fsl_dspi *dspi)
{
	unsigned int val;

	regmap_read(dspi->regmap, SPI_CTAR(0), &val);

	return ((val & SPI_FRAME_BITS_MASK) == SPI_FRAME_BITS(8)) ? 0 : 1;
}

static void hz_to_spi_baud(char *pbr, char *br, int speed_hz,
		unsigned long clkrate)
{
	/* Valid baud rate pre-scaler values */
	int pbr_tbl[4] = {2, 3, 5, 7};
	int brs[16] = { 2,	4,	6, 	8,
			16,	32,	64, 	128,
			256,	512,	1024,	2048,
			4096,	8192,	16384,	32768 };
	int scale_needed, scale, minscale = INT_MAX;
	int i, j;

	scale_needed = clkrate / speed_hz;
	if (clkrate % speed_hz)
		scale_needed++;

	for (i = 0; i < ARRAY_SIZE(brs); i++)
		for (j = 0; j < ARRAY_SIZE(pbr_tbl); j++) {
			scale = brs[i] * pbr_tbl[j];
			if (scale >= scale_needed) {
				if (scale < minscale) {
					minscale = scale;
					*br = i;
					*pbr = j;
				}
				break;
			}
		}

	if (minscale == INT_MAX) {
		pr_warn("Can not find valid baud rate,speed_hz is %d,clkrate is %ld, we use the max prescaler value.\n",
			speed_hz, clkrate);
		*pbr = ARRAY_SIZE(pbr_tbl) - 1;
		*br =  ARRAY_SIZE(brs) - 1;
	}
}

static void ns_delay_scale(char *psc, char *sc, int delay_ns,
		unsigned long clkrate)
{
	int pscale_tbl[4] = {1, 3, 5, 7};
	int scale_needed, scale, minscale = INT_MAX;
	int i, j;
	u32 remainder;

	scale_needed = div_u64_rem((u64)delay_ns * clkrate, NSEC_PER_SEC,
			&remainder);
	if (remainder)
		scale_needed++;

	for (i = 0; i < ARRAY_SIZE(pscale_tbl); i++)
		for (j = 0; j <= SPI_CTAR_SCALE_BITS; j++) {
			scale = pscale_tbl[i] * (2 << j);
			if (scale >= scale_needed) {
				if (scale < minscale) {
					minscale = scale;
					*psc = i;
					*sc = j;
				}
				break;
			}
		}

	if (minscale == INT_MAX) {
		pr_warn("Cannot find correct scale values for %dns delay at clkrate %ld, using max prescaler value",
			delay_ns, clkrate);
		*psc = ARRAY_SIZE(pscale_tbl) - 1;
		*sc = SPI_CTAR_SCALE_BITS;
	}
}

static int dspi_transfer_write(struct fsl_dspi *dspi)
{
	int tx_count = 0;
	int tx_word;
	u16 d16;
	u8  d8;
	u32 dspi_pushr = 0;
	int first = 1;

	tx_word = is_double_byte_mode(dspi);

	/* If we are in word mode, but only have a single byte to transfer
	 * then switch to byte mode temporarily.  Will switch back at the
	 * end of the transfer.
	 */
	if (tx_word && (dspi->len == 1)) {
		dspi->dataflags |= TRAN_STATE_WORD_ODD_NUM;
		regmap_update_bits(dspi->regmap, SPI_CTAR(0),
				SPI_FRAME_BITS_MASK, SPI_FRAME_BITS(8));
		tx_word = 0;
	}

	while (dspi->len && (tx_count < DSPI_FIFO_SIZE)) {
		if (tx_word) {
			if (dspi->len == 1)
				break;

			if (!(dspi->dataflags & TRAN_STATE_TX_VOID)) {
				d16 = *(u16 *)dspi->tx;
				dspi->tx += 2;
			} else {
				d16 = dspi->void_write_data;
			}

			dspi_pushr = SPI_PUSHR_TXDATA(d16) |
				SPI_PUSHR_PCS(dspi->cs) |
				SPI_PUSHR_CTAS(0) |
				SPI_PUSHR_CONT;

			dspi->len -= 2;
		} else {
			if (!(dspi->dataflags & TRAN_STATE_TX_VOID)) {

				d8 = *(u8 *)dspi->tx;
				dspi->tx++;
			} else {
				d8 = (u8)dspi->void_write_data;
			}

			dspi_pushr = SPI_PUSHR_TXDATA(d8) |
				SPI_PUSHR_PCS(dspi->cs) |
				SPI_PUSHR_CTAS(0) |
				SPI_PUSHR_CONT;

			dspi->len--;
		}

		if (dspi->len == 0 || tx_count == DSPI_FIFO_SIZE - 1) {
			/* last transfer in the transfer */
			dspi_pushr |= SPI_PUSHR_EOQ;
			if ((dspi->cs_change) && (!dspi->len))
				dspi_pushr &= ~SPI_PUSHR_CONT;
		} else if (tx_word && (dspi->len == 1))
			dspi_pushr |= SPI_PUSHR_EOQ;

		if (first) {
			first = 0;
			dspi_pushr |= SPI_PUSHR_CTCNT; /* clear counter */
		}

		regmap_write(dspi->regmap, SPI_PUSHR, dspi_pushr);

		tx_count++;
	}

	return tx_count * (tx_word + 1);
}

static int dspi_transfer_read(struct fsl_dspi *dspi)
{
	int rx_count = 0;
	int rx_word = is_double_byte_mode(dspi);
	u16 d;

	while ((dspi->rx < dspi->rx_end)
			&& (rx_count < DSPI_FIFO_SIZE)) {
		if (rx_word) {
			unsigned int val;

			if ((dspi->rx_end - dspi->rx) == 1)
				break;

			regmap_read(dspi->regmap, SPI_POPR, &val);
			d = SPI_POPR_RXDATA(val);

			if (!(dspi->dataflags & TRAN_STATE_RX_VOID))
				*(u16 *)dspi->rx = d;
			dspi->rx += 2;

		} else {
			unsigned int val;

			regmap_read(dspi->regmap, SPI_POPR, &val);
			d = SPI_POPR_RXDATA(val);
			if (!(dspi->dataflags & TRAN_STATE_RX_VOID))
				*(u8 *)dspi->rx = d;
			dspi->rx++;
		}
		rx_count++;
	}

	return rx_count;
}

static int dspi_transfer_one_message(struct spi_master *master,
		struct spi_message *message)
{
	struct fsl_dspi *dspi = spi_master_get_devdata(master);
	struct spi_device *spi = message->spi;
	struct spi_transfer *transfer;
	int status = 0;
	int i = 0;
	u16 * val = 0;
	message->actual_length = 0;

	list_for_each_entry(transfer, &message->transfers, transfer_list) {
		dspi->cur_transfer = transfer;
		dspi->cur_msg = message;
		dspi->cur_chip = spi_get_ctldata(spi);
		dspi->cs = spi->chip_select;
		if (dspi->cur_transfer->transfer_list.next
				== &dspi->cur_msg->transfers)
			transfer->cs_change = 1;
		dspi->cs_change = transfer->cs_change;
		dspi->void_write_data = dspi->cur_chip->void_write_data;

		dspi->dataflags = 0;
		dspi->tx = (void *)transfer->tx_buf;
		dspi->tx_end = dspi->tx + transfer->len;
		dspi->rx = transfer->rx_buf;
		dspi->rx_end = dspi->rx + transfer->len;
		dspi->len = transfer->len;


#ifdef DMA
//		regmap_write(dspi->regmap, SPI_RSER,SPI_RSER_TFFFE | SPI_RSER_TFFFD
//				| SPI_RSER_RFDFE | SPI_RSER_RFDFD);
#endif

		if (!dspi->rx)
			dspi->dataflags |= TRAN_STATE_RX_VOID;

		if (!dspi->tx)
			dspi->dataflags |= TRAN_STATE_TX_VOID;

		regmap_write(dspi->regmap, SPI_MCR, dspi->cur_chip->mcr_val);
		regmap_update_bits(dspi->regmap, SPI_MCR,
				SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF,
				SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF);
		regmap_write(dspi->regmap, SPI_CTAR(0),
				dspi->cur_chip->ctar_val);
		if (transfer->speed_hz)
			regmap_write(dspi->regmap, SPI_CTAR(0),
					dspi->cur_chip->ctar_val);

#ifdef DMA
		
		if(transfer->tx_buf == NULL) //if tx_buff == NULL then send dummy
		{
			for(i=0;i<dspi->len/2-1;i++)
			{
				dspi->dma->dma_tx_buf[i]=SPI_PUSHR_PCS(0)|SPI_PUSHR_CONT;
			}
			dspi->dma->dma_tx_buf[dspi->len-1]=SPI_PUSHR_PCS(0);
		}else{

			val = (u16 *)transfer->tx_buf;
			for(i=0;i<(dspi->len/2-1);i++)
			{
				dspi->dma->dma_tx_buf[i] = (u32)val[i]|SPI_PUSHR_CONT|SPI_PUSHR_PCS(0);
		
			}
			dspi->dma->dma_tx_buf[i] = (u32)val[i]|SPI_PUSHR_PCS(0);

		}

		dev_info(&dspi->pdev->dev, "From SAIT driver %u", dspi->len);

	/*
		for(i=0;i<3;i++)
		dma->dma_tx_buf[i]=i|SPI_PUSHR_PCS(0)|SPI_PUSHR_CONT;
		dma->dma_tx_buf[3]=SPI_PUSHR_PCS(0)|3;
	*/


		regmap_write(dspi->regmap, SPI_RSER,SPI_RSER_TFFFE | SPI_RSER_TFFFD
				| SPI_RSER_RFDFE | SPI_RSER_RFDFD);
		message->actual_length = DSPI_DMA_BUFSIZE;
		dspi_tx_dma(dspi);
#else

		regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_EOQFE);

		message->actual_length += dspi_transfer_write(dspi);

		if (wait_event_interruptible(dspi->waitq, dspi->waitflags))
			dev_err(&dspi->pdev->dev, "wait transfer complete fail!\n");
		dspi->waitflags = 0;

		if (transfer->delay_usecs)
			udelay(transfer->delay_usecs);
#endif
	}

	message->status = status;
	//mutex_lock (&mlock);
	spi_finalize_current_message(master);
	//mutex_unlock (&mlock);
	return status;
}

static int dspi_setup(struct spi_device *spi)
{
	struct chip_data *chip;
	struct fsl_dspi *dspi = spi_master_get_devdata(spi->master);
	u32 cs_sck_delay = 0, sck_cs_delay = 0;
	unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0;
	unsigned char pasc = 0, asc = 0, fmsz = 0;
	unsigned long clkrate;

	if ((spi->bits_per_word >= 4) && (spi->bits_per_word <= 16)) {
		fmsz = spi->bits_per_word - 1;
	} else {
		pr_err("Invalid wordsize\n");
		return -ENODEV;
	}

	/* Only alloc on first setup */
	chip = spi_get_ctldata(spi);
	if (chip == NULL) {
		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
		if (!chip)
			return -ENOMEM;
	}

	of_property_read_u32(spi->dev.of_node, "fsl,spi-cs-sck-delay",
			&cs_sck_delay);

	of_property_read_u32(spi->dev.of_node, "fsl,spi-sck-cs-delay",
			&sck_cs_delay);

	chip->mcr_val = SPI_MCR_MASTER | SPI_MCR_PCSIS |
		SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF;

	chip->void_write_data = 0;

	clkrate = clk_get_rate(dspi->clk);
	//spi->max_speed_hz = 8000000; //SRK
	hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate);

	/* Set PCS to SCK delay scale values */
	ns_delay_scale(&pcssck, &cssck, cs_sck_delay, clkrate);

	/* Set After SCK delay scale values */
	ns_delay_scale(&pasc, &asc, sck_cs_delay, clkrate);
	
	//SRK changes, for 3.0Mhz
	cssck = 0x3;
	asc = 0x3;

	chip->ctar_val =  SPI_CTAR_FMSZ(fmsz)
		| SPI_CTAR_CPOL(spi->mode & SPI_CPOL ? 1 : 0)
		| SPI_CTAR_CPHA(spi->mode & SPI_CPHA ? 1 : 0)
		| SPI_CTAR_LSBFE(spi->mode & SPI_LSB_FIRST ? 1 : 0)
		| SPI_CTAR_PCSSCK(pcssck)
		| SPI_CTAR_CSSCK(cssck)
		| SPI_CTAR_PASC(pasc)
		| SPI_CTAR_ASC(asc)
		| SPI_CTAR_PBR(pbr)
		| SPI_CTAR_BR(br);

	spi_set_ctldata(spi, chip);

	return 0;
}

static void dspi_cleanup(struct spi_device *spi)
{
	struct chip_data *chip = spi_get_ctldata((struct spi_device *)spi);

	dev_dbg(&spi->dev, "spi_device %u.%u cleanup\n",
			spi->master->bus_num, spi->chip_select);

	kfree(chip);
}

static irqreturn_t dspi_interrupt(int irq, void *dev_id)
{
	struct fsl_dspi *dspi = (struct fsl_dspi *)dev_id;

	struct spi_message *msg = dspi->cur_msg;

	regmap_write(dspi->regmap, SPI_SR, SPI_SR_EOQF);
	dspi_transfer_read(dspi);

	if (!dspi->len) {
		if (dspi->dataflags & TRAN_STATE_WORD_ODD_NUM)
			regmap_update_bits(dspi->regmap, SPI_CTAR(0),
			SPI_FRAME_BITS_MASK, SPI_FRAME_BITS(16));

		dspi->waitflags = 1;
		wake_up_interruptible(&dspi->waitq);
	} else
		msg->actual_length += dspi_transfer_write(dspi);

	return IRQ_HANDLED;
}

static const struct of_device_id fsl_dspi_dt_ids[] = {
	{ .compatible = "fsl,vf610-dspi", .data = NULL, },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, fsl_dspi_dt_ids);

#ifdef CONFIG_PM_SLEEP
static int dspi_suspend(struct device *dev)
{
	struct spi_master *master = dev_get_drvdata(dev);
	struct fsl_dspi *dspi = spi_master_get_devdata(master);

	spi_master_suspend(master);
	clk_disable_unprepare(dspi->clk);

	pinctrl_pm_select_sleep_state(dev);

	return 0;
}

static int dspi_resume(struct device *dev)
{
	struct spi_master *master = dev_get_drvdata(dev);
	struct fsl_dspi *dspi = spi_master_get_devdata(master);

	pinctrl_pm_select_default_state(dev);

	clk_prepare_enable(dspi->clk);
	spi_master_resume(master);

	return 0;
}
#endif /* CONFIG_PM_SLEEP */

static SIMPLE_DEV_PM_OPS(dspi_pm, dspi_suspend, dspi_resume);

static const struct regmap_config dspi_regmap_config = {
	.reg_bits = 32,
	.val_bits = 32,
	.reg_stride = 4,
	.max_register = 0x88,
};

#ifdef DMA

static void dspi_tx_dma_callback(void *arg)
{
	struct fsl_dspi *dspi = (struct fsl_dspi *)arg;
	struct fsl_dspi_dma *dma = dspi->dma;

	dev_info(&dspi->pdev->dev, "tx callback");

/*	dma_unmap_single(dma->chan_tx->device->dev, dma->tx_mem_addr,
			dspi->len*2, DMA_MEM_TO_DEV);*/

	dma_sync_single_for_cpu(dma->chan_tx->device->dev, dma->tx_mem_addr,
			dspi->len*2, DMA_MEM_TO_DEV);


	complete(&dma->cmd_tx_complete);
}


static void dspi_rx_dma_callback(void *arg)
{
	struct fsl_dspi *dspi = (struct fsl_dspi *)arg;
	struct fsl_dspi_dma *dma = dspi->dma;
	u16 * val;
	int i;	

	dma_sync_single_for_cpu(dma->chan_rx->device->dev, dma->rx_mem_addr,
			dspi->len*2, DMA_MEM_TO_DEV);

	//--------------
	if(dspi->rx != NULL) //if rx_buff != NULL then rx data copy
	{
		val = (u16 *)dspi->rx;
		for(i=0;i<(dspi->len/2);i++)
		{
			dev_info(&dspi->pdev->dev, "rx data %X ",dspi->dma->dma_rx_buf[i]);
			val[i] = dspi->dma->dma_rx_buf[i];
		}
	}

	dev_info(&dspi->pdev->dev, "rx callback");

		complete(&dma->cmd_rx_complete);
	//mutex_unlock (&mlock);
}


static int dspi_tx_dma(struct fsl_dspi *dspi)
{
	struct fsl_dspi_dma *dma = dspi->dma;
	struct device *dev = &dspi->pdev->dev;

	dma->tx_desc = dmaengine_prep_slave_single(dma->chan_tx, dma->tx_mem_addr,
					dspi->len*2, DMA_MEM_TO_DEV,
					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!dma->tx_desc) {
		dev_err(dev, "Not able to get desc for DMA xfer\n");
		return 0;

	}

	dma->tx_desc->callback = dspi_tx_dma_callback;
	dma->tx_desc->callback_param = dspi;
	if (dma_submit_error(dmaengine_submit(dma->tx_desc))) {
		dev_err(dev, "DMA submit failed\n");
		return 0;
	}

	dma->rx_desc = dmaengine_prep_slave_single(dma->chan_rx, dma->rx_mem_addr,
					dspi->len*2, DMA_MEM_TO_DEV,
					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!dma->rx_desc) {
		dev_err(dev, "Not able to get desc for DMA xfer\n");
		return 0;

	}

	dma->rx_desc->callback = dspi_rx_dma_callback;
	dma->rx_desc->callback_param = dspi;
	if (dma_submit_error(dmaengine_submit(dma->rx_desc))) {
		dev_err(dev, "DMA submit failed\n");
		return 0;
	}

	reinit_completion(&dspi->dma->cmd_rx_complete);
	reinit_completion(&dspi->dma->cmd_tx_complete);

	dma_sync_single_for_device(dma->chan_rx->device->dev, dma->dma_rx_buf, dspi->len*2, DMA_MEM_TO_DEV);
	dma_sync_single_for_device(dma->chan_tx->device->dev, dma->dma_tx_buf, dspi->len*2, DMA_MEM_TO_DEV);


	dma_async_issue_pending(dma->chan_rx);
	dev_info(dev, "async_rx");
	dma_async_issue_pending(dma->chan_tx);
	dev_info(dev, "async_tx");
	if(!wait_for_completion_timeout(&dspi->dma->cmd_tx_complete, msecs_to_jiffies(3000))){
		dev_err(dev, "DMA TX Timeout\n");
			wait_for_completion_timeout(&dspi->dma->cmd_rx_complete, msecs_to_jiffies(3000));
	}
	dev_info(dev, "wait for complition end");

	return 0;
}

/* Functions for DMA support */
static int dspi_request_dma(struct fsl_dspi *dspi, dma_addr_t phy_addr)
{
	struct fsl_dspi_dma *dma;
	struct dma_slave_config cfg;
	struct device *dev = &dspi->pdev->dev;
	int ret = -EINVAL;

	dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL);
	if (!dma)
		return 0;

	dma->phybase = phy_addr;

	dma->dma_tx_buf = devm_kzalloc(dev, DSPI_DMA_BUFSIZE, GFP_DMA);
	dma->dma_rx_buf = devm_kzalloc(dev, DSPI_DMA_BUFSIZE, GFP_DMA);


	dma->chan_rx = dma_request_slave_channel(dev, "rx");
	if (dma->chan_rx) {
		cfg.direction = DMA_DEV_TO_MEM;
		cfg.dst_addr = 0;
		cfg.src_addr = dma->phybase + SPI_POPR;
		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
		cfg.src_maxburst = 1;
		ret = dmaengine_slave_config(dma->chan_rx, &cfg);
		if (!ret)
			dev_info(dev, "Configed DSPI rx channel");
		else
			return ret;
	}

	dma->chan_tx = dma_request_slave_channel(dev, "tx");
	if (dma->chan_tx) {
		cfg.direction = DMA_MEM_TO_DEV;
		cfg.dst_addr = dma->phybase + SPI_PUSHR;
		cfg.src_addr = 0;
		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
		cfg.dst_maxburst = 1;
		ret = dmaengine_slave_config(dma->chan_tx, &cfg);
		if (!ret)
			dev_info(dev, "Configed DSPI tx channel");
		else
			return ret;
	}


	dspi->dma = dma;
	init_completion(&dma->cmd_tx_complete);
	init_completion(&dma->cmd_rx_complete);
		
	//-----------

	dev_info(dev, "using %s (tx) and %s (rx) for DMA transfers\n",
		dma_chan_name(dma->chan_tx), dma_chan_name(dma->chan_rx));

	//----------TX-----------

	dma->tx_mem_addr = dma_map_single(dma->chan_tx->device->dev, dma->dma_tx_buf, DSPI_DMA_BUFSIZE, DMA_MEM_TO_DEV);

	dev_info(dev, "dspi_tx_dma");

	if (dma_mapping_error(dma->chan_tx->device->dev, dma->tx_mem_addr)) {
		dev_info(dev, "dma_mapping_error");
	}


	//---------------RX----------------

	dma->rx_mem_addr = dma_map_single(dma->chan_rx->device->dev, dma->dma_rx_buf, DSPI_DMA_BUFSIZE, DMA_MEM_TO_DEV);

	dev_info(dev, "dspi_rx_dma");

	if (dma_mapping_error(dma->chan_rx->device->dev, dma->rx_mem_addr)) {
		dev_info(dev, "dma_mapping_error");
	}
	//---------------------------------

	
	return ret;
}

static void dspi_release_dma(struct fsl_dspi *dspi)
{
	struct fsl_dspi_dma *dma = dspi->dma;

	if (dma->chan_tx)
		dma_release_channel(dma->chan_tx);
	if (dma->chan_rx)
		dma_release_channel(dma->chan_rx);
	if (dspi->dma)
		devm_kfree(&dspi->pdev->dev, dspi->dma);
}
#endif

static int dspi_probe(struct platform_device *pdev)
{
	struct device_node *np = pdev->dev.of_node;
	struct spi_master *master;
	struct fsl_dspi *dspi;
	struct resource *res;
	void __iomem *base;
	int ret = 0, cs_num, bus_num;
	dma_addr_t phy_addr;

	mutex_init(&mlock);
	master = spi_alloc_master(&pdev->dev, sizeof(struct fsl_dspi));
	if (!master)
		return -ENOMEM;

	dspi = spi_master_get_devdata(master);
	dspi->pdev = pdev;
	dspi->master = master;

	master->transfer = NULL;
	master->setup = dspi_setup;
	master->transfer_one_message = dspi_transfer_one_message;
	master->dev.of_node = pdev->dev.of_node;

	master->cleanup = dspi_cleanup;
	master->mode_bits = SPI_CPOL | SPI_CPHA;
	master->bits_per_word_mask = SPI_BPW_MASK(4) | SPI_BPW_MASK(8) |
					SPI_BPW_MASK(16);

	ret = of_property_read_u32(np, "spi-num-chipselects", &cs_num);
	if (ret < 0) {
		dev_err(&pdev->dev, "can't get spi-num-chipselects\n");
		goto out_master_put;
	}
	master->num_chipselect = cs_num;

	ret = of_property_read_u32(np, "bus-num", &bus_num);
	if (ret < 0) {
		dev_err(&pdev->dev, "can't get bus-num\n");
		goto out_master_put;
	}
	master->bus_num = bus_num;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base)) {
		ret = PTR_ERR(base);
		goto out_master_put;
	}

	dspi->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "dspi", base,
						&dspi_regmap_config);
	if (IS_ERR(dspi->regmap)) {
		dev_err(&pdev->dev, "failed to init regmap: %ld\n",
				PTR_ERR(dspi->regmap));
		return PTR_ERR(dspi->regmap);
	}

	dspi->irq = platform_get_irq(pdev, 0);
	if (dspi->irq < 0) {
		dev_err(&pdev->dev, "can't get platform irq\n");
		ret = dspi->irq;
		goto out_master_put;
	}

	ret = devm_request_irq(&pdev->dev, dspi->irq, dspi_interrupt, 0,
			pdev->name, dspi);
	if (ret < 0) {
		dev_err(&pdev->dev, "Unable to attach DSPI interrupt\n");
		goto out_master_put;
	}

	dspi->clk = devm_clk_get(&pdev->dev, "dspi");
	if (IS_ERR(dspi->clk)) {
		ret = PTR_ERR(dspi->clk);
		dev_err(&pdev->dev, "unable to get clock\n");
		goto out_master_put;
	}
	clk_prepare_enable(dspi->clk);

#ifdef DMA
	/*Init DMA config if supported*/
		phy_addr = (dma_addr_t)res->start;
		dev_err(&pdev->dev, "phy_addr= %X \n",phy_addr);

		if (dspi_request_dma(dspi,phy_addr)) {
			dev_err(&pdev->dev, "request dma channel fail\n");
			dspi_release_dma(dspi);
		}
#endif

	
	init_waitqueue_head(&dspi->waitq);
	platform_set_drvdata(pdev, master);

	ret = spi_register_master(master);
	if (ret != 0) {
		dev_err(&pdev->dev, "Problem registering DSPI master\n");
		goto out_clk_put;
	}

	return ret;

out_clk_put:
	clk_disable_unprepare(dspi->clk);
out_master_put:
	spi_master_put(master);

	return ret;
}

static int dspi_remove(struct platform_device *pdev)
{
	struct spi_master *master = platform_get_drvdata(pdev);
	struct fsl_dspi *dspi = spi_master_get_devdata(master);

	/* Disconnect from the SPI framework */
	clk_disable_unprepare(dspi->clk);
	spi_unregister_master(dspi->master);
	spi_master_put(dspi->master);

	return 0;
}

static struct platform_driver fsl_dspi_driver = {
	.driver.name    = DRIVER_NAME,
	.driver.of_match_table = fsl_dspi_dt_ids,
	.driver.owner   = THIS_MODULE,
	.driver.pm = &dspi_pm,
	.probe          = dspi_probe,
	.remove		= dspi_remove,
};
module_platform_driver(fsl_dspi_driver);

MODULE_DESCRIPTION("Freescale DSPI Controller Driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" DRIVER_NAME);

What could be an issue is CPU caches. Make sure to call dma_sync_single_for_cpu for the RX buffer to invalidate the buffer on the CPU side…
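
For example, in dspi_rx_dma_callback() that could look roughly like this (just a sketch using the names from your code; note that dma_sync_single_for_cpu() takes an enum dma_data_direction, so for the RX buffer that would be DMA_FROM_DEVICE):

	/* Make the RX bounce buffer visible to the CPU before reading it. */
	dma_sync_single_for_cpu(dma->chan_rx->device->dev, dma->rx_mem_addr,
			dspi->len * 2, DMA_FROM_DEVICE);

	/* Only after this, copy dma->dma_rx_buf[] into the transfer's rx buffer. */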

There was a patchset implementing DMA for DSPI on the mailing list quite some time ago:
http://thread.gmane.org/gmane.linux.ports.arm.kernel/304438

Some weeks ago I forward-ported and applied those patches on a newer tree, although I have not tested them yet. Quite likely they do not work at all at the moment… But I guess that implementation worked at one point, so it might give you a hint as to what is wrong in your implementation. I pushed a rebased version to our git repository:
http://git.toradex.com/cgit/linux-toradex.git/log/?h=toradex_vf_4.4-next-dspi-dma

Stefan, thank you for your support.

  1. I tried to use dma_sync_single_for_cpu. It has no influence on my issue.
  2. The code from the git repo is OK, but DMA operates only on 4-word memory blocks (the size of the FIFO), and there are gaps between every 4 words in the transaction. Those gaps are not acceptable for me.

At least the allocated buffer size is DSPI_DMA_BUFSIZE, which should be 4 times DSPI_FIFO_SIZE… I guess you could also easily increase that.

What looks wrong to me in that implementation is that the RX DMA gets kicked off in the TX callback, which really limits one transfer to the FIFO length… RX DMA should probably be enabled before kicking off TX DMA, so that more than a FIFO's worth of data can be received without any FIFO overflows…
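
In other words, something along these lines (just a sketch using the descriptor and channel names from your code, with the descriptors already prepared): arm the RX channel completely before starting TX, since TX is what drives the SPI clock:

	/* Arm RX first so the DMA engine can drain the RX FIFO as soon as data arrives. */
	dmaengine_submit(dma->rx_desc);
	dma_async_issue_pending(dma->chan_rx);

	/* Only then start TX, which actually clocks data out (and in). */
	dmaengine_submit(dma->tx_desc);
	dma_async_issue_pending(dma->chan_tx);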

#define DSPI_FIFO_SIZE			4
#define DSPI_DMA_BUFSIZE		(DSPI_FIFO_SIZE * 4)

The FIFO width is 4 bytes and the FIFO depth is 4 entries.
I tried to increase it, but then it does not operate correctly.

Yes, RX and TX should be synchronized. In my case RX is enabled before TX:

 dma_async_issue_pending(dma->chan_rx);
 dma_async_issue_pending(dma->chan_tx);

I think the reason for the wrong DMA transfers is incorrect synchronization of the DMA channels in the Freescale DMA driver.

Example of RX/TX data:
10459 Send:    35 36 37 38 39 3a 3b 3c 3d 3e 3f 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 50 51 52 53 54 55 56 
10459 receive: 35 36 37 38 39 3a 3b 3c 3c 3d 3e 3f 40 41 42 43 44 45 46 47 48 49 4a 4b 4d 4e 4f 50 51 52 53 54 55 56   buff_ERROR: 16

The RX data should be the same as the TX data (MOSI and MISO are shorted).
As you can see, the damaged bytes are in the middle of the packet (3c 3d 3e 3f 40 41 42 43 44 45 46 47 48 49 4a 4b are bytes from the previous packet).
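
The test itself is a simple user-space loopback check, roughly like the sketch below (the device node /dev/spidev1.0, the clock speed and the error-report format are assumptions here; the kernel's spidev_test.c does essentially the same):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/spi/spidev.h>

int main(void)
{
	uint8_t tx[34], rx[34];
	struct spi_ioc_transfer tr;
	int fd, i;

	for (i = 0; i < (int)sizeof(tx); i++)
		tx[i] = 0x35 + i;		/* same pattern as in the log above */

	fd = open("/dev/spidev1.0", O_RDWR);	/* device node is an assumption */
	if (fd < 0)
		return 1;

	memset(&tr, 0, sizeof(tr));
	tr.tx_buf = (unsigned long)tx;
	tr.rx_buf = (unsigned long)rx;
	tr.len = sizeof(tx);
	tr.speed_hz = 3000000;
	tr.bits_per_word = 8;

	if (ioctl(fd, SPI_IOC_MESSAGE(1), &tr) < 1)
		return 1;

	/* With MOSI shorted to MISO every byte must come back unchanged. */
	for (i = 0; i < (int)sizeof(tx); i++)
		if (rx[i] != tx[i])
			printf("buff_ERROR at offset %d: sent %02x, got %02x\n",
			       i, tx[i], rx[i]);

	close(fd);
	return 0;
}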

The DMA SPI driver code was also updated (see the first question).

The reason for the wrong communication was the wrong direction in the RX code: it should be DMA_DEV_TO_MEM. Now it operates without dropped packets.
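
For anyone hitting the same issue, here is a sketch of the corrected RX side against the code above (the matching streaming-DMA map and sync calls for the RX bounce buffer then use DMA_FROM_DEVICE):

	/* The RX bounce buffer is mapped for device-to-memory traffic... */
	dma->rx_mem_addr = dma_map_single(dma->chan_rx->device->dev, dma->dma_rx_buf,
				DSPI_DMA_BUFSIZE, DMA_FROM_DEVICE);

	/* ...and the RX descriptor uses DMA_DEV_TO_MEM (POPR register -> memory). */
	dma->rx_desc = dmaengine_prep_slave_single(dma->chan_rx, dma->rx_mem_addr,
				dspi->len * 2, DMA_DEV_TO_MEM,
				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);

	/* The per-transfer syncs for that buffer use DMA_FROM_DEVICE as well. */
	dma_sync_single_for_device(dma->chan_rx->device->dev, dma->rx_mem_addr,
				dspi->len * 2, DMA_FROM_DEVICE);	/* before the transfer */
	dma_sync_single_for_cpu(dma->chan_rx->device->dev, dma->rx_mem_addr,
				dspi->len * 2, DMA_FROM_DEVICE);	/* in the RX callback */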

Which image version do you use? In our latest 2.6.1 release, the SPI driver has DMA enabled by default; using SPI will implicitly use DMA in the backend for transfers.

Hi,

This is Ramanji.

I am currently working on SPI with DMA. I saw the driver above and it looks perfect. In my case I am facing one problem: when the kernel boots, I get a kernel panic.

I could not understand why this is happening.
The driver above should be compatible with our Freescale board, because I have looked at the SPI register set and it appears to be the same.

Could you please help with this? I think you are an expert in SPI with DMA by now.

If possible, could you please share your application code? I am using the spidev_test.c application to test our SPI-with-DMA driver.

Please help with this issue.
Thanks in advance.

Thanks & Regards,
Ramanjineyulu.