00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. July 2011 00005 * $Revision: V1.0.10 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cmplx_dot_prod_q31.c 00009 * 00010 * Description: Q31 complex dot product 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Version 1.0.10 2011/7/15 00015 * Big Endian support added and Merged M0 and M3/M4 Source code. 00016 * 00017 * Version 1.0.3 2010/11/29 00018 * Re-organized the CMSIS folders and updated documentation. 00019 * 00020 * Version 1.0.2 2010/11/11 00021 * Documentation updated. 00022 * 00023 * Version 1.0.1 2010/10/05 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 1.0.0 2010/09/20 00027 * Production release and review comments incorporated. 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00060 void arm_cmplx_dot_prod_q31( 00061 q31_t * pSrcA, 00062 q31_t * pSrcB, 00063 uint32_t numSamples, 00064 q63_t * realResult, 00065 q63_t * imagResult) 00066 { 00067 q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */ 00068 00069 #ifndef ARM_MATH_CM0 00070 00071 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00072 uint32_t blkCnt; /* loop counter */ 00073 00074 00075 /*loop Unrolling */ 00076 blkCnt = numSamples >> 2u; 00077 00078 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00079 ** a second loop below computes the remaining 1 to 3 samples. */ 00080 while(blkCnt > 0u) 00081 { 00082 /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */ 00083 /* Convert real data in 2.62 to 16.48 by 14 right shifts */ 00084 real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00085 /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */ 00086 /* Convert imag data in 2.62 to 16.48 by 14 right shifts */ 00087 imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00088 00089 real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00090 imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00091 00092 real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00093 imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00094 00095 real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00096 imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00097 00098 00099 /* Decrement the loop counter */ 00100 blkCnt--; 00101 } 00102 00103 /* If the numSamples is not a multiple of 4, compute any remaining output samples here. 00104 ** No loop unrolling is used. */ 00105 blkCnt = numSamples % 0x4u; 00106 00107 while(blkCnt > 0u) 00108 { 00109 /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */ 00110 real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00111 /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */ 00112 imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00113 00114 /* Decrement the loop counter */ 00115 blkCnt--; 00116 } 00117 00118 #else 00119 00120 /* Run the below code for Cortex-M0 */ 00121 00122 while(numSamples > 0u) 00123 { 00124 /* outReal = realA[0]* realB[0] + realA[2]* realB[2] + realA[4]* realB[4] + .....+ realA[numSamples-2]* realB[numSamples-2] */ 00125 real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00126 /* outImag = imagA[1]* imagB[1] + imagA[3]* imagB[3] + imagA[5]* imagB[5] + .....+ imagA[numSamples-1]* imagB[numSamples-1] */ 00127 imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00128 00129 /* Decrement the loop counter */ 00130 numSamples--; 00131 } 00132 00133 #endif /* #ifndef ARM_MATH_CM0 */ 00134 00135 /* Store the real and imaginary results in 16.48 format */ 00136 *realResult = real_sum; 00137 *imagResult = imag_sum; 00138 } 00139