45#ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
46#define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
49 unsigned char t[64 / 8 ];
50 unsigned int w[64 / 32];
51 unsigned short s[64 / 16];
52 unsigned char c[64 / 8];
60static inline void renormalize(
unsigned char* X,
unsigned char threshold)
65 unsigned char min = X[0];
67 for (
i = 0;
i < NUMSTATES;
i++)
70 for (
i = 0;
i < NUMSTATES;
i++)
83 unsigned char* Branchtab)
86 unsigned int decision0, decision1;
87 unsigned char metric, m0, m1, m2, m3;
92 int PRECISIONSHIFT = 2;
95 for (j = 0; j < RATE; j++)
96 metric += (Branchtab[
i + j * NUMSTATES / 2] ^ syms[s * RATE + j]) >> METRICSHIFT;
97 metric = metric >> PRECISIONSHIFT;
99 unsigned char max = ((RATE * ((256 - 1) >> METRICSHIFT)) >> PRECISIONSHIFT);
102 m1 = X[
i + NUMSTATES / 2] + (max - metric);
103 m2 = X[
i] + (max - metric);
104 m3 = X[
i + NUMSTATES / 2] + metric;
106 decision0 = (
signed int)(m0 - m1) > 0;
107 decision1 = (
signed int)(m2 - m3) > 0;
109 Y[2 *
i] = decision0 ? m1 : m0;
110 Y[2 *
i + 1] = decision1 ? m3 : m2;
112 d->
w[
i / (
sizeof(
unsigned int) * 8 / 2) +
113 s * (
sizeof(
decision_t) /
sizeof(
unsigned int))] |=
114 (decision0 | decision1 << 1) << ((2 *
i) & (
sizeof(
unsigned int) * 8 - 1));
325#include <emmintrin.h>
327#include <pmmintrin.h>
329#include <xmmintrin.h>
335 unsigned int framebits,
337 unsigned char* Branchtab)
340 for (i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
341 unsigned char a75, a81;
343 short int s20, s21, s26, s27;
344 unsigned char *a74, *a80, *b6;
345 short int *a110, *a111, *a91, *a93, *a94;
346 __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83, *a95, *a96, *a97, *a98, *a99;
348 __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79, a82, a84, a85,
349 a88, a89, a90, d10, d11, d12, d9, m23, m24, m25, m26, m27, m28, m29, m30, s18,
350 s19, s22, s23, s24, s25, s28, s29, t13, t14, t15, t16, t17, t18;
375 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
377 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
388 a91 = ((
short int*)dec);
417 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
419 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
441 if ((((
unsigned char*)Y)[0] > 210)) {
462 unsigned char a188, a194;
464 short int s48, s49, s54, s55;
465 unsigned char *a187, *a193, *b15;
466 short int *a204, *a206, *a207, *a223, *a224, *b16;
467 __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210, *a211, *a212, *a215,
469 __m128i a199, a200, a218, a219;
470 __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213, a214, a216,
471 a217, a220, a221, a222, d17, d18, d19, d20, m39, m40, m41, m42, m43, m44, m45,
472 m46, s46, s47, s50, s51, s52, s53, s56, s57, t25, t26, t27, t28, t29, t30;
497 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
499 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
510 a204 = ((
short int*)dec);
540 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
542 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
564 if ((((
unsigned char*)X)[0] > 210)) {
597 for (j = 0; j < (framebits + excess) % 2; ++j) {
599 for (
i = 0;
i < 64 / 2;
i++) {
601 (((framebits + excess) >> 1) << 1) + j,
631 unsigned int framebits,
633 unsigned char* Branchtab)
636 for (i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
637 unsigned char a75, a81;
639 short int s20, s21, s26, s27;
640 unsigned char *a74, *a80, *b6;
641 short int *a110, *a111, *a91, *a93, *a94;
642 __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83, *a95, *a96, *a97, *a98, *a99;
644 __m128i a100, a101, a103, a104, a107, a108, a109, a76, a78, a79, a82, a84, a85,
645 a88, a89, a90, d10, d11, d12, d9, m23, m24, m25, m26, m27, m28, m29, m30, s18,
646 s19, s22, s23, s24, s25, s28, s29, t13, t14, t15, t16, t17, t18;
671 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
673 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
684 a91 = ((
short int*)dec);
713 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
715 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
737 if ((((
unsigned char*)Y)[0] > 210)) {
758 unsigned char a188, a194;
760 short int s48, s49, s54, s55;
761 unsigned char *a187, *a193, *b15;
762 short int *a204, *a206, *a207, *a223, *a224, *b16;
763 __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210, *a211, *a212, *a215,
765 __m128i a199, a200, a218, a219;
766 __m128i a189, a191, a192, a195, a197, a198, a201, a202, a203, a213, a214, a216,
767 a217, a220, a221, a222, d17, d18, d19, d20, m39, m40, m41, m42, m43, m44, m45,
768 m46, s46, s47, s50, s51, s52, s53, s56, s57, t25, t26, t27, t28, t29, t30;
793 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
795 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
806 a204 = ((
short int*)dec);
836 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63));
838 _mm_set_epi8(63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63),
860 if ((((
unsigned char*)X)[0] > 210)) {
893 for (j = 0; j < (framebits + excess) % 2; ++j) {
895 for (
i = 0;
i < 64 / 2;
i++) {
897 (((framebits + excess) >> 1) << 1) + j,
925 unsigned int framebits,
927 unsigned char* Branchtab)
929 int nbits = framebits + excess;
931 int RENORMALIZE_THRESHOLD = 210;
934 for (s = 0; s < nbits; s++) {
936 for (
i = 0;
i < NUMSTATES / 2;
i++) {
945 Y = (
unsigned char*)tmp;
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition: sse2neon.h:5140
FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:6405
FORCE_INLINE __m128i _mm_adds_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:3101
FORCE_INLINE int _mm_movemask_epi8(__m128i a)
Definition: sse2neon.h:4776
FORCE_INLINE __m128i _mm_and_si128(__m128i, __m128i)
Definition: sse2neon.h:3128
#define _mm_srli_epi64(a, imm)
Definition: sse2neon.h:5863
FORCE_INLINE __m128i _mm_set1_epi8(signed char w)
Definition: sse2neon.h:5239
FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:6300
FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b)
Definition: sse2neon.h:6458
FORCE_INLINE __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: sse2neon.h:3284
#define _mm_shufflelo_epi16(a, imm)
Definition: sse2neon.h:5459
FORCE_INLINE __m128i _mm_min_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:4696
#define _mm_srli_epi16(a, imm)
Definition: sse2neon.h:5812
FORCE_INLINE __m128i _mm_srli_si128(__m128i a, int imm)
Definition: sse2neon.h:5885
FORCE_INLINE __m128i _mm_subs_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:6206
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: sse2neon.h:195
int64x2_t __m128i
Definition: sse2neon.h:244
FORCE_INLINE __m128i _mm_avg_epu8(__m128i a, __m128i b)
Definition: sse2neon.h:3187
FORCE_INLINE __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Definition: sse2neon.h:6386
Definition: volk_8u_x4_conv_k7_r2_8u.h:48
unsigned int w[64/32]
Definition: volk_8u_x4_conv_k7_r2_8u.h:50
static void BFLY(int i, int s, unsigned char *syms, unsigned char *Y, unsigned char *X, decision_t *d, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:77
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:331
static void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:627
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:921
static void renormalize(unsigned char *X, unsigned char threshold)
Definition: volk_8u_x4_conv_k7_r2_8u.h:60
for i
Definition: volk_config_fixed.tmpl.h:13