emp-toolkit
block.h
Go to the documentation of this file.
1 /*
2  This file is part of JustGarble.
3 
4  JustGarble is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  JustGarble is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with JustGarble. If not, see <http://www.gnu.org/licenses/>.
16 
17  */
18 
19 
20 #ifndef UTIL_BLOCK_H__
21 #define UTIL_BLOCK_H__
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdint.h>
26 #include <stdbool.h>
27 #include <xmmintrin.h>
28 #include <emmintrin.h>
29 #include <smmintrin.h>
30 #include <wmmintrin.h>
31 #include <assert.h>
32 
/* A block is one 128-bit SSE integer register value. */
typedef __m128i block;
/* Two blocks stored back to back (an array of two 128-bit values). */
typedef block block_tpl[2];
35 inline block xorBlocks(block x, block y){return _mm_xor_si128(x,y);}
36 inline block andBlocks(block x, block y){return _mm_and_si128(x,y);}
37 inline void xorBlocks_arr(block* res, const block* x, const block* y, int nblocks) {
38  const block * dest = nblocks+x;
39  for (; x != dest;) {
40  *(res++) = xorBlocks(*(x++), *(y++));
41  }
42 }
43 inline void xorBlocks_arr(block* res, const block* x, block y, int nblocks) {
44  const block * dest = nblocks+x;
45  for (; x != dest;) {
46  *(res++) = xorBlocks(*(x++), y);
47  }
48 }
49 inline void xorBlocks_arr2(block* res, const block* x, const block* y, int nblocks) {
50  const block * dest = nblocks+x;
51  for (; x != dest;) {
52  *(res++) = xorBlocks(*(x++), *(y++));
53  }
54 }
55 
56 inline bool block_cmp(const block * x, const block * y, int nblocks) {
57  const block * dest = nblocks+x;
58  for (; x != dest;) {
59  __m128i vcmp = _mm_xor_si128(*(x++), *(y++));
60  if(!_mm_testz_si128(vcmp, vcmp))
61  return false;
62  }
63  return true;
64 }
65 
/* A block with all 128 bits cleared. */
#define zero_block() _mm_setzero_si128()
/*
 * Builds a block from two 64-bit integers: X becomes the upper 64 bits and
 * Y the lower 64 bits. _mm_set_epi64x takes plain integers, so no MMX
 * (__m64) values are involved and operands of any integer width are accepted.
 */
#define makeBlock(X,Y) _mm_set_epi64x((long long)(X), (long long)(Y))
/* A block with all 128 bits set. */
#define one_block() makeBlock(0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL)
/*
 * Least-significant bit (bit 0 of the lowest byte) of block x, as an int
 * that is 0 or 1. Reads the low 32 bits through an intrinsic instead of a
 * pointer cast, so x may be any block-valued expression, not only an lvalue.
 */
#define getLSB(x) (_mm_cvtsi128_si32(x) & 1)
70 
71 inline bool isZero(const block * b) {
72  return _mm_testz_si128(*b,*b);
73 }
74 
75 inline bool isOne(const block * b) {
76  __m128i neq = _mm_xor_si128(*b, one_block());
77  return _mm_testz_si128(neq, neq);
78 }
79 
80 
//Modified from
//https://mischasan.wordpress.com/2011/10/03/the-full-sse2-bit-matrix-transpose-routine/
// with the inner-most gather loops replaced by _mm_set_epi8.
//
// Transposes a bit matrix of nrows x ncols bits.
//   inp : nrows rows of ncols/8 bytes each; bit c of a row is bit (c % 8)
//         (LSB first) of byte c/8 of that row.
//   out : ncols rows of nrows/8 bytes each, same bit order; receives the
//         transpose, i.e. out bit (c, r) = inp bit (r, c).
// nrows and ncols must both be multiples of 8 (checked with assert).
// inp and out must not overlap.
static inline void
sse_trans(uint8_t *out, uint8_t const *inp, int nrows, int ncols)
{
#define INP(x,y) inp[(x)*ncols/8 + (y)/8]
#define OUT(x,y) out[(y)*nrows/8 + (x)/8]
    int rr, cc, i, h;
    uint16_t mask16;
    union { __m128i x; uint8_t b[16]; } tmp;
    __m128i vec;
    assert(nrows % 8 == 0 && ncols % 8 == 0);

    // Do the main body in 16x8 blocks:
    for (rr = 0; rr <= nrows - 16; rr += 16) {
        for (cc = 0; cc < ncols; cc += 8) {
            // Byte lane j holds columns cc..cc+7 of row rr+j.
            vec = _mm_set_epi8(
                    INP(rr+15,cc),INP(rr+14,cc),INP(rr+13,cc),INP(rr+12,cc),
                    INP(rr+11,cc),INP(rr+10,cc),INP(rr+9,cc),INP(rr+8,cc),
                    INP(rr+7,cc),INP(rr+6,cc),INP(rr+5,cc),INP(rr+4,cc),
                    INP(rr+3,cc),INP(rr+2,cc),INP(rr+1,cc),INP(rr+0,cc));
            // movemask collects the top bit of every lane: one output column
            // for 16 rows. Shifting left exposes the next lower column.
            for (i = 8; --i >= 0; vec = _mm_slli_epi64(vec, 1)) {
                mask16 = (uint16_t)_mm_movemask_epi8(vec);
                // The destination may be at an odd offset (e.g. nrows == 24),
                // so store the two bytes with memcpy rather than a uint16_t cast.
                memcpy(&OUT(rr, cc + i), &mask16, sizeof mask16);
            }
        }
    }
    if (rr == nrows) return;

    // The remainder is a block of 8x(16n+8) bits (n may be 0).
    // Do a PAIR of 8x8 blocks in each step:
    for (cc = 0; cc <= ncols - 16; cc += 16) {
        // Lanes 0..7  : columns cc..cc+7    of rows rr..rr+7,
        // lanes 8..15 : columns cc+8..cc+15 of rows rr..rr+7.
        // The two column groups must not be interleaved, otherwise the low and
        // high byte of the movemask result would mix rows with columns.
        vec = _mm_set_epi8(
                INP(rr+7,cc+8),INP(rr+6,cc+8),INP(rr+5,cc+8),INP(rr+4,cc+8),
                INP(rr+3,cc+8),INP(rr+2,cc+8),INP(rr+1,cc+8),INP(rr+0,cc+8),
                INP(rr+7,cc),INP(rr+6,cc),INP(rr+5,cc),INP(rr+4,cc),
                INP(rr+3,cc),INP(rr+2,cc),INP(rr+1,cc),INP(rr+0,cc));
        for (i = 8; --i >= 0; vec = _mm_slli_epi64(vec, 1)) {
            h = _mm_movemask_epi8(vec);
            OUT(rr, cc + i) = (uint8_t)h;
            OUT(rr, cc + i + 8) = (uint8_t)(h >> 8);
        }
    }
    if (cc == ncols) return;

    // Do the remaining 8x8 block. Only lanes 0..7 carry data; clear the
    // register first so the unused lanes are not read uninitialised.
    tmp.x = _mm_setzero_si128();
    for (i = 0; i < 8; ++i)
        tmp.b[i] = INP(rr + i, cc);
    for (i = 8; --i >= 0; tmp.x = _mm_slli_epi64(tmp.x, 1))
        OUT(rr, cc + i) = (uint8_t)_mm_movemask_epi8(tmp.x);
}
128 
129 
/*
 * Fixed 16-byte constant, stored as a string literal (so the array is
 * 17 bytes long including the terminating NUL).
 * NOTE(review): presumably the fixed AES key used elsewhere in the
 * library -- confirm against the users of fix_key.
 */
const char fix_key[] =
    "\x61\x7e\x8d\xa2"
    "\xa0\x51\x1e\x96"
    "\x5e\x41\xc2\x9b"
    "\x15\x3f\xc7\x7a";
132 
133 /*------------------------------------------------------------------------
134  / OCB Version 3 Reference Code (Optimized C) Last modified 08-SEP-2012
135  /-------------------------------------------------------------------------
136  / Copyright (c) 2012 Ted Krovetz.
137  /
138  / Permission to use, copy, modify, and/or distribute this software for any
139  / purpose with or without fee is hereby granted, provided that the above
140  / copyright notice and this permission notice appear in all copies.
141  /
142  / THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
143  / WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
144  / MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
145  / ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
146  / WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
147  / ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
148  / OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
149  /
150  / Phillip Rogaway holds patents relevant to OCB. See the following for
151  / his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
152  /
153  / Special thanks to Keegan McAllister for suggesting several good improvements
154  /
155  / Comments are welcome: Ted Krovetz <ted@krovetz.net> - Dedicated to Laurel K
156  /------------------------------------------------------------------------- */
157 static inline block double_block(block bl) {
158  const __m128i mask = _mm_set_epi32(135,1,1,1);
159  __m128i tmp = _mm_srai_epi32(bl, 31);
160  tmp = _mm_and_si128(tmp, mask);
161  tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2,1,0,3));
162  bl = _mm_slli_epi32(bl, 1);
163  return _mm_xor_si128(bl,tmp);
164 }
165 
166 static inline block LEFTSHIFT1(block bl) {
167  const __m128i mask = _mm_set_epi32(0,0, (1<<31),0);
168  __m128i tmp = _mm_and_si128(bl,mask);
169  bl = _mm_slli_epi64(bl, 1);
170  return _mm_xor_si128(bl,tmp);
171 }
172 static inline block RIGHTSHIFT(block bl) {
173  const __m128i mask = _mm_set_epi32(0,1,0,0);
174  __m128i tmp = _mm_and_si128(bl,mask);
175  bl = _mm_slli_epi64(bl, 1);
176  return _mm_xor_si128(bl,tmp);
177 }
178 #endif//UTIL_BLOCK_H__
__m128i block
Definition: block.h:8
bool isZero(const block *b)
Definition: block.h:71
block xorBlocks(block x, block y)
Definition: block.h:35
__m128i block_tpl[2]
Definition: block.h:34
bool isOne(const block *b)
Definition: block.h:75
void sse_trans(uint8_t *out, uint8_t const *inp, int nrows, int ncols)
Definition: block.h:85
void xorBlocks_arr2(block *res, const block *x, const block *y, int nblocks)
Definition: block.h:49
const char fix_key[]
Definition: block.h:130
#define OUT(x, y)
bool block_cmp(const block *x, const block *y, int nblocks)
Definition: block.h:56
#define one_block()
Definition: block.h:67
#define INP(x, y)
void xorBlocks_arr(block *res, const block *x, const block *y, int nblocks)
Definition: block.h:37
block andBlocks(block x, block y)
Definition: block.h:36