liberasurecode  1.6.1
Erasure Code API library
erasurecode_preprocessing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2014 Tushar Gohad, Kevin M Greenan, Eric Lambert, Mark Storer
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * Redistributions of source code must retain the above copyright notice, this
8  * list of conditions and the following disclaimer.
9  *
10  * Redistributions in binary form must reproduce the above copyright notice, this
11  * list of conditions and the following disclaimer in the documentation and/or
12  * other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY
13  * THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
14  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
15  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
16  * EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
17  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
18  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
20  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
21  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
22  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  * liberasurecode proprocssing helpers implementation
25  *
26  * vi: set noai tw=79 ts=4 sw=4:
27  */
28 
29 #include "erasurecode_backend.h"
30 #include "erasurecode_helpers.h"
31 #include "erasurecode_helpers_ext.h"
32 #include "erasurecode_log.h"
33 #include "erasurecode_preprocessing.h"
34 #include "erasurecode_stdinc.h"
35 
36 int prepare_fragments_for_encode(ec_backend_t instance,
37  int k, int m,
38  const char *orig_data, uint64_t orig_data_size, /* input */
39  char **encoded_data, char **encoded_parity, /* output */
40  int *blocksize)
41 {
42  int i, ret = 0;
43  int data_len; /* data len to write to fragment headers */
44  int aligned_data_len; /* EC algorithm compatible data length */
45  int buffer_size, payload_size = 0;
46  int metadata_size, data_offset = 0;
47 
48  /* Calculate data sizes, aligned_data_len guaranteed to be divisible by k*/
49  data_len = orig_data_size;
50  aligned_data_len = get_aligned_data_size(instance, orig_data_size);
51  *blocksize = payload_size = (aligned_data_len / k);
52  metadata_size = instance->common.ops->get_backend_metadata_size(
53  instance->desc.backend_desc,
54  *blocksize);
55  data_offset = instance->common.ops->get_encode_offset(
56  instance->desc.backend_desc,
57  metadata_size);
58  buffer_size = payload_size + metadata_size;
59 
60  for (i = 0; i < k; i++) {
61  int copy_size = data_len > payload_size ? payload_size : data_len;
62  char *fragment = (char *) alloc_fragment_buffer(buffer_size);
63  if (NULL == fragment) {
64  ret = -ENOMEM;
65  goto out_error;
66  }
67 
68  /* Copy existing data into clean, zero'd out buffer */
69  encoded_data[i] = get_data_ptr_from_fragment(fragment);
70 
71  if (data_len > 0) {
72  memcpy(encoded_data[i] + data_offset, orig_data, copy_size);
73  }
74 
75  orig_data += copy_size;
76  data_len -= copy_size;
77  }
78 
79  for (i = 0; i < m; i++) {
80  char *fragment = (char *) alloc_fragment_buffer(buffer_size);
81  if (NULL == fragment) {
82  ret = -ENOMEM;
83  goto out_error;
84  }
85 
86  encoded_parity[i] = get_data_ptr_from_fragment(fragment);
87  }
88 
89 out:
90  return ret;
91 
92 out_error:
93  printf ("ERROR in encode\n");
94  if (encoded_data) {
95  for (i = 0; i < k; i++) {
96  if (encoded_data[i])
97  free_fragment_buffer(encoded_data[i]);
98  }
99  check_and_free_buffer(encoded_data);
100  }
101 
102  if (encoded_parity) {
103  for (i = 0; i < m; i++) {
104  if (encoded_parity[i])
105  free_fragment_buffer(encoded_parity[i]);
106  }
107  check_and_free_buffer(encoded_parity);
108  }
109 
110  goto out;
111 }
112 
113 /*
114  * Note that the caller should always check realloc_bm during success or
115  * failure to free buffers allocated here. We could free up in this function,
116  * but it is internal to this library and only used in a few places. In any
117  * case, the caller has to free up in the success case, so it may as well do
118  * so in the failure case.
119  */
121  int k, int m,
122  char **data, char **parity,
123  int *missing_idxs,
124  int *orig_size, int *fragment_payload_size, int fragment_size,
125  uint64_t *realloc_bm)
126 {
127  int i; /* a counter */
128  unsigned long long missing_bm; /* bitmap form of missing indexes list */
129  int orig_data_size = -1;
130  int payload_size = -1;
131 
132  missing_bm = convert_list_to_bitmap(missing_idxs);
133 
134  /*
135  * Determine if each data fragment is:
136  * 1.) Alloc'd: if not, alloc new buffer (for missing fragments)
137  * 2.) Aligned to 16-byte boundaries: if not, alloc a new buffer
138  * memcpy the contents and free the old buffer
139  */
140  for (i = 0; i < k; i++) {
141  /*
142  * Allocate or replace with aligned buffer if the buffer was not
143  * aligned.
144  * DO NOT FREE: the python GC should free the original when cleaning up
145  * 'data_list'
146  */
147  if (NULL == data[i]) {
148  data[i] = alloc_fragment_buffer(fragment_size - sizeof(fragment_header_t));
149  if (NULL == data[i]) {
150  log_error("Could not allocate data buffer!");
151  return -ENOMEM;
152  }
153  *realloc_bm = *realloc_bm | (1 << i);
154  } else if (!is_addr_aligned((unsigned long)data[i], 16)) {
155  char *tmp_buf = alloc_fragment_buffer(fragment_size - sizeof(fragment_header_t));
156  if (NULL == tmp_buf) {
157  log_error("Could not allocate temp buffer!");
158  return -ENOMEM;
159  }
160  memcpy(tmp_buf, data[i], fragment_size);
161  data[i] = tmp_buf;
162  *realloc_bm = *realloc_bm | (1 << i);
163  }
164 
165  /* Need to determine the size of the original data */
166  if (((missing_bm & (1 << i)) == 0) && orig_data_size < 0) {
167  orig_data_size = get_orig_data_size(data[i]);
168  if (orig_data_size < 0) {
169  log_error("Invalid orig_data_size in fragment header!");
170  return -EBADHEADER;
171  }
172  payload_size = get_fragment_payload_size(data[i]);
173  if (orig_data_size < 0) {
174  log_error("Invalid fragment_size in fragment header!");
175  return -EBADHEADER;
176  }
177  }
178  }
179 
180  /* Perform the same allocation, alignment checks on the parity fragments */
181  for (i = 0; i < m; i++) {
182  /*
183  * Allocate or replace with aligned buffer, if the buffer was not aligned.
184  * DO NOT FREE: the python GC should free the original when cleaning up 'data_list'
185  */
186  if (NULL == parity[i]) {
187  parity[i] = alloc_fragment_buffer(fragment_size-sizeof(fragment_header_t));
188  if (NULL == parity[i]) {
189  log_error("Could not allocate parity buffer!");
190  return -ENOMEM;
191  }
192  *realloc_bm = *realloc_bm | (1 << (k + i));
193  } else if (!is_addr_aligned((unsigned long)parity[i], 16)) {
194  char *tmp_buf = alloc_fragment_buffer(fragment_size-sizeof(fragment_header_t));
195  if (NULL == tmp_buf) {
196  log_error("Could not allocate temp buffer!");
197  return -ENOMEM;
198  }
199  memcpy(tmp_buf, parity[i], fragment_size);
200  parity[i] = tmp_buf;
201  *realloc_bm = *realloc_bm | (1 << (k + i));
202  }
203 
204  /* Need to determine the size of the original data */
205  if (((missing_bm & (1 << (k + i))) == 0) && orig_data_size < 0) {
206  orig_data_size = get_orig_data_size(parity[i]);
207  if (orig_data_size < 0) {
208  log_error("Invalid orig_data_size in fragment header!");
209  return -EBADHEADER;
210  }
211  payload_size = get_fragment_payload_size(parity[i]);
212  if (orig_data_size < 0) {
213  log_error("Invalid fragment_size in fragment header!");
214  return -EBADHEADER;
215  }
216  }
217 
218  }
219 
220  *orig_size = orig_data_size;
221  *fragment_payload_size = payload_size;
222 
223  return 0;
224 }
225 
227  int k, int m,
228  char **fragments, int num_fragments,
229  char **data, char **parity, int *missing)
230 {
231  int i = 0;
232  int num_missing = 0;
233  int index;
234 
235  /*
236  * Set all data and parity entries to NULL
237  */
238  for (i = 0; i < k; i++) {
239  data[i] = NULL;
240  }
241  for (i = 0; i < m; i++) {
242  parity[i] = NULL;
243  }
244 
245  /*
246  * Fill in data and parity with available fragments
247  */
248  for (i = 0; i < num_fragments; i++) {
249  index = get_fragment_idx(fragments[i]);
250  if (index < 0 || index > (k + m)) {
251  return -EBADHEADER;
252  }
253  if (index < k) {
254  data[index] = fragments[i];
255  } else {
256  parity[index - k] = fragments[i];
257  }
258  }
259 
260  /*
261  * Fill in missing array with missing indexes
262  */
263  for (i = 0; i < k; i++) {
264  if (NULL == data[i]) {
265  missing[num_missing] = i;
266  num_missing++;
267  }
268  }
269  for (i = 0; i < m; i++) {
270  if (NULL == parity[i]) {
271  missing[num_missing] = i + k;
272  num_missing++;
273  }
274  }
275  // TODO: In general, it is possible to reconstruct one or more fragments
276  // when more than m fragments are missing (e.g. flat XOR codes)
277  return (num_missing > m) ? -EINSUFFFRAGS : 0;
278 }
279 
280 int fragments_to_string(int k, int m,
281  char **fragments, int num_fragments,
282  char **orig_payload, uint64_t *payload_len)
283 {
284  char *internal_payload = NULL;
285  char **data = NULL;
286  int orig_data_size = -1;
287  int i;
288  int index;
289  int data_size;
290  int num_data = 0;
291  int string_off = 0;
292  int ret = -1;
293 
294  if (num_fragments < k) {
295  /*
296  * This is not necessarily an error condition, so *do not log here*
297  * We can maybe debug log, if necessary.
298  */
299  goto out;
300  }
301 
302  data = (char **) get_aligned_buffer16(sizeof(char *) * k);
303 
304  if (NULL == data) {
305  log_error("Could not allocate buffer for data!!");
306  ret = -ENOMEM;
307  goto out;
308  }
309 
310  for (i = 0; i < num_fragments; i++) {
311  index = get_fragment_idx(fragments[i]);
312  data_size = get_fragment_payload_size(fragments[i]);
313  if ((index < 0) || (data_size < 0)) {
314  log_error("Invalid fragment header information!");
315  ret = -EBADHEADER;
316  goto out;
317  }
318 
319  /* Validate the original data size */
320  if (orig_data_size < 0) {
321  orig_data_size = get_orig_data_size(fragments[i]);
322  } else {
323  if (get_orig_data_size(fragments[i]) != orig_data_size) {
324  log_error("Inconsistent orig_data_size in fragment header!");
325  ret = -EBADHEADER;
326  goto out;
327  }
328  }
329 
330  /* Skip parity fragments, put data fragments in index order */
331  if (index >= k) {
332  continue;
333  } else {
334  /* Make sure we account for duplicates */
335  if (NULL == data[index]) {
336  data[index] = fragments[i];
337  num_data++;
338  }
339  }
340  }
341 
342  /* We do not have enough data fragments to do this! */
343  if (num_data != k) {
344  /*
345  * This is not necessarily an error condition, so *do not log here*
346  * We can maybe debug log, if necessary.
347  */
348  goto out;
349  }
350 
351  /* Create the string to return */
352  internal_payload = (char *) get_aligned_buffer16(orig_data_size);
353  if (NULL == internal_payload) {
354  log_error("Could not allocate buffer for decoded string!");
355  ret = -ENOMEM;
356  goto out;
357  }
358 
359  /* Pass the original data length back */
360  *payload_len = orig_data_size;
361 
362  /* Copy fragment data into cstring (fragments should be in index order) */
363  for (i = 0; i < num_data && orig_data_size > 0; i++) {
364  char* fragment_data = get_data_ptr_from_fragment(data[i]);
365  int fragment_size = get_fragment_payload_size(data[i]);
366  int payload_size = orig_data_size > fragment_size ? fragment_size : orig_data_size;
367  memcpy(internal_payload + string_off, fragment_data, payload_size);
368  orig_data_size -= payload_size;
369  string_off += payload_size;
370  }
371 
372  /* Everything worked just fine */
373  ret = 0;
374 
375 out:
376  if (NULL != data) {
377  free(data);
378  }
379 
380  *orig_payload = internal_payload;
381  return ret;
382 }
383 
384 
int get_fragment_idx(char *buf)
char * alloc_fragment_buffer(int size)
int free_fragment_buffer(char *buf)
int get_aligned_data_size(ec_backend_t instance, int data_len)
Compute a size aligned to the number of data and the underlying wordsize of the EC algorithm.
int get_fragment_payload_size(char *buf)
void * get_aligned_buffer16(int size)
Memory Management Methods.
char * get_data_ptr_from_fragment(char *buf)
void * check_and_free_buffer(void *buf)
Deallocate memory buffer if it's not NULL.
int get_orig_data_size(char *buf)
int prepare_fragments_for_encode(ec_backend_t instance, int k, int m, const char *orig_data, uint64_t orig_data_size, char **encoded_data, char **encoded_parity, int *blocksize)
int prepare_fragments_for_decode(int k, int m, char **data, char **parity, int *missing_idxs, int *orig_size, int *fragment_payload_size, int fragment_size, uint64_t *realloc_bm)
int fragments_to_string(int k, int m, char **fragments, int num_fragments, char **orig_payload, uint64_t *payload_len)
int get_fragment_partition(int k, int m, char **fragments, int num_fragments, char **data, char **parity, int *missing)