1 # IP prefix binary search
3 This structure is an ordered array data structure that is used to store a dynamic set,
4 where the keys are low and high IP addresses of prefix. Binary search compares low and high IP
5 with the searched IP address and returns data associated with match prefix.
7 The prefix array can be used for storing any data (string, int). For example, it can be used to
8 aggregate information from multiple blacklists. Data type is specific
9 to each user and their source format (source file, delimiters, data format or data length,...), so user
10 MUST define ```function load_networks()```, which load and parse information, and data about
11 networks from any of their source and fill ```'ipps_network_list'``` structure.
13 Before using the search, call the ```ipps_init()``` function. The function creates all
14 necessary structures for storing and accessing the data and return structure of IPv4 and
15 IPv6 array of prefixes with data (network context).
18 * Structure with networks array and networks counter
20 For searching, use the function ```ipps_search()```. Function returns number of data associated with match prefix and return pointer to data as parameter:
22 * Structure of ip prefix context, ipps_context_t
24 * Void pointer to data array
26 For example, if blacklist contains:
34 ```init()``` creates 2 intervals:
35 * From 1.0 to 1.127 with data "aaa" and "bbb"
36 * From 1.128 to 1.255 with data "aaa" and "ccc":
39 192.168.1.0 192.168.1.255
45 and ```ip_prefix_search()``` is called with 192.168.1.100, search return number 2 and pointer to data "aaa" and "bbb".
46 For 192.168.1.200, return also number 2 but data are "aaa" and "ccc". For 192.1.1.1, search return 0 and pointer
49 For destruction of a whole structure and data there is ```ipps_destroy()``` function, parameter is pointer to
50 the ```ipps_context_t structure```, that has to be destroyed. Also, a list of networks is necessary to destroy with
51 function ```destroy_networks()``` (this function isn't a part of library and the user must define it).
53 Recommended control flow is:
54 1. ```load_networks()```
56 3. ```destroy_networks()```
57 4. ```ipps_search()```
58 5. ```ipps_destroy()```
61 ******************************************************************
68 * \brief Init and find prefix EXAMPLE
69 * \author Erik Sabik <xsabik02@stud.fit.vutbr.cz>
70 * \author Ondrej Ploteny <xplote01@stud.fit.vutbr.cz>
74 * Copyright (C) 2020 CESNET
76 * SPDX-License-Identifier: BSD-3-Clause
80 * Prefix search example
81 * Example of using ip_prefix library designed for binary searching IP address in multiple blacklists.
82 * Blacklists information are save in a extern file.
84 * Function load_networks() read blacklist file by line and parse ip addresses, masks and data.
85 * Parsed networks are stored in array in network_list_t struct. Function load_networks() is specific
86 * for each user and his datasource. User must define function for fill ipps_network_list_t. In this
87 * example is datasource in format:
88 * <ip address>/<mask>,<char[4]>\n
89 * 192.168.2.1/24,aaa\n
91 * Function ipps_init() make prefix context from loaded networks and preprocess networks for binary search in
92 * multiple blacklists:
93 * - masked and sort source ip addresses
94 * - split ipv4 and ipv networks
95 * - create intervals (compute low and high ip of network), copy data
96 * - split overlapping intervals
98 * Intervals with its data are stored in array in ipps_context_t struct.
99 * Function ipps_init() return pointer to new ipps_context struct or NULL if init fails.
102 * !! If function load_networks() is call, you have to call destroy_networks() function !!
103 * !! If function ipps_init() is call, you have to call ipps_destroy() function !!
105 * For prefix searching, call function ipps_search() with searched ip address (ip_addr_t struct) and properly
106 * initialized prefix context (ipps_context_t struct).
107 * Function ipps_search() return number of match blacklists and return their data as parameter 'data',
108 * if ipps_search() return 0, no match in blacklist and ip is not found in any blacklist.
114 #include <sys/time.h>
115 #include <unirec/ip_prefix_search.h>
118 /* Length of loaded data in bytes */
121 /** Trim white spaces from string
122 * Shift pointer to start of string `str` to point at first not whitespace character.
123 * Find last character of string `str` that is not whitespace and write '\0' 1 byte after it.
124 * @param[in] str String containing whitespaces.
125 * @return Pointer to string containing no whitespaces.
127 char *str_trim(char *str)
132 while (isspace(*str)) {
141 // Trim trailing space
142 end = str + strlen(str) - 1;
143 while (end > str && isspace(*end)) {
147 // Write new null terminator
152 /** Convert IP address from string to struct
153 * Trim whitespaces from string `str` and check for network mask.
154 * If network mask is not present in string `str` then convert IP address from string `str` to network struct `network`,
155 * set network mask `network->mask` to 32 and for each octet from end of IP address `network->addr.ui32[2]`
156 * that equals to 0 subtract 8 from network mask, if octet is not equal to 0 then end.
157 * If network mask is present in string `str` then convert it to number and write it to `network->mask`
158 * then remove it from string `str` and convert IP address from string `str` to network struct `network`.
159 * @param[in] str String containing IP address and network mask.
160 * @param[in] network Pointer to network structure.
161 * return 1 on success otherwise 0.
164 int str_to_network(char *str, ipps_network_t *network)
170 network->data_len = DATALENGTH;
171 network->data = malloc(network->data_len * sizeof(char));
172 if(network->data == NULL) {
173 fprintf(stderr, "ERROR in allocating data");
176 memset(network->data, 0, network->data_len);
178 if ((pos = strstr(str, ",")) != NULL)
180 memcpy(network->data, pos+1,3);
184 // Trim whitespaces from string
187 // If mask not present, read ip and calculate mask from ip
188 if ((pos = strstr(str, "/")) == NULL) {
189 // Convert IPv4 from string, if fail return 0
190 if (ip_from_str(str, &network->addr)) {
191 // Calculate mask by counting zero octets from end of net addr
192 for (i = 3; i >= 0; i--) {
193 if ((network->addr.ui32[2] & (0xFF<<(i*8))) == 0 ) {
202 } else { // Mask is present, read both ip and mask
203 // Convert net mask from string to uint32
204 network->mask = (uint32_t)atoi(pos + 1);
206 // Remove net mask from string
209 // Convert ip from string, if fail return 0
210 if (!ip_from_str(str, &network->addr)) {
219 /** Extract network addresses from file
220 * Allocate initial memory for networks list structure `ipps_network_list_t` and network structure
222 * Read file `file` line by line and save networks addresses in array of ipss_network structure `networks`.
223 * If there are more networks than can fit in allocated memory then reallocate memory with 10 more spaces for networks.
224 * source file format:
225 * <ip_addr>/<mask>,<data>\n
228 * @param[in] file Pointer to string containing file name.
229 * @return Pointer to networks list structure if success else return NULL.
231 ipps_network_list_t *load_networks(FILE *file)
238 int struct_count = 50; // Starting v4_count of structs to alloc
241 // ************* LOAD NETWORKS ********************** //
243 // Alloc memory for networks structs, if malloc fails return NULL
244 ipps_network_t *networks = malloc(struct_count * sizeof(ipps_network_t));
245 if (networks == NULL) {
246 fprintf(stderr, "ERROR allocating memory for network structures\n");
250 // Alloc memory for networks list, if malloc fails return NULL
251 ipps_network_list_t * networks_list = malloc(sizeof(ipps_network_list_t));
252 if (networks_list == NULL) {
253 fprintf(stderr, "ERROR allocating memory for network list\n");
257 // Read file by line, convert string to struct
258 while (fgets(line, 64, file) != NULL) {
259 if (str_to_network(line, &networks[i]) == 1) {
262 // If limit is reached alloc new memory
263 if (i >= struct_count) {
265 // If realloc fails return NULL
266 if ((networks = realloc(networks, struct_count * sizeof(ipps_network_t))) == NULL) {
267 fprintf(stderr, "ERROR in reallocating network structure\n");
272 // Not a valid line -> ignore it and print warning
273 fprintf(stderr, "Warning: Invalid network on line %u\n", line_n);
278 networks_list->net_count = i;
279 networks_list->networks = networks;
281 return networks_list;
286 /** Dealloc ipps_network_list_t
287 * Dealloc struct ipps_network_list_t, opposite of load_network
288 * @param[in] network_list Pointer to network_list structure
291 void destroy_networks(ipps_network_list_t *network_list) {
293 for (index = 0; index < network_list->net_count; index++) {
294 free(network_list->networks[index].data);
297 free(network_list->networks);
305 struct timeval tv1, tv2;
307 /* Blacklist dataset file */
310 dataset = fopen("blacklist6.txt", "r");
311 if (dataset == NULL) {
312 printf("blacklist6.txt not found\n");
316 /* Parse file line and create IPv4 and IPv6 network lists */
317 ipps_network_list_t * network_list = load_networks(dataset);
320 if (network_list->net_count == 0)
322 printf("Empty tree, nothing to do");
327 /* Context of networks => networks are transformed to intervals from low to high IP of network,
328 * overlapping intervals are divided with relevant data, data are copied from 'ipps_network_list_t'
329 * ipps_context_t store sorted array of IPv4 intervals and IPv6 intervals and their counters separately
331 ipps_context_t *prefix_context;
333 prefix_context = ipps_init(network_list);
336 if (prefix_context == NULL)
338 destroy_networks(network_list);
342 /* 'ipps_network_list_t' is no longer a need, data are copied in 'ipps_context_t' struct */
343 destroy_networks(network_list);
346 /* Print all ip intervals and data */
347 printf("----------------------------IPv4----------------------------\n");
350 char ip_string[INET6_ADDRSTRLEN];
352 printf("\t%-16s \t%-16s\t%s\n", "Low IP", "High IP", "Data");
354 /* Check print IPv4 */
355 for (index = 0; index < prefix_context->v4_count; ++index)
357 ip_to_str(&prefix_context->v4_prefix_intervals[index].low_ip, &ip_string[0]);
358 printf("\t%-16s", ip_string);
359 ip_to_str(&prefix_context->v4_prefix_intervals[index].high_ip, &ip_string[0]);
360 printf("\t%-15s", ip_string);
362 for(j=0; j < prefix_context->v4_prefix_intervals[index].data_cnt; ++j)
364 printf("\t%s", (char *) prefix_context->v4_prefix_intervals[index].data_array[j]);
369 printf("------------------------------------------------------------\n");
371 printf("\n-------------------------IPv6-------------------------------\n");
372 printf("\t%-46s \t%-46s\t\t%s\n", "Low IP", "High IP", "Data");
373 /* Check print IPv6 */
374 for(index = 0; index < prefix_context->v6_count; ++index)
376 ip_to_str(&prefix_context->v6_prefix_intervals[index].low_ip, &ip_string[0]);
377 printf("\t%-46s", ip_string);
378 ip_to_str(&prefix_context->v6_prefix_intervals[index].high_ip, &ip_string[0]);
379 printf("\t%-46s", ip_string);
381 for(j=0; j < prefix_context->v6_prefix_intervals[index].data_cnt; ++j)
383 printf("\t%s", (char *) prefix_context->v6_prefix_intervals[index].data_array[j]);
387 printf("------------------------------------------------------------\n\n");
392 /***************** Find some ip addresses in cycle ****************/
393 char *ip_addr[] = {"251.205.178.136",
396 "fd57:9f25:3409::07",
403 int search_result; // Number of match prefixes, 0 if not match
404 char ** data = NULL; // data in blacklist is string
406 gettimeofday(&tv1, NULL);
407 for (index = 0; index < 5; ++index) {
408 /* for each ip address in ip_addr array find in prefix interval_search_context */
409 printf("%-16s\n", ip_addr[index]);
411 ip_from_str(ip_addr[index], &ip);
413 /* find ip address 'ip' in network prefix interval_search_context */
414 search_result = ipps_search(&ip, prefix_context, (void ***) &data);
417 /* if there is any match prefix, print all data */
418 for (j = 0; j < search_result; ++j) {
419 printf("\t%d %s\n", j, data[j]);
423 printf("\tIP not found in blacklist dataset\n");
427 gettimeofday(&tv2, NULL);
429 printf("search time: %ld usec", tv2.tv_usec - tv1.tv_usec);
431 /* Dealloc prefix interval_search_context struct */
432 ipps_destroy(prefix_context);