class BloomFilter

Constants

VERSION

Public Class Methods

load(p1) click to toggle source
/*
 * BloomFilter.load(file) -- build a filter from a file on disk.
 *
 * Reads a FileHeader from the start of the file, followed by
 * (table_size + 7) / 8 bytes of bitmap. A filter is then created with the
 * header's table_size and num_functions, and the bitmap is loaded into it.
 *
 * klass: class the new filter is wrapped in.
 * file:  path of the file to read (converted with CSTRING).
 *
 * Returns the wrapped filter instance.
 *
 * Raises IOError if the file cannot be opened or the header cannot be read
 * in full, NoMemoryError if an allocation fails, and StandardError if the
 * bitmap read fails or comes up short.
 */
VALUE bloom_load(VALUE klass, VALUE file) {
    int fd;
    void *buffer;
    size_t nbytes;
    ssize_t got;
    FileHeader header;
    BloomFilter *filter;
    VALUE instance;

    fd = open(CSTRING(file), O_RDONLY);
    if (fd == -1)
        rb_raise(rb_eIOError, "unable to open file: %s", strerror(errno));

    /* read() returns ssize_t; keep it signed so a -1 error is not mistaken
     * for a huge byte count. */
    got = read(fd, &header, sizeof(header));
    if (got < 0 || (size_t)got != sizeof(header)) {
        close(fd);
        rb_raise(rb_eIOError, "unable to read file, header corrupted\n");
    }

    /* The bitmap is stored packed, eight bits per byte, rounded up. */
    nbytes = (header.table_size + 7) / 8;
    buffer = malloc(nbytes);
    if (!buffer) {
        close(fd);
        rb_raise(rb_eNoMemError, "out of memory loading BloomFilter");
    }

    got = read(fd, buffer, nbytes);
    if (got < 0 || (size_t)got != nbytes) {
        free(buffer);
        close(fd);
        /* Cast explicitly so the arguments match the %ld conversions. */
        rb_raise(rb_eStandardError, "unable to load BloomFilter, expected %ld but got %ld bytes", (long)nbytes, (long)got);
    }

    /* bloom_initialize treats a NULL result from bloom_filter_new as an
     * allocation failure; do the same here before touching the filter. */
    filter = bloom_filter_new(header.table_size, string_nocase_hash, header.num_functions);
    if (!filter) {
        free(buffer);
        close(fd);
        rb_raise(rb_eNoMemError, "unable to allocate memory for BloomFilter");
    }

    bloom_filter_load(filter, buffer);

    /* Release the temporary resources before wrapping, so they are already
     * gone if wrapping itself fails. */
    free(buffer);
    close(fd);

    instance = Data_Wrap_Struct(klass, 0, bloom_free, filter);
    return instance;
}
new(p1 = v1) click to toggle source
/*
 * BloomFilter#initialize(options = nil)
 *
 * With no argument the filter gets 1000000 bits and 4 hash functions.
 * Otherwise options must be a Hash with either:
 *
 *   :bits   - explicit bitmap size in bits, with optional
 *   :hashes - number of hash functions (default 4)
 *
 * or:
 *
 *   :size       - expected maximum number of entries (must be > 0), with
 *   :error_rate - target false-positive rate, strictly between 0 and 1
 *                 (default 0.01)
 *
 * In the second form the bit count and hash count are both derived from
 * :size and :error_rate, and any :hashes value is overridden. When :bits
 * is present it takes precedence over :size.
 *
 * Returns self.
 *
 * Raises ArgumentError for a non-Hash argument, for a Hash with neither
 * :bits nor :size, and for an out-of-range :size or :error_rate.
 * Raises NoMemoryError if the filter cannot be allocated.
 */
VALUE bloom_initialize(int argc, VALUE *argv, VALUE self) {
    double error;
    size_t nbits, nhash, nmax;

    VALUE max_size, error_rate, bitmap_size, hash_count, options;
    BloomFilter *filter;

    rb_scan_args(argc, argv, "01", &options);
    if (!NIL_P(options) && TYPE(options) != T_HASH)
        rb_raise(rb_eArgError, "invalid options, expect hash");

    if (NIL_P(options)) {
        nbits = 1000000;
        nhash = 4;
    }
    else {
        max_size    = rb_hash_aref(options, ID2SYM(rb_intern("size")));
        error_rate  = rb_hash_aref(options, ID2SYM(rb_intern("error_rate")));
        bitmap_size = rb_hash_aref(options, ID2SYM(rb_intern("bits")));
        hash_count  = rb_hash_aref(options, ID2SYM(rb_intern("hashes")));

        nhash = NIL_P(hash_count) ? 4 : NUM2ULONG(hash_count);

        if (!NIL_P(bitmap_size))
            nbits = NUM2ULONG(bitmap_size);
        else if (!NIL_P(max_size)) {
            nmax  = NUM2ULONG(max_size);
            error = NIL_P(error_rate) ? 0.01 : NUM2DBL(error_rate);

            /* A zero size divides by zero below, and an error rate outside
             * (0, 1) makes log() yield 0, a positive value, -inf or NaN.
             * None of those converts to a meaningful size_t, so reject
             * them here. The negated form also catches NaN. */
            if (nmax == 0)
                rb_raise(rb_eArgError, "size must be greater than zero");
            if (!(error > 0.0 && error < 1.0))
                rb_raise(rb_eArgError, "error_rate must be between 0 and 1 (exclusive)");

            /* bits = |ln(error) * size / ln(2)^2|, hashes = 0.7 * bits / size */
            nbits = (size_t)ceil(fabs(log(error) * (double)nmax / pow(log(2), 2)));
            nhash = (size_t)ceil(0.7 * (double)nbits / (double)nmax);
        }
        else
            rb_raise(rb_eArgError, "requires either size & error_rate or bits & hashes");
    }

    filter = bloom_filter_new(nbits, string_nocase_hash, nhash);

    if (!filter)
        rb_raise(rb_eNoMemError, "unable to allocate memory for BloomFilter");

    DATA_PTR(self) = filter;
    return self;
}

Public Instance Methods

binary() click to toggle source
/*
 * BloomFilter#binary
 *
 * Returns the filter's bitmap as a Ruby String of (table_size + 7) / 8
 * bytes, as filled in by bloom_filter_read.
 *
 * klass: the receiver the filter handle is taken from.
 *
 * Raises NoMemoryError if the temporary buffer cannot be allocated.
 */
VALUE bloom_binary(VALUE klass) {
    BloomFilter *bf = bloom_handle(klass);
    int len = (bf->table_size + 7) / 8;
    unsigned char *raw = (unsigned char *)malloc(len);
    VALUE result;

    if (raw == NULL)
        rb_raise(rb_eNoMemError, "out of memory dumping BloomFilter");

    /* Copy the bitmap out, hand it to Ruby, then drop the scratch copy. */
    bloom_filter_read(bf, raw);
    result = rb_str_new((const char *)raw, len);
    free(raw);

    return result;
}
binary=(p1) click to toggle source
/*
 * BloomFilter#binary=(buffer)
 *
 * Replaces the filter's bitmap with the bytes of the given String.
 *
 * klass:  the receiver the filter handle is taken from.
 * buffer: Ruby String holding the packed bitmap.
 *
 * Returns true.
 *
 * Raises TypeError if buffer is not a String, and ArgumentError if it is
 * shorter than the filter's bitmap.
 *
 * bloom_load and bloom_binary both size the buffers they pass to
 * bloom_filter_load / bloom_filter_read as (table_size + 7) / 8 bytes, so
 * at least that many bytes are required here to avoid reading past the end
 * of the String.
 * NOTE(review): confirm this matches what bloom_filter_load actually reads.
 */
VALUE bloom_binary_set(VALUE klass, VALUE buffer) {
    BloomFilter *filter = bloom_handle(klass);
    size_t needed = (filter->table_size + 7) / 8;
    unsigned char *ptr;

    /* RSTRING_PTR / RSTRING_LEN are only valid on a real String. */
    Check_Type(buffer, T_STRING);

    if ((size_t)RSTRING_LEN(buffer) < needed)
        rb_raise(rb_eArgError, "binary data too short, expected %ld but got %ld bytes",
                 (long)needed, (long)RSTRING_LEN(buffer));

    ptr = (unsigned char *)RSTRING_PTR(buffer);
    bloom_filter_load(filter, ptr);
    return Qtrue;
}
bits() click to toggle source
/*
 * BloomFilter#bits
 *
 * Returns a Ruby String of table_size characters, one per bit of the
 * filter's table: '1' where the bit is set, '0' where it is clear.
 * Bit i is taken from byte i / 8 of the table, at position i % 8 counting
 * from the least significant bit.
 *
 * klass: the receiver the filter handle is taken from.
 *
 * Raises NoMemoryError if the temporary buffer cannot be allocated.
 */
VALUE bloom_bits(VALUE klass) {
    BloomFilter *bf = bloom_handle(klass);
    int total = bf->table_size;
    int pos;
    char *digits;
    VALUE result;

    digits = (char *)malloc(total);
    if (digits == NULL)
        rb_raise(rb_eNoMemError, "out of memory dumping BloomFilter");

    for (pos = 0; pos < total; pos++) {
        unsigned char byte = bf->table[pos / 8];
        int mask = 1 << (pos % 8);

        digits[pos] = (byte & mask) ? '1' : '0';
    }

    result = rb_str_new(digits, total);
    free(digits);

    return result;
}
dump(p1) click to toggle source
/*
 * Write exactly len bytes from buf to fd, continuing after short writes and
 * retrying when interrupted by a signal.
 *
 * Returns 0 on success, -1 on failure with errno set.
 */
static int bloom_write_all(int fd, const void *buf, size_t len) {
    const unsigned char *p = buf;

    while (len > 0) {
        ssize_t n = write(fd, p, len);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        if (n == 0) {
            /* No progress and no error code: report it rather than spin. */
            errno = EIO;
            return -1;
        }

        p   += n;
        len -= (size_t)n;
    }

    return 0;
}

/*
 * BloomFilter#dump(file)
 *
 * Writes the filter to a file: a FileHeader carrying table_size and
 * num_functions, followed by (table_size + 7) / 8 bytes of bitmap. The
 * file is created if needed, truncated, and given owner read/write
 * permission.
 *
 * klass: the receiver the filter handle is taken from.
 * file:  path of the file to write (converted with CSTRING).
 *
 * Returns true.
 *
 * Raises NoMemoryError if the temporary buffer cannot be allocated, and
 * IOError if the file cannot be opened, fully written, or closed.
 */
VALUE bloom_dump(VALUE klass, VALUE file) {
    int fd, saved_errno;
    void *buffer;
    size_t nbytes;
    FileHeader header;
    BloomFilter *filter = bloom_handle(klass);

    nbytes = (filter->table_size + 7) / 8;
    buffer = malloc(nbytes);

    if (!buffer)
        rb_raise(rb_eNoMemError, "out of memory dumping BloomFilter");

    bloom_filter_read(filter, buffer);

    fd = open(CSTRING(file), O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
    if (fd == -1) {
        /* Capture errno before cleanup calls get a chance to change it. */
        saved_errno = errno;
        free(buffer);
        rb_raise(rb_eIOError, "unable to open file. %s", strerror(saved_errno));
    }

    /* Zero the whole struct first so that any padding bytes written to the
     * file do not come from uninitialized stack memory. */
    memset(&header, 0, sizeof(header));
    header.table_size    = filter->table_size;
    header.num_functions = filter->num_functions;

    if (bloom_write_all(fd, &header, sizeof(header)) == -1 ||
        bloom_write_all(fd, buffer, nbytes) == -1) {
        saved_errno = errno;
        free(buffer);
        close(fd);
        rb_raise(rb_eIOError, "error dumping BloomFilter: %s\n", strerror(saved_errno));
    }

    free(buffer);

    /* Deferred write errors can surface at close(), so check it too. */
    if (close(fd) == -1)
        rb_raise(rb_eIOError, "error dumping BloomFilter: %s\n", strerror(errno));

    return Qtrue;
}
include?(p1) click to toggle source
/*
 * BloomFilter#include?(string)
 *
 * Queries the filter for the given value.
 *
 * klass:  the receiver the filter handle is taken from.
 * string: value to look up (converted with CSTRING).
 *
 * Returns true when bloom_filter_query reports a non-zero result, false
 * otherwise.
 */
VALUE bloom_include(VALUE klass, VALUE string) {
    BloomFilter *bf = bloom_handle(klass);
    BloomFilterValue key = (BloomFilterValue)CSTRING(string);

    if (bloom_filter_query(bf, key))
        return Qtrue;

    return Qfalse;
}
insert(p1) click to toggle source
/*
 * BloomFilter#insert(string)
 *
 * Adds the given value to the filter via bloom_filter_insert.
 *
 * klass:  the receiver the filter handle is taken from.
 * string: value to add (converted with CSTRING).
 *
 * Always returns true.
 */
VALUE bloom_insert(VALUE klass, VALUE string) {
    BloomFilter *bf;
    BloomFilterValue key;

    bf  = bloom_handle(klass);
    key = (BloomFilterValue)CSTRING(string);

    bloom_filter_insert(bf, key);
    return Qtrue;
}