c++-gtk-utils
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
c++-gtk-utils
reassembler.h
Go to the documentation of this file.
1
/* Copyright (C) 2005 to 2010 Chris Vine
2
3
The library comprised in this file or of which this file is part is
4
distributed by Chris Vine under the GNU Lesser General Public
5
License as follows:
6
7
This library is free software; you can redistribute it and/or
8
modify it under the terms of the GNU Lesser General Public License
9
as published by the Free Software Foundation; either version 2.1 of
10
the License, or (at your option) any later version.
11
12
This library is distributed in the hope that it will be useful, but
13
WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
Lesser General Public License, version 2.1, for more details.
16
17
You should have received a copy of the GNU Lesser General Public
18
License, version 2.1, along with this library (see the file LGPL.TXT
19
which came with this source code package in the c++-gtk-utils
20
sub-directory); if not, write to the Free Software Foundation, Inc.,
21
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22
23
*/
24
25
#ifndef CGU_REASSEMBLER_H
26
#define CGU_REASSEMBLER_H
27
28
#include <
c++-gtk-utils/shared_handle.h
>
29
#include <
c++-gtk-utils/cgu_config.h
>
30
31
namespace
Cgu {
32
33
namespace
Utf8 {
34
35
36
/**
37
* @class Reassembler reassembler.h c++-gtk-utils/reassembler.h
38
* @brief A class for reassembling UTF-8 strings sent over pipes and
39
* sockets so they form complete valid UTF-8 characters.
40
*
41
* Utf8::Reassembler is a functor class which takes in a partially
42
* formed UTF-8 string and returns a nul-terminated string comprising
43
* such of the input string (after inserting, at the beginning, any
44
* partially formed UTF-8 character which was at the end of the input
45
* string passed in previous calls to the functor) as forms complete
46
* UTF-8 characters (storing any partial character at the end for the
47
* next call to the functor). If the input string contains invalid
48
* UTF-8 after adding any stored previous part character (apart from
49
* any partially formed character at the end of the input string) then
50
* operator() will return a null Cgu::SharedHandle<char*> object (that
51
* is, Cgu::SharedHandle<char*>::get() will return 0). Such input
52
* will not be treated as invalid if it consists only of a single
53
* partly formed UTF-8 character which could be valid if further bytes
54
* were received and added to it. In that case the returned
55
* SharedHandle<char*> object will contain an allocated string of zero
56
* length, comprising only a terminating \0 character, rather than a
57
* NULL pointer.
58
*
59
* This enables UTF-8 strings to be sent over pipes, sockets, etc and
60
* displayed in a GTK+ object at the receiving end.
61
*
62
* Note that for efficiency reasons the memory held in the returned
63
* Cgu::SharedHandle<char*> object may be greater than the length of
64
* the nul-terminated string that is contained in that memory: just
65
* let the Cgu::SharedHandle<char*> object manage the memory, and use
66
* the contents like any other nul-terminated string.
67
*
68
* This class is not needed if std::getline(), with its default '\\n'
69
* delimiter, is used to read UTF-8 characters using, say,
70
* Cgu::fdistream, because a whole '\\n' delimited line of UTF-8
71
* characters will always be complete.
72
*
73
* This is an example of its use, reading from a pipe until it is
74
* closed by the writer and putting the received text in a
75
* GtkTextBuffer object:
76
* @code
77
* using namespace Cgu;
78
*
79
* GtkTextIter end;
80
* GtkTextBuffer* text_buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_view));
81
* gtk_text_buffer_get_end_iter(text_buffer, &end);
82
*
83
* Utf8::Reassembler reassembler;
84
* const int BSIZE = 1024;
85
* char read_buffer[BSIZE];
86
* ssize_t res;
87
* do {
88
* res = ::read(fd, read_buffer, BSIZE);
89
* if (res > 0) {
90
* SharedHandle<char*> utf8(reassembler(read_buffer, res));
91
* if (utf8.get()) {
92
* gtk_text_buffer_insert(text_buffer, &end,
93
* utf8.get(), std::strlen(utf8));
94
* }
95
* else std::cerr << "Invalid utf8 text sent over pipe\n";
96
* }
97
* } while (res && (res != -1 || errno == EINTR));
98
* @endcode
99
*
100
* This class maintains an array as a data member, containing partly
101
* formed characters from previous calls to operator(), and should not
102
* be copied. There should be no reason to do so, but unfortunately
103
* enforcing this by explicitly precluding copy construction and copy
104
* assignment was overlooked when this class was first provided. At
105
* the next API break, the copy constructor will be explicitly deleted
106
* and only moving will be allowed. Where a Reassembler object is to be
107
* moved, use std::move and the code will be safe against this change
108
* in the future.
109
*/
110
111
class
Reassembler
{
112
size_t
stored;
113
const
static
size_t
buff_size = 6;
114
char
buffer[buff_size];
115
char
* join_buffer(
const
char
*,
size_t
);
116
public
:
117
/**
118
* Takes a byte array of wholly or partly formed UTF-8 characters to
119
* be converted (after taking account of previous calls to the method)
120
* to a valid string of wholly formed characters.
121
* @param input The input array.
122
* @param size The number of bytes in the input (not the number of
123
* UTF-8 characters).
124
* @return A Cgu::SharedHandle<char*> object holding a nul-terminated
125
* string comprising such of the input (after inserting, at the
126
* beginning, any partially formed UTF-8 character which was at the
127
* end of the input passed in previous calls to the functor) as forms
128
* complete UTF-8 characters (storing any partial character at the end
129
* for the next call to the functor). If the input is invalid after
130
* such recombination, then a null Cgu::SharedHandle<char*> object is
131
* returned (that is, Cgu::SharedHandle<char*>::get() will return 0).
132
* Such input will not be treated as invalid if it consists only of a
133
* single partly formed UTF-8 character which could be valid if
134
* further bytes were received and added to it. In that case the
135
* returned Cgu::SharedHandle<char*> object will contain an allocated
136
* string of zero length, comprising only a terminating \0 character,
137
* rather than a NULL pointer.
138
* @exception std::bad_alloc The method might throw std::bad_alloc if
139
* memory is exhausted and the system throws in that case. It will
140
* not throw any other exception.
141
*/
142
Cgu::SharedHandle<char*>
operator()
(
const
char
* input,
size_t
size);
143
144
/**
145
* Gets the number of bytes of a partially formed UTF-8 character
146
* stored for the next call to operator()(). It will not throw.
147
* @return The number of bytes.
148
*/
149
size_t
get_stored
()
const
{
return
stored;}
150
151
/**
152
* Resets the Reassembler, by discarding any partially formed UTF-8
153
* character from previous calls to operator()(). It will not throw.
154
*/
155
void
reset
() {stored = 0;}
156
157
/**
158
* The constructor will not throw.
159
*/
160
Reassembler
(): stored(0) {}
161
162
// TODO: At the next API break, provide a default and move
163
// constructor and move assignment operator, and omit a copy
164
// constructor and copy assignment operator: this class maintains an
165
// array as a data member
166
167
/* Only has effect if --with-glib-memory-slices-compat or
168
* --with-glib-memory-slices-no-compat option picked */
169
CGU_GLIB_MEMORY_SLICES_FUNCS
170
};
171
172
}
// namespace Utf8
173
174
}
// namespace Cgu
175
176
#endif
Generated on Sat Mar 9 2013 07:51:15 for c++-gtk-utils by
1.8.3.1