forked from dsopscak/utf8
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathguess_utf8.c
More file actions
63 lines (59 loc) · 1.46 KB
/
guess_utf8.c
File metadata and controls
63 lines (59 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#include <stdio.h>
/*
 * Report whether c is a UTF-8 continuation byte, i.e. lies in the
 * inclusive range 0x80..0xBF (bit pattern 10xxxxxx).
 *
 * c is an int so that the result of fgetc() can be passed straight in;
 * any value outside the range, including EOF, yields 0.
 *
 * Returns 1 for a continuation byte, 0 otherwise.
 */
static int
is_valid_continuation(int c)
{
    if (c < 0x80)
    {
        return 0;
    }
    if (c > 0xBF)
    {
        return 0;
    }
    return 1;
}
/* Length in bytes of the UTF-8 byte order mark (EF BB BF). */
enum { UTF8_BOM_LENGTH = 3 };

/*
 * Byte source that first replays the bytes consumed while probing for a
 * BOM and then continues reading from the stream.
 *
 * This replaces pushing the probe bytes back with ungetc(): the C
 * standard guarantees only a single character of pushback, so returning
 * two bytes to the stream is not portable.
 */
struct utf8_source
{
    FILE *file;
    int head[UTF8_BOM_LENGTH]; /* probe bytes; the last may be EOF */
    int head_count;            /* number of valid entries in head */
    int head_next;             /* index of the next entry to replay */
};

/* Return the next byte (or EOF): buffered probe bytes first, then the file. */
static int
utf8_source_next(struct utf8_source *src)
{
    if (src->head_next < src->head_count)
    {
        return src->head[src->head_next++];
    }
    return fgetc(src->file);
}

/*
 * Guess whether the remaining contents of a stream are UTF-8 text.
 *
 * file: stream open for reading, scanned from its current position.
 *       The function consumes input and does not restore the stream
 *       position; callers that need the data again must reposition the
 *       stream themselves.
 *
 * Returns 1 when the input starts with the UTF-8 byte order mark, or
 * when it consists only of well-formed UTF-8 sequences and contains at
 * least one multi-byte sequence. Returns 0 otherwise: on any ill-formed
 * or truncated sequence, and also for empty or pure-ASCII input, which
 * gives no evidence either way.
 */
int
guess_utf8(FILE *file)
{
    struct utf8_source src;
    int rval = 0;
    int c;

    src.file = file;
    src.head_count = 0;
    src.head_next = 0;

    /* Read up to three bytes for the BOM probe, stopping at EOF. The EOF
     * itself is recorded so that replaying it ends the scan without
     * another read from the stream.
     */
    while (src.head_count < UTF8_BOM_LENGTH)
    {
        int b = fgetc(file);

        src.head[src.head_count++] = b;
        if (b == EOF)
        {
            break;
        }
    }

    /* Believe the windowsish BOM if it's there. Would be more robust
     * to skip over it and analyze the rest of the file.
     */
    if (src.head_count == UTF8_BOM_LENGTH
        && src.head[0] == 0xef && src.head[1] == 0xbb && src.head[2] == 0xbf)
    {
        return 1;
    }

    c = utf8_source_next(&src);
    while (c != EOF)
    {
        int extra;      /* continuation bytes expected after the lead byte */
        int lo = 0x80;  /* allowed range of the first continuation byte */
        int hi = 0xbf;
        int second;
        int i;

        if (c <= 0x7f) /* ascii stands alone */
        {
            c = utf8_source_next(&src);
            continue;
        }

        if (c >= 0xc2 && c <= 0xdf) /* one follow-on code */
        {
            extra = 1;
        }
        else if (c >= 0xe0 && c <= 0xef) /* two follow-on codes */
        {
            extra = 2;
            if (c == 0xe0)
            {
                lo = 0xa0; /* below this is an overlong encoding */
            }
            else if (c == 0xed)
            {
                hi = 0x9f; /* above this encodes a UTF-16 surrogate */
            }
        }
        else if (c >= 0xf0 && c <= 0xf4) /* three follow-on codes */
        {
            extra = 3;
            if (c == 0xf0)
            {
                lo = 0x90; /* below this is an overlong encoding */
            }
            else if (c == 0xf4)
            {
                hi = 0x8f; /* above this exceeds U+10FFFF */
            }
        }
        else
        {
            /* Stray continuation byte, overlong lead (0xc0, 0xc1) or a
             * lead byte that can never occur in UTF-8 (0xf5..0xff).
             */
            return 0;
        }

        /* EOF is negative, so a truncated sequence fails the range test. */
        second = utf8_source_next(&src);
        if (second < lo || second > hi)
        {
            return 0;
        }
        for (i = 1; i < extra; i++)
        {
            if (!is_valid_continuation(utf8_source_next(&src)))
            {
                return 0;
            }
        }

        rval = 1;
        c = utf8_source_next(&src);
    }
    return rval;
}