mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2026-02-16 23:30:00 +08:00
test: update unit test to check detected languages.
Language checks are skipped for ASCII, UTF-16 and UTF-32, for which we don't detect languages yet.
This commit is contained in:
parent
82c1d2b25e
commit
1b5e68be00
@ -35,6 +35,7 @@
|
|||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -44,11 +45,10 @@
|
|||||||
|
|
||||||
#define BUFFER_SIZE 65536
|
#define BUFFER_SIZE 65536
|
||||||
|
|
||||||
char *
|
void
|
||||||
detect(FILE *fp)
|
detect(FILE *fp, char **charset, char **lang)
|
||||||
{
|
{
|
||||||
uchardet_t handle = uchardet_new();
|
uchardet_t handle = uchardet_new();
|
||||||
char *charset;
|
|
||||||
char buffer[BUFFER_SIZE];
|
char buffer[BUFFER_SIZE];
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
@ -67,16 +67,18 @@ detect(FILE *fp)
|
|||||||
}
|
}
|
||||||
uchardet_data_end(handle);
|
uchardet_data_end(handle);
|
||||||
|
|
||||||
charset = strdup(uchardet_get_encoding(handle, 0));
|
*charset = strdup(uchardet_get_encoding(handle, 0));
|
||||||
for (i = 0; charset[i]; i++)
|
if (uchardet_get_language(handle, 0))
|
||||||
|
*lang = strdup(uchardet_get_language(handle, 0));
|
||||||
|
else
|
||||||
|
*lang = NULL;
|
||||||
|
for (i = 0; (*charset)[i]; i++)
|
||||||
{
|
{
|
||||||
/* Our test files are lowercase. */
|
/* Our test files are lowercase. */
|
||||||
charset[i] = tolower(charset[i]);
|
(*charset)[i] = tolower((*charset)[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
uchardet_delete(handle);
|
uchardet_delete(handle);
|
||||||
|
|
||||||
return charset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
@ -84,9 +86,13 @@ main(int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
FILE *f;
|
FILE *f;
|
||||||
char *filename;
|
char *filename;
|
||||||
|
char *path;
|
||||||
char *expected_charset;
|
char *expected_charset;
|
||||||
|
char *expected_lang = NULL;
|
||||||
char *charset;
|
char *charset;
|
||||||
int success;
|
char *lang;
|
||||||
|
/* In a unit test, 0 means success, other returned values mean failure. */
|
||||||
|
int success = 1;
|
||||||
|
|
||||||
if (argc != 2)
|
if (argc != 2)
|
||||||
{
|
{
|
||||||
@ -108,27 +114,41 @@ main(int argc, char ** argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
expected_charset = strrchr(filename, '/');
|
path = realpath(filename, NULL);
|
||||||
if (expected_charset == NULL)
|
assert(path);
|
||||||
{
|
expected_charset = strrchr(path, '/');
|
||||||
expected_charset = filename;
|
assert(expected_charset);
|
||||||
}
|
*expected_charset = '\0';
|
||||||
else
|
expected_charset++;
|
||||||
{
|
|
||||||
expected_charset++;
|
|
||||||
}
|
|
||||||
expected_charset = strtok(expected_charset, ".");
|
expected_charset = strtok(expected_charset, ".");
|
||||||
|
|
||||||
charset = detect(f);
|
expected_lang = strrchr(path, '/');
|
||||||
|
assert(expected_lang);
|
||||||
|
expected_lang++;
|
||||||
|
|
||||||
|
detect(f, &charset, &lang);
|
||||||
fclose (f);
|
fclose (f);
|
||||||
|
|
||||||
/* In a unit test, 0 means success, other returned values mean failure. */
|
/* No lang detection is a failure, except for a few charsets for
|
||||||
success = (strcmp(charset, expected_charset) != 0);
|
* which we still don't detect languages.
|
||||||
if (success) {
|
* TODO.
|
||||||
fprintf(stderr, "Got %s, expected %s\n", charset, expected_charset);
|
* */
|
||||||
|
if (strcmp(expected_charset, "ascii") == 0 ||
|
||||||
|
strcmp(expected_charset, "utf-16") == 0 ||
|
||||||
|
strcmp(expected_charset, "utf-16") == 0 ||
|
||||||
|
strcmp(expected_charset, "utf-32") == 0)
|
||||||
|
{
|
||||||
|
success = (strcmp(charset, expected_charset) != 0);
|
||||||
|
}
|
||||||
|
else if (lang)
|
||||||
|
{
|
||||||
|
success = (strcmp(charset, expected_charset) != 0) +
|
||||||
|
(strcmp(lang, expected_lang) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(path);
|
||||||
free(charset);
|
free(charset);
|
||||||
|
free(lang);
|
||||||
free(filename);
|
free(filename);
|
||||||
|
|
||||||
return success;
|
return success;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user