Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion _mbsupport.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
#ifndef ___MBSUPPORT_H__
#define ___MBSUPPORT_H__

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h> // C99

#define BW_ENC_UTF8 0
#define BW_ENC_GBK 1

Expand Down Expand Up @@ -85,4 +91,3 @@ bw_mb_strtolower(uint8_t *str, uint8_t *end, int encoding)
}

#endif /* ___MBSUPPORT_H__ */

58 changes: 32 additions & 26 deletions badwords.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,34 @@
#include "config.h"
#endif

#include "php.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ext/standard/php_string.h"
#include "ext/standard/php_var.h"
#include "ext/standard/php_smart_str.h"
#include "_mbsupport.h"
#include "php57_include.h"

#include "badwords.h"
#include "_mbsupport.h"

static void
bw_match_text(struct bw_trie_header_t *header, zval *return_value,
uint8_t *text, uint8_t *text_to_walk, int c)
{
struct bw_node_t *root = (struct bw_node_t *)(header+1), *node, *gotnode;
uint8_t *end = text_to_walk + c, *watch, *gotwatch;
uint8_t *_Rep_base = (uint8_t *)(root + header->node_count);
// uint8_t *_Rep_base = (uint8_t *)(root + header->node_count);

smart_str result = {0};
COM57_SMART_STRING_T result = {0};

while (text_to_walk < end) {
/* MATCH */
if (root->next[*text_to_walk] != 0) {
watch = text_to_walk;
node = root;
gotnode = NULL;

gotwatch = NULL;

do {
node = root + node->next[*watch];
if (node->is_fragment) {
Expand All @@ -63,8 +62,8 @@ bw_match_text(struct bw_trie_header_t *header, zval *return_value,
}
while (watch < end && node->next[*watch] != 0);

if (gotnode) {
smart_str_appendl(&result, text, (gotwatch - text_to_walk + 1));
if (gotnode && gotwatch) {
COM57_SMART_STRING_APPENDL(&result, text, (gotwatch - text_to_walk + 1));
break;
}
}
Expand All @@ -74,19 +73,22 @@ bw_match_text(struct bw_trie_header_t *header, zval *return_value,
text_to_walk = watch;
}

smart_str_0(&result);
COM57_SMART_STRING_0(&result);

if (result.len) {
RETURN_STRINGL(result.c, result.len, 0);
// Unify the PHP 5/7 interface (at some cost on PHP 5)
// COM57_RETURN_STRINGL(result.c, result.len, 0);
COM57_RETVAL_STRINGL(result.c, result.len, 1);
COM57_SMART_STRING_FREE(&result);
} else {
RETURN_EMPTY_STRING();
}
}

void
bw_trie_match(zval *trie, zval *return_value, uint8_t *text, int c)
bw_trie_match(char *trie, zval *return_value, uint8_t *text, int c)
{
struct bw_trie_header_t *header = (struct bw_trie_header_t *) Z_STRVAL_P(trie);
struct bw_trie_header_t *header = (struct bw_trie_header_t *)trie;

if (header->magic_num != BW_TRIE_MAGIC
|| header->version != BW_TRIE_VERSION) RETURN_FALSE;
Expand All @@ -95,7 +97,7 @@ bw_trie_match(zval *trie, zval *return_value, uint8_t *text, int c)
bw_match_text(header, return_value, text, text, c);
}
else {
uint8_t *text_to_walk = estrndup(text, c);
uint8_t *text_to_walk = (uint8_t *)estrndup((char *)text, c);
if (!text_to_walk) {
RETURN_FALSE;
} else {
Expand All @@ -114,15 +116,16 @@ bw_replace_text(struct bw_trie_header_t *header, zval *return_value,
uint8_t *end = text_to_walk + c, *watch, *gotwatch;
uint8_t *_Rep_base = (uint8_t *)(root + header->node_count);

smart_str result = {0};
COM57_SMART_STRING_T result = {0};

while (text_to_walk < end) {
/* REPLACE */
if (root->next[*text_to_walk] != 0) {
watch = text_to_walk;
node = root;
gotnode = NULL;

gotwatch = NULL;

do {
node = root + node->next[*watch];
if (node->is_fragment) {
Expand All @@ -141,43 +144,46 @@ bw_replace_text(struct bw_trie_header_t *header, zval *return_value,
}
while (watch < end && node->next[*watch] != 0);

if (gotnode) {
if (gotnode && gotwatch) {
struct bw_string_t *replace = (struct bw_string_t *)(_Rep_base + gotnode->replace);
smart_str_appendl(&result, replace->byte, replace->len);
COM57_SMART_STRING_APPENDL(&result, replace->byte, replace->len);
text += gotwatch - text_to_walk + 1;
text_to_walk = gotwatch + 1;
continue;
}
}

watch = bw_mb_skip_char(text_to_walk, end, header->trie_encoding);
smart_str_appendl(&result, text, watch-text_to_walk);
COM57_SMART_STRING_APPENDL(&result, text, watch-text_to_walk);
text += watch - text_to_walk;
text_to_walk = watch;
}

smart_str_0(&result);
COM57_SMART_STRING_0(&result);

if (result.len) {
RETURN_STRINGL(result.c, result.len, 0);
// Unify the PHP 5/7 interface (at some cost on PHP 5)
// COM57_RETURN_STRINGL(result.c, result.len, 0);
COM57_RETVAL_STRINGL(result.c, result.len, 1);
COM57_SMART_STRING_FREE(&result);
} else {
RETURN_EMPTY_STRING();
}
}

void
bw_trie_replace(zval *trie, zval *return_value, uint8_t *text, int c)
bw_trie_replace(char *trie, zval *return_value, uint8_t *text, int c)
{
struct bw_trie_header_t *header = (struct bw_trie_header_t *) Z_STRVAL_P(trie);
struct bw_trie_header_t *header = (struct bw_trie_header_t *)trie;

if (header->magic_num != BW_TRIE_MAGIC
|| header->version != BW_TRIE_VERSION) RETURN_FALSE;

if (!header->case_insensitive) {
bw_replace_text(header, return_value, text, text, c);
}
else {
uint8_t *text_to_walk = estrndup(text, c);
uint8_t *text_to_walk = (uint8_t *)estrndup((char *)text, c);
if (!text_to_walk) {
RETURN_FALSE;
} else {
Expand Down
4 changes: 2 additions & 2 deletions badwords.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ struct bw_node_t {
};
};

void bw_trie_match(zval *trie, zval *return_value, uint8_t *text, int c);
void bw_trie_replace(zval *trie, zval *return_value, uint8_t *text, int c);
void bw_trie_match(char *trie, zval *return_value, uint8_t *text, int c);
void bw_trie_replace(char *trie, zval *return_value, uint8_t *text, int c);

#endif /* __BADWORDS_H_ */
15 changes: 9 additions & 6 deletions compiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@
#include "config.h"
#endif

#include "php.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ext/standard/php_string.h"
#include "ext/standard/php_var.h"
#include "php57_include.h"

#include "_mbsupport.h"
#include "compiler.h"

Expand Down Expand Up @@ -184,6 +183,7 @@ bw_trie_compiler_compile(struct bw_trie_compiler_t *compiler, zval *return_value
uint32_t rlen = compiler->replace_len;
uint32_t tlen = hlen + nlen + rlen;

// zend malloc
uint8_t *trie = emalloc(tlen);

if (trie) {
Expand All @@ -197,10 +197,13 @@ bw_trie_compiler_compile(struct bw_trie_compiler_t *compiler, zval *return_value
memcpy(trie+hlen, compiler->nodes, nlen);
memcpy(trie+hlen+nlen, compiler->replaces, rlen);

RETURN_STRINGL(trie, tlen, 0);
// Unify the PHP 5/7 interface (at some cost on PHP 5)
// COM57_RETURN_STRINGL((char *)trie, tlen, 0);
COM57_RETVAL_STRINGL((char *)trie, tlen, 1);
efree(trie);
} else {
RETURN_FALSE;
}

RETURN_FALSE;
}

void bw_trie_compiler_free(struct bw_trie_compiler_t *compiler)
Expand Down
20 changes: 15 additions & 5 deletions config.m4
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
dnl
dnl $Id: config9.m4 2011-08-09 15:48:23Z Wang Wenlin $
dnl $Id: config9.m4 2011-08-09 15:48:23Z Wang Wenlin/2018-04-11 18:19:23Z wlmwang $
dnl

PHP_ARG_ENABLE(badwords, whether to enable badwords support,
[ --enable-badwords Enable badwords support])

if test "$PHP_BADWORDS" != "no"; then

AC_DEFINE(HAVE_BADWORDS,1,[Whether you want badwords support])

AC_DEFINE(HAVE_BADWORDS,1,[Whether you want badwords support])
dnl PHP-7.*.*
dnl PHP_NEW_EXTENSION(badwords, badwords.c compiler.c php_badwords.c, $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)

PHP_NEW_EXTENSION(badwords, badwords.c compiler.c php_badwords.c, $ext_shared)

dnl this is needed to build the extension with phpize and -Wall

if test "$PHP_DEBUG" = "yes"; then
CFLAGS="$CFLAGS -Wall"
fi
dnl if test "$PHP_DEBUG" = "yes"; then
dnl CFLAGS="$CFLAGS -Wall"
dnl fi

if test -z "$PHP_DEBUG"; then
AC_ARG_ENABLE(debug,
[ --enable-debug compile with debugging system],
[ PHP_DEBUG=$enableval], [PHP_DEBUG=no])
fi

fi
16 changes: 10 additions & 6 deletions example/example1.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@ function get_shared_badwords__() {
return $badwords;
}

$wordfile = D_P.'data/cache/words.php';
$triebin = '/tmp/com.foo.bar-words.bin';
$persistkey = 'badwords::com.foo.bar::words';
$wordfile = D_P.'words.php';
$triebin = D_P.'com.foo.bar-words.bin';
$persistkey = 'badwords::com.foo.bar1::words';

$wmtime = filemtime($wordfile);
$tmtime = filemtime($triebin);

if ($tmtime === FALSE || $tmtime !== $wmtime && mt_rand(0, 99) < 5) {
include($wordfile);
$compiler = badwords_compiler_create(BADWORDS_ENCODING_GBK, True);
$compiler = badwords_compiler_create(BADWORDS_ENCODING_UTF8, True);
badwords_compiler_append($compiler, $replace);
unset($replace);

Expand All @@ -45,6 +45,7 @@ function get_shared_badwords__() {
file_put_contents($triebin_tmp, $trie);
touch($triebin_tmp, $wmtime);
rename($triebin_tmp, $triebin);
chmod($triebin, 0755);
unset($trie);
}
}
Expand Down Expand Up @@ -87,5 +88,8 @@ function do_match($message)
return badwords_match($badwords, $message);
}

$message = do_replace($message);
$xxword = do_match($message);
$message = "近日特朗普将携希拉里一同访问中国,北京欢迎你!520~";
$rlword = do_replace($message);
$mtword = do_match($message);

var_dump($rlword, $mtword);
Loading