寂静欢喜

字典树的Node.js语言实现

敏感词处理最初是用 Node.js 实现的,
这里也顺便把代码贴出来。

代码如下:
/*
* 敏感词处理
*/

var fs = require('fs');
var readline = require('readline');
// Root node of the sensitive-word trie: each key is one character and each
// value is the child node for that character.
const root = {};

// Timestamp used for timing; it is reassigned right before filtering starts.
let start_time = new Date();
/**
 * Add one sensitive word to the trie stored in the module-level `root`.
 *
 * The word is split into Unicode code points with `Array.from`, and one trie
 * level is created (or reused) per code point. The node reached by the last
 * code point is flagged with `END = 1` to mark a complete word.
 *
 * @param {string} word - The word to insert. `null`, `undefined` and the
 *   empty string are ignored.
 * @returns {void}
 */
function addWord(word) {
  if (word == null) return;
  const chars = Array.from(word);
  // Nothing to insert; also keeps the root itself from being flagged as END.
  if (chars.length === 0) return;

  let node = root;
  // for...of visits only the array elements. for...in would yield string
  // keys and also any enumerable property added to Array.prototype.
  for (const ch of chars) {
    if (node[ch] == null) {
      node[ch] = {};
    }
    node = node[ch];
  }
  // `node` is now the node of the last character: mark the word boundary.
  node["END"] = 1;
}

// The sensitive words are stored in out6.txt, one word per line.
const rl = readline.createInterface({
  input: fs.createReadStream('out6.txt'),
  output: process.stdout,
  terminal: false
});

// Every line read from the word list becomes one entry in the trie.
rl.on('line', (line) => {
  addWord(line);
});

// When the interface closes, read the text to filter and time the masking pass.
rl.on('close', () => {
  // The text that needs filtering is stored in out_test.txt.
  fs.readFile('out_test.txt', (err, data) => {
    if (err) {
      throw err;
    }
    // Restart the clock so only the replaceWith call is measured.
    start_time = new Date();
    // The masked text is not used here; only the elapsed time is printed.
    replaceWith(data.toString(), "*");
    const elapsedMs = new Date().getTime() - start_time.getTime();
    console.log(elapsedMs);
  });
});

/**
 * Replace every sensitive word found in `text` with the mask.
 *
 * The text is scanned by Unicode code point. At each position the trie in
 * the module-level `root` is walked as far as the text allows; the walk
 * stops at the first node flagged `END`, so the shortest word starting at
 * that position wins. Each code point of the matched word is replaced by
 * `mask`, and scanning resumes right after the match, so matches never
 * overlap.
 *
 * @param {string} text - The text to filter.
 * @param {string} mask - Replacement written once per matched code point.
 * @returns {string|undefined} The masked text; `''` for an empty string;
 *   `undefined` when `text` is `null` or `undefined`.
 */
function replaceWith(text, mask) {
  if (text == null) return;
  // An empty input is returned as an empty string rather than undefined.
  if (text.length === 0) return '';

  const chars = Array.from(text);
  const lastIndex = chars.length - 1;

  for (let i = 0; i <= lastIndex; i++) {
    let node = root;
    let matched = 0;
    for (let k = i; k <= lastIndex; k++) {
      node = node[chars[k]];
      // No trie branch for this character: no word starts at position i.
      if (node == null) break;
      matched += 1;
      if (node["END"] === 1) {
        chars.fill(mask, i, i + matched);
        // Skip the masked span; the loop's i++ moves to the next character.
        i += matched - 1;
        break;
      }
    }
  }
  return chars.join('');
}