123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 |
- /*
- * @Description:敏感词
- * @version:
- * @Author: Neo,Huang
- * @Date: 2021-04-17 11:04:16
- * @LastEditors: Neo,Huang
- * @LastEditTime: 2021-08-06 14:12:44
- */
- package sensitive
- import (
- "bufio"
- "fmt"
- "io"
- "os"
- "strings"
- )
// Null is an empty struct used as the value type of set-like maps.
type Null struct{}

// InvalidWords is the comma-separated list of characters that are ignored
// while matching sensitive words. Because the comma is the separator it
// cannot appear as an entry of its own; the matching functions test for it
// explicitly.
const InvalidWords = " ,~,!,@,#,$,%,^,&,*,(,),_,-,+,=,?,<,>,.,—,,,。,/,\\,|,《,》,?,;,:,:,',‘,;,“,"

var (
	// sensitiveWord is the root of the sensitive-word trie filled by
	// AddSensitiveToMap. Keys are single characters mapping to child nodes,
	// plus the "isEnd" flag inside every non-root node.
	sensitiveWord = make(map[string]interface{})

	// Set holds the sensitive words loaded by InitDFA.
	Set = make(map[string]Null)

	// InvalidWord is the set of ignored characters; they take no part in
	// sensitive-word matching and are skipped outright. InitDFA fills it
	// from InvalidWords.
	InvalidWord = make(map[string]Null)
)
- // 初始化
- func InitDFA() {
- words := strings.Split(InvalidWords, ",")
- for _, v := range words {
- InvalidWord[v] = Null{}
- }
- f, err := os.Open("sensitive/words.txt")
- if err != nil {
- return
- }
- defer f.Close()
- buff := bufio.NewReader(f)
- for {
- line, err := buff.ReadString('\n')
- if err != nil || io.EOF == err {
- break
- }
- line = strings.TrimSpace(line)
- Set[line] = Null{}
- }
- AddSensitiveToMap(Set)
- }
- //生成违禁词集合
- func AddSensitiveToMap(set map[string]Null) {
- for key := range set {
- str := []rune(key)
- nowMap := sensitiveWord
- for i := 0; i < len(str); i++ {
- if _, ok := nowMap[string(str[i])]; !ok { //如果该key不存在,
- thisMap := make(map[string]interface{})
- thisMap["isEnd"] = false
- nowMap[string(str[i])] = thisMap
- nowMap = thisMap
- } else {
- nowMap = nowMap[string(str[i])].(map[string]interface{})
- }
- if i == len(str)-1 {
- nowMap["isEnd"] = true
- }
- }
- }
- }
- func GetSensitiveMap() map[string]interface{} {
- return sensitiveWord
- }
- //敏感词汇转换为*
- func ChangeSensitiveWords(txt string, sensitive map[string]interface{}) (word string) {
- str := []rune(txt)
- nowMap := sensitive
- start := -1
- tag := -1
- for i := 0; i < len(str); i++ {
- if _, ok := InvalidWord[(string(str[i]))]; ok || string(str[i]) == "," {
- continue
- }
- if thisMap, ok := nowMap[string(str[i])].(map[string]interface{}); ok {
- tag++
- if tag == 0 {
- start = i
- }
- isEnd, _ := thisMap["isEnd"].(bool)
- if isEnd {
- for y := start; y < i+1; y++ {
- str[y] = 42
- }
- nowMap = sensitive
- start = -1
- tag = -1
- } else {
- nowMap = nowMap[string(str[i])].(map[string]interface{})
- }
- } else {
- if start != -1 {
- i = start
- }
- nowMap = sensitive
- start = -1
- tag = -1
- }
- }
- return string(str)
- }
- // 敏感词检查
- func CheckSensitive(txt string, sensitive map[string]interface{}) string {
- str := []rune(txt)
- nowMap := sensitive
- start := -1
- tag := -1
- for i := 0; i < len(str); i++ {
- // 跳过无效字符
- if _, ok := InvalidWord[(string(str[i]))]; ok || string(str[i]) == "," {
- continue
- }
- if thisMap, ok := nowMap[string(str[i])].(map[string]interface{}); ok {
- tag++
- if tag == 0 {
- start = i
- }
- isEnd, _ := thisMap["isEnd"].(bool)
- if isEnd {
- sensi := ""
- for y := start; y < i+1; y++ {
- sensi += string(str[y])
- }
- fmt.Printf("敏感词:%s\n", sensi)
- return "false"
- } else {
- nowMap = nowMap[string(str[i])].(map[string]interface{})
- }
- } else {
- if start != -1 {
- i = start
- }
- nowMap = sensitive
- start = -1
- tag = -1
- }
- }
- return "true"
- }
|