/* * @Descripttion:敏感词 * @version: * @Author: Neo,Huang * @Date: 2021-04-17 11:04:16 * @LastEditors: Neo,Huang * @LastEditTime: 2021-08-06 14:12:44 */ package sensitive import ( "bufio" "fmt" "io" "os" "strings" ) type Null struct{} var sensitiveWord = make(map[string]interface{}) var Set = make(map[string]Null) const InvalidWords = " ,~,!,@,#,$,%,^,&,*,(,),_,-,+,=,?,<,>,.,—,,,。,/,\\,|,《,》,?,;,:,:,',‘,;,“," var InvalidWord = make(map[string]Null) //无效词汇,不参与敏感词汇判断直接忽略 // 初始化 func InitDFA() { words := strings.Split(InvalidWords, ",") for _, v := range words { InvalidWord[v] = Null{} } f, err := os.Open("sensitive/words.txt") if err != nil { return } defer f.Close() buff := bufio.NewReader(f) for { line, err := buff.ReadString('\n') if err != nil || io.EOF == err { break } line = strings.TrimSpace(line) Set[line] = Null{} } AddSensitiveToMap(Set) } //生成违禁词集合 func AddSensitiveToMap(set map[string]Null) { for key := range set { str := []rune(key) nowMap := sensitiveWord for i := 0; i < len(str); i++ { if _, ok := nowMap[string(str[i])]; !ok { //如果该key不存在, thisMap := make(map[string]interface{}) thisMap["isEnd"] = false nowMap[string(str[i])] = thisMap nowMap = thisMap } else { nowMap = nowMap[string(str[i])].(map[string]interface{}) } if i == len(str)-1 { nowMap["isEnd"] = true } } } } func GetSensitiveMap() map[string]interface{} { return sensitiveWord } //敏感词汇转换为* func ChangeSensitiveWords(txt string, sensitive map[string]interface{}) (word string) { str := []rune(txt) nowMap := sensitive start := -1 tag := -1 for i := 0; i < len(str); i++ { if _, ok := InvalidWord[(string(str[i]))]; ok || string(str[i]) == "," { continue } if thisMap, ok := nowMap[string(str[i])].(map[string]interface{}); ok { tag++ if tag == 0 { start = i } isEnd, _ := thisMap["isEnd"].(bool) if isEnd { for y := start; y < i+1; y++ { str[y] = 42 } nowMap = sensitive start = -1 tag = -1 } else { nowMap = nowMap[string(str[i])].(map[string]interface{}) } } else { if start != -1 { i = start } nowMap = sensitive start = -1 tag = -1 } } return string(str) } // 敏感词检查 func CheckSensitive(txt string, sensitive map[string]interface{}) string { str := []rune(txt) nowMap := sensitive start := -1 tag := -1 for i := 0; i < len(str); i++ { // 跳过无效字符 if _, ok := InvalidWord[(string(str[i]))]; ok || string(str[i]) == "," { continue } if thisMap, ok := nowMap[string(str[i])].(map[string]interface{}); ok { tag++ if tag == 0 { start = i } isEnd, _ := thisMap["isEnd"].(bool) if isEnd { sensi := "" for y := start; y < i+1; y++ { sensi += string(str[y]) } fmt.Printf("敏感词:%s\n", sensi) return "false" } else { nowMap = nowMap[string(str[i])].(map[string]interface{}) } } else { if start != -1 { i = start } nowMap = sensitive start = -1 tag = -1 } } return "true" }