dfa.go 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. /*
  2. * @Description:敏感词
  3. * @version:
  4. * @Author: Neo,Huang
  5. * @Date: 2021-04-17 11:04:16
  6. * @LastEditors: Neo,Huang
  7. * @LastEditTime: 2021-08-06 14:12:44
  8. */
  9. package sensitive
  10. import (
  11. "bufio"
  12. "fmt"
  13. "io"
  14. "os"
  15. "strings"
  16. )
  17. type Null struct{}
  18. var sensitiveWord = make(map[string]interface{})
  19. var Set = make(map[string]Null)
  20. const InvalidWords = " ,~,!,@,#,$,%,^,&,*,(,),_,-,+,=,?,<,>,.,—,,,。,/,\\,|,《,》,?,;,:,:,',‘,;,“,"
  21. var InvalidWord = make(map[string]Null) //无效词汇,不参与敏感词汇判断直接忽略
  22. // 初始化
  23. func InitDFA() {
  24. words := strings.Split(InvalidWords, ",")
  25. for _, v := range words {
  26. InvalidWord[v] = Null{}
  27. }
  28. f, err := os.Open("sensitive/words.txt")
  29. if err != nil {
  30. return
  31. }
  32. defer f.Close()
  33. buff := bufio.NewReader(f)
  34. for {
  35. line, err := buff.ReadString('\n')
  36. if err != nil || io.EOF == err {
  37. break
  38. }
  39. line = strings.TrimSpace(line)
  40. Set[line] = Null{}
  41. }
  42. AddSensitiveToMap(Set)
  43. }
  44. //生成违禁词集合
  45. func AddSensitiveToMap(set map[string]Null) {
  46. for key := range set {
  47. str := []rune(key)
  48. nowMap := sensitiveWord
  49. for i := 0; i < len(str); i++ {
  50. if _, ok := nowMap[string(str[i])]; !ok { //如果该key不存在,
  51. thisMap := make(map[string]interface{})
  52. thisMap["isEnd"] = false
  53. nowMap[string(str[i])] = thisMap
  54. nowMap = thisMap
  55. } else {
  56. nowMap = nowMap[string(str[i])].(map[string]interface{})
  57. }
  58. if i == len(str)-1 {
  59. nowMap["isEnd"] = true
  60. }
  61. }
  62. }
  63. }
  64. func GetSensitiveMap() map[string]interface{} {
  65. return sensitiveWord
  66. }
  67. //敏感词汇转换为*
  68. func ChangeSensitiveWords(txt string, sensitive map[string]interface{}) (word string) {
  69. str := []rune(txt)
  70. nowMap := sensitive
  71. start := -1
  72. tag := -1
  73. for i := 0; i < len(str); i++ {
  74. if _, ok := InvalidWord[(string(str[i]))]; ok || string(str[i]) == "," {
  75. continue
  76. }
  77. if thisMap, ok := nowMap[string(str[i])].(map[string]interface{}); ok {
  78. tag++
  79. if tag == 0 {
  80. start = i
  81. }
  82. isEnd, _ := thisMap["isEnd"].(bool)
  83. if isEnd {
  84. for y := start; y < i+1; y++ {
  85. str[y] = 42
  86. }
  87. nowMap = sensitive
  88. start = -1
  89. tag = -1
  90. } else {
  91. nowMap = nowMap[string(str[i])].(map[string]interface{})
  92. }
  93. } else {
  94. if start != -1 {
  95. i = start
  96. }
  97. nowMap = sensitive
  98. start = -1
  99. tag = -1
  100. }
  101. }
  102. return string(str)
  103. }
  104. // 敏感词检查
  105. func CheckSensitive(txt string, sensitive map[string]interface{}) string {
  106. str := []rune(txt)
  107. nowMap := sensitive
  108. start := -1
  109. tag := -1
  110. for i := 0; i < len(str); i++ {
  111. // 跳过无效字符
  112. if _, ok := InvalidWord[(string(str[i]))]; ok || string(str[i]) == "," {
  113. continue
  114. }
  115. if thisMap, ok := nowMap[string(str[i])].(map[string]interface{}); ok {
  116. tag++
  117. if tag == 0 {
  118. start = i
  119. }
  120. isEnd, _ := thisMap["isEnd"].(bool)
  121. if isEnd {
  122. sensi := ""
  123. for y := start; y < i+1; y++ {
  124. sensi += string(str[y])
  125. }
  126. fmt.Printf("敏感词:%s\n", sensi)
  127. return "false"
  128. } else {
  129. nowMap = nowMap[string(str[i])].(map[string]interface{})
  130. }
  131. } else {
  132. if start != -1 {
  133. i = start
  134. }
  135. nowMap = sensitive
  136. start = -1
  137. tag = -1
  138. }
  139. }
  140. return "true"
  141. }