Golang基于DFA算法实现敏感词汇过滤

栏目: Go · 发布时间: 5年前

内容简介:DFA(确定有穷自动机)。具体功能:将敏感词汇保存在map中,对敏感词汇进行过滤并替换为“*”,同时忽略无意义符号。

DFA:确定有穷自动机。

具体功能:

  1. 将敏感词汇保存在map中。
  2. 对敏感词汇进行过滤,将敏感词变为“*”。
  3. 对无意义符号进行忽略处理。
敏感词数据结构:
{  王:{
            isEnd: false
            八:{
                    isEnd:false
                    蛋:{
                              isEnd:true
                       }
                 }
       }
}
无意义符号数据结构:
{
  "@":Null (空结构体)
}
package main

import (
    "fmt"
    "strings"
)

// Null is an empty struct used as the value type of the set-like maps below.
type Null struct {}
// sensitiveWord is the root of the sensitive-word trie filled by
// AddSensitiveToMap: every single-character key maps to a nested map of the
// same shape, and the "isEnd" key of a node holds a bool that is true when
// the path to that node spells a complete sensitive word.
var sensitiveWord = make(map[string]interface{})
// Set is the collection of sensitive words; main fills it and passes it to
// AddSensitiveToMap.
var Set = make(map[string]Null)
// InvalidWords is the comma-separated list of meaningless symbols. main splits
// it on "," to populate InvalidWord, so the comma itself cannot become an
// entry through that route (consecutive commas yield empty strings instead).
const InvalidWords = " ,~,!,@,#,$,%,^,&,*,(,),_,-,+,=,?,<,>,.,—,,,。,/,\\,|,《,》,?,;,:,:,',‘,;,“,"
// InvalidWord is the lookup set of meaningless symbols consulted by
// ChangeSensitiveWords.
var InvalidWord = make(map[string]Null) // ignored symbols: skipped outright, never take part in sensitive-word matching

// AddSensitiveToMap builds the banned-word trie: every key of set is inserted,
// one character per level, into the package-level sensitiveWord map.
//
// A node is a map[string]interface{} whose "isEnd" entry is a bool; it is
// created as false and switched to true on the node reached by the last
// character of a word. Existing nodes are reused, so words sharing a prefix
// share the corresponding path. An empty key adds nothing.
func AddSensitiveToMap(set map[string]Null) {
	for word := range set {
		runes := []rune(word)
		last := len(runes) - 1
		node := sensitiveWord
		for idx, r := range runes {
			ch := string(r)
			next, found := node[ch]
			if found {
				// Descend into the node created by an earlier word.
				node = next.(map[string]interface{})
			} else {
				// First time this character is seen at this level.
				fresh := map[string]interface{}{"isEnd": false}
				node[ch] = fresh
				node = fresh
			}
			if idx == last {
				node["isEnd"] = true
			}
		}
	}
}
// ChangeSensitiveWords returns txt with every sensitive word replaced by '*'.
//
// sensitive is the root of a trie in which each single-character key maps to
// a nested map[string]interface{} and the "isEnd" key holds a bool marking the
// end of a complete word. txt is scanned rune by rune:
//
//   - Runes found in InvalidWord, and the comma, are skipped without
//     interrupting a match in progress; when the match completes they are
//     masked together with the word that surrounds them.
//   - A match is finalized as soon as a node with isEnd == true is reached, so
//     when one sensitive word is a prefix of another only the shorter one is
//     masked.
//   - When a partial match fails, scanning restarts at the rune immediately
//     after the one where that partial match began, so a word starting inside
//     the failed candidate is still found.
func ChangeSensitiveWords(txt string, sensitive map[string]interface{}) string {
	runes := []rune(txt)
	node := sensitive
	start := -1 // index of the first rune of the current candidate, -1 when none
	for i := 0; i < len(runes); i++ {
		ch := string(runes[i])
		// The comma is tested explicitly: it is the separator of the
		// InvalidWords list, so splitting that list never yields it as a key.
		if _, skip := InvalidWord[ch]; skip || ch == "," {
			continue
		}

		child, ok := node[ch].(map[string]interface{})
		if !ok {
			if start != -1 {
				// Rewind to the candidate's first rune; the loop increment
				// then resumes at start+1. (Rewinding to start+1 here would
				// skip that rune and miss words beginning on it.)
				i = start
			}
			node, start = sensitive, -1
			continue
		}

		if start == -1 {
			start = i
		}
		if isEnd, _ := child["isEnd"].(bool); isEnd {
			// Mask the whole span, including any skipped symbols inside it.
			for j := start; j <= i; j++ {
				runes[j] = '*'
			}
			node, start = sensitive, -1
			continue
		}
		node = child
	}
	return string(runes)
}
// main runs the demo: it fills InvalidWord from the comma-separated
// InvalidWords list, registers three sensitive words in Set, builds the trie
// with AddSensitiveToMap, and prints a sample sentence after filtering it
// through ChangeSensitiveWords.
func main() {
	// Every piece of the comma-separated list becomes an ignored symbol.
	for _, symbol := range strings.Split(InvalidWords, ",") {
		InvalidWord[symbol] = Null{}
	}

	// Register the demo vocabulary, then load it into the trie.
	for _, banned := range []string{"你妈逼的", "你妈", "日"} {
		Set[banned] = Null{}
	}
	AddSensitiveToMap(Set)

	const sample = "文明用语你&* 妈,逼的你这个狗日的,怎么这么傻啊。我也是服了,我日,这些话我都说不出口"
	masked := ChangeSensitiveWords(sample, sensitiveWord)
	fmt.Println(masked)

}

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持码农网。

查看所有标签

猜你喜欢:

本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们

Is Parallel Programming Hard, And, If So, What Can You Do About It?

Is Parallel Programming Hard, And, If So, What Can You Do About It?

Paul E. McKenney

The purpose of this book is to help you understand how to program shared-memory parallel machines without risking your sanity. By describing the algorithms and designs that have worked well in the pa......一起来看看 《Is Parallel Programming Hard, And, If So, What Can You Do About It?》 这本书的介绍吧!

HTML 编码/解码
HTML 编码/解码

HTML 编码/解码

XML 在线格式化
XML 在线格式化

在线 XML 格式化压缩工具

HEX HSV 转换工具
HEX HSV 转换工具

HEX HSV 互换工具