.NET 6 实现敏感词过滤

一、什么是敏感词过滤?

敏感词过滤是一种处理网络内容的技术,可以检测和过滤出网络中的敏感/违禁词汇。它通过给定的关键字或字符串,判断网络内容是否包含某些敏感信息,从而防止违反法律法规的信息流通。

通常,可以使用两种方法来过滤敏感词:

黑名单过滤:即定义一个黑名单,将所有敏感词记录在其中,然后对输入的文本进行对比,如果发现有敏感词,就将其过滤掉。

白名单过滤:即定义一个白名单,将所有不敏感的词汇记录在其中,然后对输入的文本进行对比,如果发现有不在白名单中的词汇,就将其过滤掉。

二、ToolGood.Words是什么?

ToolGood.Words是一款高性能非法词(敏感词)检测组件,附带繁体简体互换,支持全角半角互换,获取拼音首字母,获取拼音字母,拼音模糊搜索等功能。

ToolGood.Words的源码网站:https://gitcode.net/mirrors/toolgood/ToolGood.Words?utm_source=csdn_github_accelerator

三、在Visual Studio中安装ToolGood.Words

3.1、右键项目解决方案,选择“管理NuGet程序包”,如下图所示:

3.2、切换到"浏览"选项卡,搜索"ToolGood.Words"并安装:

安装完之后最好重新编译生成项目

四、创建"subContentCheck"类

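敏感/违禁词汇因内容特殊不便上传,可自行在网上查找。按照下方代码的约定,词库文件需放在网站根目录(WebRootPath,默认为 wwwroot)下的 sensitiveWords/sensitiveWords.txt(敏感词)和 sensitiveWords/IllegalUrls.txt(敏感链接/网址)中,词与词之间用“|”分隔,可以分多行书写。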

代码语言:csharp
复制
using Microsoft.AspNetCore.DataProtection.KeyManagement;
using Microsoft.AspNetCore.Http;
using Microsoft.CodeAnalysis.Text;
using Newtonsoft.Json;
using System.Collections;
using System.Text;
using ToolGood.Words;
using static System.Net.Mime.MediaTypeNames;
using IHostingEnvironment = Microsoft.AspNetCore.Hosting.IHostingEnvironment;

namespace WebApplication1 //放在自己项目中时,需要更换为自己的命名空间
{
/// <summary>
/// Serializable container for a list of prohibited keywords
/// (the element type accepted by <c>subContentCheck.Write</c>).
/// </summary>
public class keywords
{
    /// <summary>
    /// The prohibited keywords. Defaults to an empty list so that a freshly
    /// constructed instance can be added to without a null check.
    /// </summary>
    public List<string> IllegalKeywords { get; set; } = new List<string>();
}

/// <summary>
/// Serializable container for a list of prohibited links / site addresses
/// (the element type accepted by <c>subContentCheck.Write</c>).
/// </summary>
public class urlwords
{
    /// <summary>
    /// The prohibited URLs. Defaults to an empty list so that a freshly
    /// constructed instance can be added to without a null check.
    /// </summary>
    public List<string> IllegalUrls { get; set; } = new List<string>();
}

/// &lt;summary&gt;
/// 提交的内容敏感违禁词检查类
/// &lt;/summary&gt;
public class subContentCheck
{
    // Loads two '|'-separated dictionary files from the web root into Words and
    // uses ToolGood.Words' WordsSearch to mask or detect those entries in text.

    /// <summary>
    /// Hosting environment; only <c>WebRootPath</c> is read, to locate the dictionary files.
    /// </summary>
    private readonly IHostingEnvironment _hostingEnv;

    /// <summary>
    /// Location of the sensitive-word dictionary, appended to the web root path.
    /// </summary>
    private readonly string dictionaryPath = "/sensitiveWords/sensitiveWords.txt";

    /// <summary>
    /// Location of the sensitive link / site / address dictionary, appended to the web root path.
    /// </summary>
    private readonly string urlsPath = "/sensitiveWords/IllegalUrls.txt";

    /// <summary>
    /// All loaded entries (words followed by URLs) that are handed to the matcher.
    /// </summary>
    public string[] Words { get; set; }

    /// <summary>
    /// Creates the checker and immediately loads both dictionaries.
    /// </summary>
    /// <param name="hostingEnv">Hosting environment whose <c>WebRootPath</c> contains the dictionary files.</param>
    /// <exception cref="ArgumentNullException"><paramref name="hostingEnv"/> is null.</exception>
    public subContentCheck(IHostingEnvironment hostingEnv)
    {
        _hostingEnv = hostingEnv ?? throw new ArgumentNullException(nameof(hostingEnv));
        InitDictionary();
    }

    /// <summary>
    /// (Re)loads the in-memory dictionary from the word file and the URL file.
    /// </summary>
    /// <remarks>
    /// I/O exceptions from reading either file propagate to the caller.
    /// <see cref="Words"/> is only replaced after both files were read successfully,
    /// so a failed reload does not leave the checker with an empty dictionary.
    /// </remarks>
    public void InitDictionary()
    {
        var entries = new List<string>();
        AppendEntries(entries, _hostingEnv.WebRootPath + dictionaryPath);
        AppendEntries(entries, _hostingEnv.WebRootPath + urlsPath);
        Words = entries.ToArray();
    }

    /// <summary>
    /// Reads one UTF-8 dictionary file and appends every '|'-separated entry of every line to <paramref name="target"/>.
    /// </summary>
    private static void AppendEntries(List<string> target, string filePath)
    {
        foreach (string line in System.IO.File.ReadAllLines(filePath, System.Text.Encoding.UTF8))
        {
            // Entries are trimmed and blank ones dropped: a whitespace-only entry would
            // otherwise become a keyword and cause every space in the input to be masked.
            target.AddRange(line.Split('|', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries));
        }
    }

    /// <summary>
    /// Builds a matcher loaded with the current <see cref="Words"/>.
    /// </summary>
    /// <returns><c>false</c> when <c>SetKeywords</c> throws; <c>true</c> otherwise.</returns>
    private bool TryCreateSearch(out WordsSearch wordsSearch)
    {
        wordsSearch = new WordsSearch();
        try
        {
            wordsSearch.SetKeywords(Words);
            return true;
        }
        catch (Exception)
        {
            // Deliberately reported through the status-code contract of the public methods.
            return false;
        }
    }

    /// <summary>
    /// Replaces every dictionary entry found in the text with the replacement character.
    /// </summary>
    /// <param name="sourceText">Text to filter.</param>
    /// <param name="replaceChar">Character used to mask matches; defaults to '*'.</param>
    /// <returns>
    /// "2" when <paramref name="sourceText"/> is null or empty; "0" when loading the keywords
    /// into the matcher failed; "1" when the replacement itself failed; otherwise the filtered text.
    /// </returns>
    /// <remarks>
    /// NOTE(review): the status codes share the return channel with the filtered text, so a clean
    /// input that is literally "0", "1" or "2" is indistinguishable from an error for the caller.
    /// Fixing that requires a different return shape and is left to a follow-up.
    /// </remarks>
    public string FilterWithChar(string sourceText, char replaceChar = '*')
    {
        if (string.IsNullOrEmpty(sourceText))
        {
            return "2";
        }

        if (!TryCreateSearch(out WordsSearch wordsSearch))
        {
            return "0";
        }

        try
        {
            return wordsSearch.Replace(sourceText, replaceChar);
        }
        catch (Exception)
        {
            return "1";
        }
    }

    /// <summary>
    /// Checks whether the text contains any dictionary entry.
    /// </summary>
    /// <param name="sourceText">Text to inspect.</param>
    /// <returns>
    /// "2" when <paramref name="sourceText"/> is null or empty; "0" when loading the keywords
    /// into the matcher failed; "1" when the lookup itself failed; "3" when at least one entry
    /// is present; "4" when none is.
    /// </returns>
    public string FindSensitiveKey(string sourceText)
    {
        if (string.IsNullOrEmpty(sourceText))
        {
            return "2";
        }

        if (!TryCreateSearch(out WordsSearch wordsSearch))
        {
            return "0";
        }

        try
        {
            return wordsSearch.ContainsAny(sourceText) ? "3" : "4";
        }
        catch (Exception)
        {
            return "1";
        }
    }

    /// <summary>
    /// Serializes one of the two lists to JSON, echoes it to the console and writes it to
    /// "&lt;current directory&gt;/&lt;filename&gt;.json" using the GB2312 encoding.
    /// </summary>
    /// <param name="jsonData">Keyword list; serialized when not null.</param>
    /// <param name="urlJsonData">URL list; serialized only when <paramref name="jsonData"/> is null.</param>
    /// <param name="filename">Target file name without the ".json" extension.</param>
    public static void Write(List<keywords> jsonData, List<urlwords> urlJsonData, string filename)
    {
        // Path.Combine instead of a hard-coded "\\" so the file lands in the current
        // directory on non-Windows hosts as well.
        string strFileName = System.IO.Path.Combine(Directory.GetCurrentDirectory(), filename + ".json");

        string listJson = jsonData != null
            ? JsonConvert.SerializeObject(jsonData)
            : JsonConvert.SerializeObject(urlJsonData);

        Console.WriteLine(listJson);

        // GB2312 is not built into .NET Core / .NET 5+; the code-pages provider must be
        // registered before Encoding.GetEncoding can resolve it.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        // FileMode.Create truncates an existing file. The previous OpenOrCreate kept the old
        // content and overwrote it from offset 0, leaving stale trailing bytes (invalid JSON)
        // whenever the new document was shorter than the old one.
        using (FileStream fs = new FileStream(strFileName, FileMode.Create, System.IO.FileAccess.ReadWrite, FileShare.ReadWrite))
        using (StreamWriter sw = new StreamWriter(fs, Encoding.GetEncoding("GB2312")))
        {
            sw.WriteLine(listJson);
        }
    }
}

}

五、写API接口

代码语言:csharp
复制
/// <summary>
/// 进行敏感词脱敏
/// </summary>
/// <param name="sourctText">需要脱敏的文本内容</param>
/// <returns></returns>
[HttpPost]
public IActionResult sensitive_words_replace2(string sourctText)
{
    // Response shape is always { code, msg, resultStr }:
    //   200 success (resultStr = masked text), 210 keyword setup failed, 220 unexpected exception,
    //   230/260 empty input, 240 replacement failed.

    // Reject empty input first so the dictionary files are not read for nothing.
    if (string.IsNullOrEmpty(sourctText))
    {
        return Json(new { code = 230, msg = "需要替换的文本内容为空!", resultStr = "" });
    }

    try
    {
        // Constructed inside the try: the constructor reads the dictionary files, and an
        // I/O failure there must produce the 220 payload rather than an unhandled exception.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string filtered = strCheck.FilterWithChar(sourctText);

        // FilterWithChar reports failures as the in-band codes "0" / "1" / "2";
        // anything else is the masked text.
        (int resCode, string resMsg, string resultStr) = filtered switch
        {
            "0" => (210, "设置违禁词时发生错误,请联系管理员!", ""),
            "1" => (240, "敏感内容替换时发生错误!", ""),
            "2" => (260, "需要替换的文本内容为空!", ""),
            _ => (200, "敏感词替换请求成功!", filtered),
        };

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感内容替换时发生错误!", resultStr = "" });
    }
}

/// <summary>
/// 进行敏感词判断
/// </summary>
/// <param name="sourctText">需要脱敏的文本内容</param>
/// <returns></returns>
[HttpPost]
public IActionResult whether_sensitive_words(string sourctText)
{
    // Response shape is always { code, msg, resultStr }:
    //   200 no sensitive word found, 270 sensitive word found, 210 keyword setup failed,
    //   220 unexpected exception, 230/260 empty input, 240 lookup failed.

    // Reject empty input first so the dictionary files are not read for nothing.
    // The message previously said "需要替换的…" (copied from the replace endpoint).
    if (string.IsNullOrEmpty(sourctText))
    {
        return Json(new { code = 230, msg = "需要判断的文本内容为空!", resultStr = "" });
    }

    try
    {
        // Constructed inside the try: the constructor reads the dictionary files, and an
        // I/O failure there must produce the 220 payload rather than an unhandled exception.
        subContentCheck strCheck = new subContentCheck(_hostingEnv);
        string status = strCheck.FindSensitiveKey(sourctText);

        // FindSensitiveKey returns "0".."4"; on the fall-through (clean) branch the raw
        // status is echoed back in resultStr, exactly as before.
        (int resCode, string resMsg, string resultStr) = status switch
        {
            "0" => (210, "设置违禁词时发生错误,请联系管理员!", ""),
            "1" => (240, "敏感内容匹配时发生错误!", ""),
            "2" => (260, "需要判断的文本内容为空!", ""),
            "3" => (270, "内容中含有敏感/违禁词!", ""),
            _ => (200, "内容中不含敏感/违禁词!", status),
        };

        return Json(new { code = resCode, msg = resMsg, resultStr = resultStr });
    }
    catch (Exception)
    {
        return Json(new { code = 220, msg = "敏感内容匹配时发生错误!", resultStr = "" });
    }
}

六、前端封装JS方法

代码语言:javascript
复制
    /**
     * 敏感词/违禁词替换
     * @param {string} sourctText 需要进行替换的内容
     * @param {string} boxid 将替换成功之后的内容赋值的元素容器id属性名
     * @param {object} layui Layui实例
     * @returns {string} 替换之后的文本内容
     */
    function sensitive_words_replace(sourctText, boxid, layui) {
        // Posts the text to the masking endpoint, writes the masked text into the element
        // with id `boxid` on success, and returns the masked text ("" when the request fails).

        // Status codes the backend uses to report a failure (see the controller action).
        const errorCodes = [210, 220, 230, 240, 260];
        let resultStr = "";

        $.ajax({
            url: "/Home/sensitive_words_replace2",//backend endpoint
            dataType: "JSON",
            type: "POST",
            // Must be synchronous: the function returns resultStr, which an asynchronous
            // request would only fill in after the function had already returned "".
            async: false,
            data: {
                "sourctText": sourctText
            },
            success: function (res) {
                // The code may arrive as a number or a numeric string; normalise once.
                const resCode = Number(res.code);
                if (errorCodes.indexOf(resCode) !== -1) {
                    // Close the loading layer before showing the error dialog.
                    layui.layer.closeAll();
                    resultStr = res.resultStr;
                    layui.layer.alert(res.msg, { icon: 5, title: "温馨提示", closeBtn: 0 });
                } else if (resCode === 200) {
                    resultStr = res.resultStr;
                    $("#" + boxid).val(resultStr);
                    layui.layer.closeAll();
                }
            },
            error: function (jqXHR, textStatus, errorThrown) {
                layui.layer.closeAll();
                // jQuery passes the jqXHR object first; show a readable message instead of the object.
                layui.layer.alert("请求失败:" + (errorThrown || textStatus || "未知错误"), { icon: 5, title: "温馨提示", closeBtn: 0 });
            }
        });

        return resultStr;
    }

    /**
     * 查询是否包含敏感/违禁词
     * @param {string} sourctText 需要进行判断的内容
     * @param {string} boxid 元素容器id属性名(当前函数体内未使用)
     * @param {object} layui Layui实例
     * @returns {boolean} 包含敏感/违禁词返回 true;不包含或请求出错返回 false
     */
    function whether_sensitive_words(sourctText, boxid, layui) {
        // Asks the backend whether the text contains a sensitive word.
        // Returns true only for response code 270; false for a clean text and for every failure.
        // `boxid` is not used here; it is kept so existing callers keep working.

        // Status codes the backend uses to report a failure (see the controller action).
        const errorCodes = [210, 220, 230, 240, 260];
        let resultBool = false;

        $.ajax({
            url: "/Home/whether_sensitive_words",//backend endpoint
            dataType: "JSON",
            type: "POST",
            // Synchronous on purpose: the return value is produced inside the callback.
            async: false,
            data: {
                "sourctText": sourctText
            },
            success: function (res) {
                // The code may arrive as a number or a numeric string; normalise once.
                const resCode = Number(res.code);
                if (errorCodes.indexOf(resCode) !== -1) {
                    resultBool = false;
                    layui.layer.alert(res.msg, { icon: 5, title: "温馨提示", closeBtn: 0 });
                } else if (resCode === 270) {
                    resultBool = true;
                } else if (resCode === 200) {
                    resultBool = false;
                    // Only the clean-text branch closes the loading layer, as before.
                    layui.layer.closeAll();
                }
            },
            error: function (jqXHR, textStatus, errorThrown) {
                // jQuery passes the jqXHR object first; show a readable message instead of the object.
                layui.layer.alert("请求失败:" + (errorThrown || textStatus || "未知错误"), { icon: 5, title: "温馨提示", closeBtn: 0 });
            }
        });

        return resultBool;
    }