富文本内容转换

文本中识别敏感词、表情包、链接、数字、字母并高亮

// Rule types in matching priority order: priority decreases from left to right.
// getHighlight applies the rules in this order; text matched by an earlier rule
// is turned into a { type, content } object and is not split again by later rules.
var levels = ["link", "emoji", "joke", "numberalphabet"];
// Per-type rule table: the regex used to find matches plus the tag and
// attributes used to render them.
//   tag            - element name emitted by renderHtmlTag
//   regex          - pattern compiled (together with regexAttribute) in getRegExpRoundJokeArr
//   regexAttribute - RegExp flags string
//   single         - when truthy, renderHtmlTag emits a self-closing tag with no body
//   attr           - attribute name -> value; function values are invoked by
//                    renderHtmlTag to produce the attribute value
var rulesMap = {
  // URLs starting with http(s):// or ftp(s)://, rendered as links opening in a new tab.
  link: {
    tag: "a",
    regex:
      /(((ht|f)tps?):\/\/)+[\w-]+(\.[\w-]+)+([\w.,@?^=%&:;/~+#-]*[\w@?^=%&;/~+#-])?/,
    regexAttribute: "gi",
    attr: {
      target: "_blank",
      href: (content) => content,
    },
  },
  // Bracketed codes such as [name] (shortest match), rendered as a 25x25 image.
  emoji: {
    tag: "img",
    regex: /\[.*?\]/,
    regexAttribute: "g",
    single: true,
    attr: {
      style: "width: 25px; height: 25px",
      src: (content) => content || "",
      alt: "",
    },
  },
  // Sensitive words, rendered as a highlighted span.
  joke: {
    tag: "span",
    // Not used for matching: getRegExpRoundJokeArr builds one pattern per
    // sensitive word supplied by the caller instead.
    regex: "",
    regexAttribute: "g",
    attr: {
      style: "color: #ff0; background-color: #f00",
    },
  },
  // Single ASCII letters and digits, rendered as a highlighted span.
  numberalphabet: {
    tag: "span",
    regex: /[a-zA-Z0-9]/,
    regexAttribute: "g",
    attr: {
      style: "color: #ff0; background-color: #f00",
    },
  },
};

/**
 * Report whether `val` carries the internal "[object Function]" type tag.
 * @param {*} val - Value to inspect.
 * @returns {boolean} True when the tag is exactly "[object Function]".
 */
function isFunction(val) {
  const typeTag = Object.prototype.toString.call(val);
  return typeTag === "[object Function]";
}

/**
 * Report whether `val` carries the internal "[object Object]" type tag.
 * Arrays, null, dates and other tagged built-ins are rejected.
 * @param {*} val - Value to inspect.
 * @returns {boolean} True when the tag is exactly "[object Object]".
 */
function isObject(val) {
  const typeTag = Object.prototype.toString.call(val);
  return typeTag === "[object Object]";
}

/**
 * Report whether `val` is an array.
 *
 * Uses the built-in `Array.isArray` rather than comparing the
 * `Object.prototype.toString` tag: the tag can be changed through
 * `Symbol.toStringTag`, which would make a real array (or Array subclass)
 * fail the check and let a plain object pass it.
 * @param {*} val - Value to inspect.
 * @returns {boolean} True when `val` is an array.
 */
function isArray(val) {
  return Array.isArray(val);
}

/**
 * Report whether `val` carries the internal "[object String]" type tag.
 * Both primitive strings and `String` wrapper objects satisfy this.
 * @param {*} val - Value to inspect.
 * @returns {boolean} True when the tag is exactly "[object String]".
 */
function isString(val) {
  const typeTag = Object.prototype.toString.call(val);
  return typeTag === "[object String]";
}

/**
 * Build the HTML markup for one matched fragment.
 *
 * Looks up the rule for `type` in `rulesMap` and serialises its `attr` map
 * into `key="value"` pairs. Function-valued attributes are called with the
 * matched text, so rules such as `href: (content) => content` receive it.
 * Attribute values are HTML-escaped because they can embed matched text.
 *
 * @param {string} type - Key of the rule in `rulesMap`.
 * @param {string} content - Matched text; passed to function-valued
 *   attributes and used verbatim as the element body for non-`single` rules.
 * @returns {string} A self-closing tag when the rule sets `single`, otherwise
 *   an opening tag, the content and a closing tag.
 */
function renderHtmlTag(type, content) {
  const { tag, single, attr } = rulesMap[type];

  // Escape the characters that could end the quoted attribute value or open
  // a new tag; `&` goes first so the entities added afterwards stay intact.
  const escapeAttribute = (raw) =>
    String(raw)
      .replace(/&/g, "&amp;")
      .replace(/"/g, "&quot;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

  const attributes = Object.entries(attr)
    .map(([key, value]) => {
      // Function-valued attributes derive their value from the matched text.
      const resolved = isFunction(value) ? value(content) : value;
      return `${key}="${escapeAttribute(resolved)}"`;
    })
    .join(" ");

  return single
    ? `<${tag} ${attributes} />`
    : `<${tag} ${attributes}>${content}</${tag}>`;
}

/**
 * Convert plain text into HTML with links, emoji codes, sensitive words and
 * alphanumerics wrapped in their configured tags.
 *
 * @function
 * @param {string} [content=""] - Text to process.
 * @param {string[]} [jokeMatchWords] - Sensitive words to highlight.
 * @returns {string} The generated HTML string.
 */
function getHighlight(content = "", jokeMatchWords) {
  // De-duplicate the sensitive words and order them longest first, so a long
  // word is matched before any shorter word it contains.
  const uniqueJokeWords = Array.from(new Set(jokeMatchWords)).sort(
    (a, b) => b.length - a.length
  );

  // One { type, regExp } entry per pattern, in priority order.
  const typedRegExps = getFlatRegExpArr(levels, uniqueJokeWords);

  // Apply each pattern in turn. Matched text becomes { type, content } objects
  // and only the remaining plain strings are split by later patterns.
  const renderArray = typedRegExps.reduce(
    (pieces, { type, regExp }) => recursiveSplitContent(type, regExp, pieces),
    [content]
  );

  // Called directly rather than through `this`: as a plain function call
  // `this` is undefined in strict mode and in ES modules.
  return renderHtmlByJudgeType(renderArray, "");
}

/**
 * Append the HTML for `renderContent` to `htmlString` and return the result.
 *
 * Dispatches on the kind of value:
 *   - array:  each element is rendered recursively and concatenated in order
 *   - object: rendered through renderHtmlTag using its `type` and `content`
 *   - string: appended as-is
 *   - other:  ignored
 *
 * @param {*} renderContent - String, { type, content } object, or (nested) array of those.
 * @param {string} htmlString - HTML accumulated so far.
 * @returns {string} `htmlString` followed by the rendered content.
 */
function renderHtmlByJudgeType(renderContent, htmlString) {
  if (isArray(renderContent)) {
    return renderContent.reduce(
      (html, child) => html + renderHtmlByJudgeType(child, ""),
      htmlString
    );
  }
  if (isObject(renderContent)) {
    const { type, content } = renderContent;
    return htmlString + renderHtmlTag(type, content);
  }
  if (isString(renderContent)) {
    return htmlString + renderContent;
  }
  return htmlString;
}

/**
 * Split plain text around every match of `regExp`, replacing each match with
 * a `{ type, content }` marker object.
 *
 *   - array input:  each element is processed recursively, nesting preserved
 *   - string input: returned unchanged when nothing matches; otherwise an
 *     array alternating text and markers, e.g. "a1b" with /\d/g gives
 *     ["a", { type, content: "1" }, "b"]. Empty strings between adjacent
 *     matches and at the edges are kept.
 *   - anything else (e.g. a marker from an earlier rule) is returned untouched
 *
 * Matches are located by position in one pass over the string. The matched
 * text is never reused as a pattern, so regex metacharacters inside a match
 * (such as "[x]" or a URL containing "?") are not reinterpreted, and a match
 * that is a prefix of a longer match cannot cut the longer one apart.
 *
 * @param {string} type - Rule type recorded on each marker.
 * @param {RegExp|string} regExp - Pattern to search for. It is always applied
 *   globally, and the caller's RegExp object is not mutated.
 * @param {*} renderContent - String, marker object, or (nested) array of those.
 * @returns {*} The transformed content as described above.
 */
function recursiveSplitContent(type, regExp, renderContent) {
  if (isArray(renderContent)) {
    return renderContent.map((item) =>
      recursiveSplitContent(type, regExp, item)
    );
  }
  if (!isString(renderContent)) {
    return renderContent;
  }

  // Work on a private global copy: exec() advances lastIndex, and the loop
  // below needs the "g" flag to make progress through the string.
  const sourceFlags = regExp instanceof RegExp ? regExp.flags : "";
  const flags = sourceFlags.includes("g") ? sourceFlags : `${sourceFlags}g`;
  const matcher = new RegExp(regExp, flags);

  const pieces = [];
  let cursor = 0;
  let match = matcher.exec(renderContent);
  while (match !== null) {
    const word = match[0];
    if (word === "") {
      // A zero-length match carries no text to highlight; step past it so the
      // loop cannot stall at the same index.
      matcher.lastIndex += 1;
    } else {
      pieces.push(renderContent.slice(cursor, match.index));
      pieces.push({ type, content: word });
      cursor = match.index + word.length;
    }
    match = matcher.exec(renderContent);
  }

  if (pieces.length === 0) {
    return renderContent;
  }
  pieces.push(renderContent.slice(cursor));
  return pieces;
}

/**
 * Build a flat, one-dimensional list of `{ type, regExp }` entries.
 *
 * For every level (in the given order) the patterns returned by
 * getRegExpRoundJokeArr are each paired with that level's name.
 *
 * @param {string[]} levels - Rule type names, in priority order.
 * @param {string[]} jokeMatchWords - Sensitive words, forwarded to getRegExpRoundJokeArr.
 * @returns {{type: string, regExp: RegExp}[]} One entry per pattern.
 */
function getFlatRegExpArr(levels, jokeMatchWords) {
  return levels.flatMap((level) => {
    const patterns = getRegExpRoundJokeArr(level, jokeMatchWords);
    return patterns.map((pattern) => ({ type: level, regExp: pattern }));
  });
}

/**
 * Build the list of regular expressions for one rule type.
 *
 * For the "joke" type one pattern is produced per sensitive word. Each word
 * is matched literally, so regex metacharacters in it are escaped, and empty
 * or null/undefined words are skipped because an empty pattern would match
 * at every position. For every other type the rule's own `regex` is compiled
 * with its `regexAttribute` flags.
 *
 * @param {string} type - Key of the rule in `rulesMap`.
 * @param {string[]} jokeMatchWords - Sensitive words; only used for "joke".
 * @returns {RegExp[]} The compiled patterns for this type.
 */
function getRegExpRoundJokeArr(type, jokeMatchWords) {
  const { regex, regexAttribute } = rulesMap[type];
  if (type !== "joke") {
    return [new RegExp(regex, regexAttribute)];
  }

  const regExpArray = [];
  for (const word of jokeMatchWords || []) {
    const literal = word == null ? "" : String(word);
    if (literal !== "") {
      // Escape regex syntax characters so the word only matches itself.
      const escaped = literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      regExpArray.push(new RegExp(escaped, regexAttribute));
    }
  }
  return regExpArray;
}

// ========== Test / demo ==========
// Sample sensitive words. Includes a word that is a prefix of a longer one,
// which exercises the longest-first ordering applied in getHighlight.
var jokeMatchWords = ["共产党", "习近平", "习近平的1", "嘻嘻", "哈哈", "色情"];
// Sample text mixing URLs, a bracketed emoji code, sensitive words and
// ASCII letters/digits.
var text =
  "https://www.google.com共产党的内容有习近平1231真的习近平https://www.google.com3你妹的习近平的1234sd2情['沮丧']的https://www.baidu.com图片4124色情的人还有aab色bccdd嘻嘻和span>哈哈哈https://www.google.com";
// Render the generated markup into the element with id "highlight".
// NOTE(review): the result is assigned to innerHTML, and text that matches no
// rule is passed through by renderHtmlByJudgeType without escaping — confirm
// the input is trusted or escape it before use.
document.querySelector("#highlight").innerHTML = getHighlight(
  text,
  jokeMatchWords
);

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202

富文本内容转换

文本中识别敏感词、表情包、链接、数字、字母并高亮

富文本内容转换

文本中识别敏感词、表情包、链接、数字、字母并高亮