ASP.NET过滤HTML标签只保留换行与空格的方法
本文实例讲述了ASP.NET过滤HTML标签只保留换行与空格的方法。分享给大家供大家参考。具体分析如下:
我从网上找到一个过滤HTML标签的方法,不清楚原作者是谁,网上流传的版本大多相同。这里把该方法原样复制下来,代码如下:
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Processing order: script elements (including their content) are removed, then all
/// remaining tags, then line breaks that are followed by whitespace, then comment
/// leftovers. A fixed set of character entities is decoded, every other numeric entity
/// is dropped, any angle brackets produced by decoding are removed, and finally the
/// text is HTML-encoded again so it is safe to write into a page.
/// </remarks>
/// <param name="Htmlstring">Source text that may contain HTML markup. May be null or empty.</param>
/// <returns>
/// The HTML-encoded plain text with leading and trailing whitespace removed;
/// an empty string when <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements together with their content. Singleline lets '.'
    // match line breaks, so scripts that span several lines are removed as well.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "");

    // Remove a line break together with the whitespace run that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "");

    // Remove leftovers of HTML comments that the tag pattern did not consume.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "");
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

    // Decode the supported named/numeric character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Drop any other numeric character reference.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "");

    // string.Replace returns a new string; the result must be assigned back,
    // otherwise the call has no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // Encode without going through HttpContext.Current, which is null outside an
    // active request (background threads, unit tests) and would throw.
    // NOTE(review): assumes no custom HttpEncoder is configured for the site - confirm.
    return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
}