' Downloads a web page and reduces its HTML to plain text, leaving the result in `html`.
' Every step below is a regex-based approximation, not a real HTML parser, so unusual
' markup (comments, attributes containing ">", nested scripts, ...) may not be handled.
' NOTE(review): the page's character encoding is not set explicitly here - confirm that
' DownloadString decodes this site correctly if the text comes out garbled.

' WebClient is IDisposable, so release it as soon as the download has finished.
Dim allHtml As String
Using internet As New Net.WebClient
    allHtml = internet.DownloadString("http://homepage1.nifty.com/rucio/main/main.htm")
End Using

' IgnoreCase: tag names match in any case. Singleline: "." also matches line breaks,
' so a pattern can span a multi-line element.
Dim regOption As System.Text.RegularExpressions.RegexOptions = System.Text.RegularExpressions.RegexOptions.IgnoreCase Or System.Text.RegularExpressions.RegexOptions.Singleline

' Extract the content between <body> and </body>. (Simplified; some pages will not match,
' in which case Groups(1).Value is an empty string.)
Dim bodyContent As String = System.Text.RegularExpressions.Regex.Match(allHtml, "<body.*?>(.*?)</body>", regOption).Groups(1).Value

' Remove script elements. (Simplified; there are many cases this does not handle.)
Dim html As String = System.Text.RegularExpressions.Regex.Replace(bodyContent, "<script.*?>.*?</script>", "", regOption)

' Drop the line breaks present in the markup; only tag-derived breaks should remain.
html = html.Replace(vbCrLf, "")
html = html.Replace(vbCr, "")
html = html.Replace(vbLf, "")

' Turn tags that imply a line break into real line breaks. (Simplified.)
' <br> / <br/>
html = System.Text.RegularExpressions.Regex.Replace(html, "<br\s*?/{0,1}>", vbNewLine, regOption)
' </tr>
html = System.Text.RegularExpressions.Regex.Replace(html, "</\s*?tr>", vbNewLine, regOption)
' </p>
html = System.Text.RegularExpressions.Regex.Replace(html, "</\s*?p>", vbNewLine, regOption)
' </h1> ... </h6>
html = System.Text.RegularExpressions.Regex.Replace(html, "</\s*?h[1-6]>", vbNewLine, regOption)

' Remove every remaining tag so that only the text content is left. (Simplified.)
html = System.Text.RegularExpressions.Regex.Replace(html, "</{0,1}\w+?.*?>", "", regOption)

' Decode the common character entities. "&amp;" must be handled last so that text such
' as "&amp;lt;" ends up as the literal "&lt;" rather than being decoded twice into "<".
html = html.Replace("&nbsp;", " ")
html = html.Replace("&gt;", ">")
html = html.Replace("&lt;", "<")
html = html.Replace("&amp;", "&")

' Tabs are layout-only in the source markup; drop them.
html = html.Replace(vbTab, "")