|
|
|
|
î¤éÞ / 2010-01-21 / 오후 3:27:59 / 24.10.174.180
조회수 : 628
|
[추천] [퍼온글] 자바스크립트 HTML 태그 제거 |
/** Matches a whole {@code <script>} or {@code <noscript>} element, body included, in any letter case. */
private static final Pattern SCRIPT_ELEMENTS = Pattern.compile(
        "<(no)?script[^>]*>.*?</(no)?script>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

/**
 * Matches one {@code <style>} element at a time, in any letter case. The body quantifier is
 * reluctant so that text sitting between two separate style elements is not swallowed.
 */
private static final Pattern STYLE_ELEMENTS = Pattern.compile(
        "<style[^>]*>.*?</style>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

/** Matches any remaining tag; a quoted attribute value is allowed to contain {@code >}. */
private static final Pattern MARKUP_TAGS = Pattern.compile("<(\"[^\"]*\"|\'[^\']*\'|[^\'\">])*>");

/**
 * Matches a named ({@code &nbsp;}), decimal ({@code &#169;}) or hexadecimal ({@code &#xA9;})
 * character reference. Deliberately strict so that a bare ampersand in prose followed later by
 * a semicolon is left alone.
 */
private static final Pattern ENTITY_REFERENCES =
        Pattern.compile("&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);");

/** Matches a run of two or more whitespace characters. */
private static final Pattern WHITESPACE_RUNS = Pattern.compile("\\s\\s+");

/**
 * Strips HTML markup from {@code content} and returns the remaining text.
 *
 * <p>Processing order matters: script/noscript and style elements are removed together with
 * their bodies first, then every remaining tag, then character references, and finally each
 * run of two or more whitespace characters is collapsed to a single space. Character
 * references are removed, not decoded. The result is not trimmed.
 *
 * @param content the HTML fragment to clean; may be {@code null}
 * @return the text with markup removed, or an empty string when {@code content} is {@code null}
 */
private String getText(String content) {
    if (content == null) {
        return "";
    }
    // Elements whose bodies are not visible text must go before generic tag stripping,
    // otherwise only their tags would be removed and the script/CSS source would remain.
    String text = SCRIPT_ELEMENTS.matcher(content).replaceAll("");
    text = STYLE_ELEMENTS.matcher(text).replaceAll("");
    text = MARKUP_TAGS.matcher(text).replaceAll("");
    text = ENTITY_REFERENCES.matcher(text).replaceAll("");
    return WHITESPACE_RUNS.matcher(text).replaceAll(" ");
}
|
|
|