1 |
6a3fcb6d
|
みぞ@CrazyBeatCoder
|
package com.mizo0203.timeline.talker.util;
|
2 |
|
|
|
3 |
|
|
import org.apache.commons.io.IOUtils;
|
4 |
|
|
import org.jetbrains.annotations.NotNull;
|
5 |
|
|
|
6 |
|
|
import javax.swing.text.html.HTMLEditorKit;
|
7 |
|
|
import javax.swing.text.html.parser.ParserDelegator;
|
8 |
|
|
import java.io.IOException;
|
9 |
|
|
import java.io.InputStreamReader;
|
10 |
|
|
import java.nio.charset.Charset;
|
11 |
|
|
|
12 |
|
|
public class HTMLParser {
|
13 |
|
|
|
14 |
|
|
@NotNull
|
15 |
|
|
public String parse(@NotNull String html, @NotNull Charset encoding, boolean ignoreCharSet)
|
16 |
|
|
throws IOException {
|
17 |
|
|
try (InputStreamReader r =
|
18 |
|
|
new InputStreamReader(IOUtils.toInputStream(html, encoding), encoding)) {
|
19 |
|
|
HTMLParserCallback hp = new HTMLParserCallback();
|
20 |
|
|
ParserDelegator parser = new ParserDelegator();
|
21 |
|
|
parser.parse(r, hp, ignoreCharSet);
|
22 |
|
|
return hp.getText();
|
23 |
|
|
}
|
24 |
|
|
}
|
25 |
|
|
|
26 |
|
|
/**
|
27 |
|
|
* http://www.my-notebook.net/736a69e0-820c-423b-9047-a02b8a9eefb1.html
|
28 |
|
|
*
|
29 |
|
|
* <p>HTMLParser.java
|
30 |
|
|
*/
|
31 |
|
|
private static class HTMLParserCallback extends HTMLEditorKit.ParserCallback {
|
32 |
|
|
private final StringBuffer sb = new StringBuffer();
|
33 |
|
|
|
34 |
|
|
private String getText() {
|
35 |
|
|
return sb.toString();
|
36 |
|
|
}
|
37 |
|
|
|
38 |
|
|
@Override
|
39 |
|
|
public void handleText(@NotNull char[] data, int pos) {
|
40 |
|
|
sb.append(new String(data));
|
41 |
|
|
sb.append(System.getProperty("line.separator"));
|
42 |
|
|
}
|
43 |
|
|
}
|
44 |
|
|
}
|