1
|
package com.mizo0203.timeline.talker.util;
|
2
|
|
3
|
import org.apache.commons.io.IOUtils;
import org.jetbrains.annotations.NotNull;

import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.nio.charset.Charset;
|
11
|
|
12
|
public class HTMLParser {
|
13
|
|
14
|
@NotNull
|
15
|
public String parse(@NotNull String html, @NotNull Charset encoding, boolean ignoreCharSet)
|
16
|
throws IOException {
|
17
|
try (InputStreamReader r =
|
18
|
new InputStreamReader(IOUtils.toInputStream(html, encoding), encoding)) {
|
19
|
HTMLParserCallback hp = new HTMLParserCallback();
|
20
|
ParserDelegator parser = new ParserDelegator();
|
21
|
parser.parse(r, hp, ignoreCharSet);
|
22
|
return hp.getText();
|
23
|
}
|
24
|
}
|
25
|
|
26
|
/**
|
27
|
* http://www.my-notebook.net/736a69e0-820c-423b-9047-a02b8a9eefb1.html
|
28
|
*
|
29
|
* <p>HTMLParser.java
|
30
|
*/
|
31
|
private static class HTMLParserCallback extends HTMLEditorKit.ParserCallback {
|
32
|
private final StringBuffer sb = new StringBuffer();
|
33
|
|
34
|
private String getText() {
|
35
|
return sb.toString();
|
36
|
}
|
37
|
|
38
|
@Override
|
39
|
public void handleText(@NotNull char[] data, int pos) {
|
40
|
sb.append(new String(data));
|
41
|
sb.append(System.getProperty("line.separator"));
|
42
|
}
|
43
|
}
|
44
|
}
|