public class RegularUtil extends Object
| 限定符和类型 | 字段和说明 |
|---|---|
static String |
REGEX_VARIABLE |
static Regular |
regularContain |
static Regular |
regularMatch |
static Regular |
regularMatchPrefix |
static String |
TAG_BEGIN |
static String |
TAG_END |
| 构造器和说明 |
|---|
RegularUtil() |
| 限定符和类型 | 方法和说明 |
|---|---|
static String |
cut(String text,
boolean contains,
String... tags)
取tags[i-2]与tags[i-1]之间的文本
|
static String |
cut(String text,
String... tags) |
static List<String> |
cuts(String text,
boolean contains,
String... tags) |
static List<String> |
cuts(String text,
String... tags) |
static List<String> |
fetch(String src,
String regx) |
static List<String> |
fetch(String src,
String regx,
Regular.MATCH_MODE mode) |
static List<String> |
fetch(String src,
String regx,
Regular.MATCH_MODE mode,
int idx) |
static List<List<String>> |
fetchAllTag(String txt,
String... tags)
提取单标签+双标签
不区分大小写
0:全文 1:开始标签 2:标签name 3:标签体 (单标签时null) 4:结束标签 (单标签时null)
|
static List<String> |
fetchAttribute(String txt,
String attribute)
取出属性值
0:全文 1:属性name 2:引号('|") 3:属性值
fetchAttributeValues(txt,"id","name");
|
static List<List<String>> |
fetchAttributeList(String txt,
String attribute)
取出所有属性值
0:全文 1:属性name 2:引号('|") 3:属性值
fetchAttributeValues(txt,"id");
|
static String |
fetchAttributeValue(String txt,
String attribute) |
static List<String> |
fetchAttributeValues(String txt,
String attribute) |
static List<List<String>> |
fetchPairedTag(String txt,
String... tags)
提取双标签<div>content</div>
依次取出p,table,div中的内容 有嵌套时只取外层
只能提取同时有 开始结束标签的内容,不能提取单标签内容如<img> <br/>
支持不同标签嵌套,但不支持相同标签嵌套
不区分大小写
0:全文 1:开始标签 2:标签name 3:标签体 4:结束标签
|
static List<List<String>> |
fetchs(String src,
String regx) |
static List<List<String>> |
fetchs(String src,
String regx,
Regular.MATCH_MODE mode)
提取子串
|
static List<List<String>> |
fetchSingleTag(String txt,
String... tags)
提取单标签 如<img> <br/>
如果传入div等带有结束标签的参数 则只取出开始标签 <div>
不区分大小写
0:全文 1:标签name
|
static String |
fetchUrl(String src) |
static List<String> |
fetchUrls(String src) |
static List<String> |
filter(List<String> src,
String regx,
Regular.MATCH_MODE mode,
Regular.FILTER_TYPE type) |
static List<List<String>> |
getAllTagAndBodyWithAttribute(String src,
String attribute)
获取所有 包含attribute属性 的标签与标签体,不支持相同标签嵌套
[
[整个标签含标签体,开始标签,结束标签,标签体,标签名称],
[整个标签含标签体,开始标签,结束标签,标签体,标签名称]
]
|
static List<List<String>> |
getAllTagAndBodyWithAttributeValue(String src,
String attribute,
String value)
获取所有 包含attribute属性包含value值 的标签与标签体
单标签只匹配有/>结尾的情况,避免与双标签的开始标签混淆
如class="a" : attribute=class value=a
style="width:100px;" :attribute=style value=width
[
[整个标签含标签体,开始标签,结束标签,标签体,标签名称],
[整个标签含标签体,开始标签,结束标签,标签体,标签名称]
]
|
static int |
indexOf(String src,
String regx) |
static int |
indexOf(String src,
String regx,
int begin)
字符串下标 regx在src中首次出现的位置
|
static boolean |
isDate(String str) |
static boolean |
isDateTime(String str) |
static boolean |
match(String src,
String regx) |
static boolean |
match(String src,
String regx,
Regular.MATCH_MODE mode) |
static List<String> |
pick(List<String> src,
String regx,
Regular.MATCH_MODE mode)
过滤 保留匹配项
|
static List<String> |
regexpValue(String src,
String regex,
Regular.MATCH_MODE mode)
表达式匹配值长度
|
static String |
removeAllHtmlTag(String src) |
static String |
removeAllTag(String src)
清除所有标签(只清除标签,不清除标签体)
|
static String |
removeAllTagAndBodyWithAttribute(String src,
String attribute)
删除所有 包含attribute属性 的标签与标签体
RegularUtil.removeAllTagAndBodyWithAttribute(str,"class")
<input type="text" class="a"/>
<input type="text" class="a"></input>
<input type="text" class = "a"></input>
<input type="text" class></input>
<input type="text" class/>
<input type="text" a="class"></input>(不匹配)
|
static String |
removeAllTagAndBodyWithAttributeValue(String src,
String attribute,
String value)
RegularUtil.removeAllTagAndBodyWithAttributeValue(s,"class","a")
删除所有 包含attribute属性=value值 的标签与标签体
<input type="text" class="a"/>
<input type="text" class="a"></input>
<input type="text" class="a b"></input>如果需要不匹配可以使用"[^\\s]a[^\\s]"
<input type="text" class="b a"></input>
<input type="text" class="ab"></input>(不匹配)如果需要匹配可以使用"a
|
static String |
removeEmptyTag(String src) |
static String |
removeHtmlEmptyTag(String src) |
static String |
removeHtmlTag(String src,
String... tags) |
static String |
removeHtmlTagExcept(String src,
String... tags) |
static String |
removeHtmlTagExceptSimple(String src)
删除简单标签外的其他标签
|
static String |
removeHtmlTagWithBody(String src,
String... tags) |
static String |
removeTag(String src,
String... tags)
只删除标签,不删除标签体
|
static String |
removeTagExcept(String src,
String... tags)
删除 tags之外的标签"<b>"与"</b>"只写一次 "b"
只删除标签不删除标签体
|
static String |
removeTagWithBody(String src,
String... tags)
删除标签及标签体
|
static List<String> |
wipe(List<String> src,
String regx,
Regular.MATCH_MODE mode)
过滤 删除匹配项
|
public static Regular regularMatch
public static Regular regularMatchPrefix
public static Regular regularContain
public static boolean match(String src, String regx, Regular.MATCH_MODE mode)
public static List<List<String>> fetchs(String src, String regx, Regular.MATCH_MODE mode) throws Exception
src - 输入字符串
regx - 表达式
mode - mode
Exception - Exception
public static List<List<String>> fetchs(String src, String regx) throws Exception
Exception
public static List<String> fetch(String src, String regx, Regular.MATCH_MODE mode, int idx) throws Exception
Exception
public static List<String> fetch(String src, String regx, Regular.MATCH_MODE mode) throws Exception
Exception
public static List<String> filter(List<String> src, String regx, Regular.MATCH_MODE mode, Regular.FILTER_TYPE type)
public static List<String> pick(List<String> src, String regx, Regular.MATCH_MODE mode)
src - src
regx - regx
mode - mode
public static List<String> wipe(List<String> src, String regx, Regular.MATCH_MODE mode)
src - src
regx - regx
mode - mode
public static int indexOf(String src, String regx, int begin)
src - src
regx - regx
begin - 有效开始位置
public static List<String> regexpValue(String src, String regex, Regular.MATCH_MODE mode)
src - src
regex - regex
mode - mode
public static String removeAllTag(String src)
src - src
public static String removeAllTagAndBodyWithAttribute(String src, String attribute)
src - src
attribute - attribute
public static String removeAllTagAndBodyWithAttributeValue(String src, String attribute, String value)
src - src
attribute - attribute
value - value
public static List<List<String>> getAllTagAndBodyWithAttribute(String src, String attribute) throws Exception
src - src
attribute - attribute
Exception - Exception
public static List<List<String>> getAllTagAndBodyWithAttributeValue(String src, String attribute, String value) throws Exception
src - src
attribute - attribute
value - value
Exception - Exception
public static String removeTagExcept(String src, String... tags)
src - src
tags - tags
public static String removeTag(String src, String... tags)
src - src
tags - tags
public static String removeTagWithBody(String src, String... tags)
src - src
tags - tags
public static String removeHtmlTagExceptSimple(String src)
src - src
public static List<List<String>> fetchPairedTag(String txt, String... tags) throws Exception
txt - text
tags - 标签名,如div,span
Exception - Exception
public static List<List<String>> fetchSingleTag(String txt, String... tags) throws Exception
txt - text
tags - 标签名,如img br
Exception - Exception
public static List<List<String>> fetchAllTag(String txt, String... tags) throws Exception
txt - txt
tags - 标签名
Exception - Exception
public static List<List<String>> fetchAttributeList(String txt, String attribute)
txt - txt
attribute - attribute
public static List<String> fetchAttribute(String txt, String attribute)
txt - txt
attribute - attribute
public static String cut(String text, boolean contains, String... tags)
text - text
tags - tags
contains - 是否包含开始结束标签
public static boolean isDate(String str)
public static boolean isDateTime(String str)
Copyright © 2022. All rights reserved.