我们都知道,Elasticsearch作为搜索引擎被广泛应用在各个领域,尤其是电子商务网站和App的检索。以下查询定义几乎囊括了商城大部分的搜索使用场景,其中包括:
基本查询:简单的查询方式
- terms:目标列包含指定词(不分词)
- match:会把输入的“短语”进行分解成分词,分词器可自定义
- bool match:match分解成多个词,并且是and的关系,默认是or的关系
- match phrase:进行分词,但要指定分词之间词语的个数
- match phrase prefix:进行分词,分词按前缀匹配
- multi-match:作用于多个fields的match查询
- query string:支持Lucene查询语法,例如 title:crime^10 +title:punishment -otitle:cat,用 + 和 - 分别表示include和exclude,用 ^ 表示权重,例如:field^2表示权重为2,默认权重为1
- field查询:是query string查询的简化版本
- ids查询:field查询的特殊情况,只针对id列
- prefix查询:类似于term查询,按前缀必须匹配
- fuzzy like this查询:相似文本的查询,怎么计算相似度呢
- fuzzy like this field查询:选定title
- fuzzy查询:模糊查询,可定义词语之间的编辑距离(fuzziness)得到
- wildcard查询:使用* ?的term查询
- more like this:like且设定好范围
- range:针对数值类型范围查询,from to
Filter过滤查询 ,对查询结果过滤
- term{“year”:2021},过滤结果中year为2021的数据
- range :过滤结果中指定列在给定范围的数据
- exists :查询结果中存在某列的数据
- missing :查询结果中缺失某列的数据
- scripts:bool表达式 ,查询结果中满足脚本指定条件的数据
- type :按类型过滤
- limit :限定返回一定数量的结果
- ids :限定返回特殊的id集合
- not,and,or :多个条件的组合查询
组合查询 :多个条件的组合查询
- bool:{“must”:{查询A},“should”:{查询B}}
- boosting:查询权重,positive的分数增高,negative分数降低
- constant score:恒定分数;
- indices:实现在多个Index上查询;
- custom filters score:
另外,针对Elasticsearch搜索场景,此处不选择官方提供的查询客户端(后期会@Deprecated),而是使用 okhttp 作为ES检索查询客户端,可能很多伙计都知道OkHttp是一个高效的HTTP客户端,被业界广泛应用在Http请求场景,它有以下特性:
- 支持HTTP/2,允许所有同一个主机地址的请求共享同一个socket连接
- 连接池减少请求延时
- 透明的GZIP压缩减少响应数据的大小
- 缓存响应内容,避免一些完全重复的请求
1、定义查询大全
基础BaseQuery定义,以及其他枚举类型定义;
/**
 * Common base type for every query definition.
 *
 * @param <Q> concrete query type; returned by {@link #boost(Float)} so calls can be chained
 */
@Getter
public abstract class BaseQuery<Q> implements Serializable {

    private static final long serialVersionUID = -8937265256480681309L;

    /** Weight (boost) applied to this query. */
    private Float boost;

    /**
     * Stores the boost value and hands back this instance typed as {@code Q}.
     *
     * @param boost weight to apply
     * @return this query, cast to the concrete type
     */
    public final Q boost(Float boost) {
        this.boost = boost;
        // Subclasses declare themselves as Q, so the cast is expected to hold.
        @SuppressWarnings("unchecked")
        Q self = (Q) this;
        return self;
    }
}
/**
 * Zero-terms option referenced by the match-style queries ({@code zeroTermsQuery}).
 *
 * <p>The {@code static} modifier was dropped: it is rejected on a top-level enum and
 * is implicit on a nested one, so the declaration is valid in both positions.
 */
public enum ZeroTerms {
    /** Zero terms: none. */
    NONE,
    /** Zero terms: all. */
    ALL
}
/** Operator used by the query definitions to combine terms. */
public enum Operator {

    /** Any term may match. */
    OR,

    /** Every term must match. */
    AND;
}
/** Sort direction. */
public enum Order {

    /** Ascending. */
    ASC,

    /** Descending. */
    DESC;
}
/**
 * One sort clause: the field, its direction, and an optional script with the
 * data type of the script's result.
 */
@Getter
@Builder
public class Sort implements Serializable {

    private static final long serialVersionUID = -5574272690024012413L;

    /** Field to sort by. */
    private String field;

    /** Sort direction. */
    private Order order;

    /** Script used for script-based sorting. */
    private Script script;

    /** Data type of the value produced by the sort script. */
    private String type;
}
1.1 id查询
/**
 * Exact query by a list of document primary-key ids.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:18
 */
@Getter
@Builder
@EqualsAndHashCode(callSuper = true)
public class IdsQuery extends BaseQuery<IdsQuery> {

    private static final long serialVersionUID = -5818467179482171316L;

    /** Id values to look up. */
    private Set<String> values;

    /**
     * Adds a single id, creating the backing set on first use.
     *
     * @param value id to add
     * @return this query, for chaining
     */
    public IdsQuery addValue(String value) {
        idSet().add(value);
        return this;
    }

    /**
     * Adds every id from the given collection, creating the backing set on first use.
     *
     * @param values ids to add
     * @return this query, for chaining
     */
    public IdsQuery addValues(Collection<String> values) {
        idSet().addAll(values);
        return this;
    }

    /** Returns the backing set, allocating it when it has not been set yet. */
    private Set<String> idSet() {
        if (this.values == null) {
            this.values = Sets.newHashSet();
        }
        return this.values;
    }
}
1.2 Exists(存在)查询
/**
 * Exists query.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:18
 */
@Getter
@Builder
@EqualsAndHashCode(callSuper = true)
public class ExistsQuery extends BaseQuery<ExistsQuery> {

    private static final long serialVersionUID = 1738703878906147701L;

    /** Field whose presence is tested. */
    private String field;
}
1.3 Wildcard(通配符)查询
/**
 * Wildcard (fuzzy pattern) matching query.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:19
 */
@Getter
@Builder
public class WildcardQuery extends BaseQuery<WildcardQuery> {

    private static final long serialVersionUID = -2682500922726969971L;

    /** Field to match against. */
    private String field;

    /** Value matched against the field. */
    private String value;
}
1.4 Range(范围)查询
/**
 * Range query.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:19
 */
@Getter
@Builder
public class RangeQuery extends BaseQuery<RangeQuery> {

    private static final long serialVersionUID = 718814285116379481L;

    /** Field the range applies to. */
    private String field;

    /** Start value. */
    private Object from;

    /** End value. */
    private Object to;

    /** Whether the lower bound is included; documented default is true. */
    private Boolean includeLower;

    /** Whether the upper bound is included; documented default is true. */
    private Boolean includeUpper;

    /**
     * Sets the start value.
     *
     * @return this query, for chaining
     */
    public RangeQuery from(Object from) {
        this.from = from;
        return this;
    }

    /**
     * Sets the end value.
     *
     * @return this query, for chaining
     */
    public RangeQuery to(Object to) {
        this.to = to;
        return this;
    }

    /**
     * Sets whether the lower bound is included.
     *
     * @return this query, for chaining
     */
    public RangeQuery includeLower(Boolean includeLower) {
        this.includeLower = includeLower;
        return this;
    }

    /**
     * Sets whether the upper bound is included.
     *
     * @return this query, for chaining
     */
    public RangeQuery includeUpper(Boolean includeUpper) {
        this.includeUpper = includeUpper;
        return this;
    }
}
1.5 Term(精确)查询
/**
 * Exact-match query.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:19
 */
@Getter
@Builder
public class TermQuery extends BaseQuery<TermQuery> {

    private static final long serialVersionUID = -7457986485757144952L;

    /** Field to match. */
    private String field;

    /** Value to match. */
    private Object value;
}
1.6 Terms(精确)多查询
* 多属多值精确匹配查询
* @author <a href="mailto:974064580@163.com>lee possible</a>
* @date 2021-02-06 17:19
*/
@Getter
@Builder
public class TermsQuery extends BaseQuery<TermsQuery> {
private static final long serialVersionUID = -6528812410794945781L;
/**
* 属性
*/
private String field;
/**
* 值列表
*/
private List<Object> values;
public TermsQuery addValue(String value) {
if (null == values) {
values = Lists.newArrayList();
}
values.add(value);
return this;
}
public TermsQuery addValue(Object value) {
if (null == values) {
values = Lists.newArrayList();
}
values.add(value);
return this;
}
public TermsQuery addValues(List values) {
if (null == this.values) {
this.values = Lists.newArrayList();
}
this.values.addAll(values);
return this;
}
public TermsQuery addValues(Collection values) {
if (null == this.values) {
this.values = Lists.newArrayList();
}
this.values.addAll(values);
return this;
}
}
1.7 Match(分词匹配)查询
/**
 * Analyzed (tokenized) match query.
 * Example: "中国杭州" is tokenized into "中国" and "杭州".
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:18
 */
@Getter
@Builder
public class MatchQuery extends BaseQuery<MatchQuery> {

    private static final long serialVersionUID = 4875762311528902561L;

    /** Field to match. */
    private String field;

    /**
     * Query type, default {@code boolean}.
     * <ul>
     *   <li>{@code boolean}: the analyzer tokenizes the string, then the corresponding sub-queries run</li>
     *   <li>{@code phrase}: the query string must match all tokens</li>
     *   <li>{@code phrase_prefix}: the query string is matched by prefix</li>
     * </ul>
     */
    private String type;

    /** Query value (a string). */
    private Object query;

    /** Analyzer. */
    private String analyzer;

    /** How the tokens of the query string are combined when matching a single field; default OR. */
    private Operator operator;

    /**
     * Leniency: by default (false) an error is raised when data does not match and cannot be
     * converted; when set to true the error is ignored.
     */
    private Boolean lenient;

    /**
     * Stop-word handling: stop words are all removed at index time.
     * Default is NONE: stop words cannot be found by a search.
     */
    private ZeroTerms zeroTermsQuery;

    /**
     * Maximum distance between matching words, e.g. god -> good is edit distance 1.
     * Fuzziness (Levenshtein distance); default is Auto, which is usually sufficient.
     */
    private String fuzziness;

    /**
     * Length of the leading part that must match exactly; used together with fuzziness, default 0.
     * Example: for the misspelling goosd -> goods, a value of 3 requires "goo" to match exactly.
     */
    private Integer prefixLength;

    /** Limits how many terms a prefix may expand into; default 50. */
    private Integer maxExpansions;

    /** Percentage of the field's tokens that must match, e.g. 80%. */
    private String minimumShouldMatch;
}
1.8 MultiMatch(分词多匹配)查询
/**
 * Analyzed match query across multiple fields.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:18
 */
@Getter
@Builder
public class MultiMatchQuery extends BaseQuery<MultiMatchQuery> {

    private static final long serialVersionUID = -734174780787419253L;

    /** Fields to query. */
    private List<String> fields;

    /** Query value. */
    private String query;

    /**
     * Query type, default {@code best_fields}.
     * <ul>
     *   <li>{@code best_fields}: scores each of the given fields and takes the highest score</li>
     *   <li>{@code most_fields}: scores each of the given fields and takes the average</li>
     *   <li>{@code cross_fields}: splits the query into tokens and matches them across the fields;
     *       by default a document is returned as soon as one field matches</li>
     *   <li>{@code phrase}: runs the query on each field and returns the highest score,
     *       similar to best_fields</li>
     *   <li>{@code phrase_prefix}: runs a prefix query on each field and returns the highest score,
     *       similar to best_fields</li>
     * </ul>
     */
    private String type;

    /**
     * Tie breaker: also takes the scores of the other matching fields into account.
     * With dis_max alone the single best matching clause's _score is the overall score;
     * the tie breaker is a compromise between dis_max and bool, scored as follows:
     * <ol>
     *   <li>take the _score of the best matching clause</li>
     *   <li>multiply the scores of the other matching clauses by tie_breaker</li>
     *   <li>sum the above and normalize</li>
     * </ol>
     * Formula as given by the original author (presumably score times boost):
     * score = best_field.score*boost + other_field.score*boost * tie_breaker
     */
    private Float tieBreaker;

    /** Analyzer. */
    private String analyzer;

    /** Operator applied between tokens when matching a field; default OR. */
    private Operator operator;

    /**
     * Slop: allowed gap when tokens are moved.
     * Example: "quick for" matches "quick brown for" when slop is 1.
     */
    private Integer slop;

    /**
     * Leniency: by default (false) an error is raised when data does not match and cannot be
     * converted; when set to true the error is ignored.
     */
    private Boolean lenient;

    /**
     * Stop-word handling: stop words are all removed at index time.
     * Default is NONE: stop words cannot be found by a search.
     */
    private ZeroTerms zeroTermsQuery;

    /**
     * Maximum distance between matching words, e.g. god -> good is edit distance 1.
     * Fuzziness (Levenshtein distance); default is Auto, which is usually sufficient.
     */
    private String fuzziness;

    /**
     * Length of the leading part that must match exactly; used together with fuzziness, default 0.
     * Example: for the misspelling goosd -> goods, a value of 3 requires "goo" to match exactly.
     */
    private Integer prefixLength;

    /** Limits how many terms a prefix may expand into; default 50. */
    private Integer maxExpansions;

    /** Percentage of the field's tokens that must match, e.g. 80%. */
    private String minimumShouldMatch;

    /**
     * Appends every given field name, creating the backing list on first use.
     *
     * @param fields field names to add
     * @return this query, for chaining
     */
    public MultiMatchQuery addFields(List<String> fields) {
        fieldList().addAll(fields);
        return this;
    }

    /**
     * Appends one field name, creating the backing list on first use.
     *
     * @param field field name to add
     * @return this query, for chaining
     */
    public MultiMatchQuery addField(String field) {
        fieldList().add(field);
        return this;
    }

    /** Returns the backing list, allocating it when it has not been set yet. */
    private List<String> fieldList() {
        if (this.fields == null) {
            this.fields = Lists.newArrayList();
        }
        return this.fields;
    }
}
1.9 Nested(嵌套)高级查询
/**
 * Nested query.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:18
 */
@Getter
@Builder
public class NestedQuery extends BaseQuery<NestedQuery> {

    private static final long serialVersionUID = 2504790838597063911L;

    /** Path value; presumably the path of the nested object being queried (confirm against the request builder). */
    private String path;

    /** The query. */
    private BaseQuery query;

    /** Score mode describing how the relevance scores of matching child objects are used. */
    private ScoreMode scoreMode;
}
1.10 Script高级查询
/**
 * Script query.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:19
 */
@Getter
@Builder
public class ScriptQuery extends BaseQuery<ScriptQuery> {

    private static final long serialVersionUID = -3995950246151932374L;

    /** Script carried by this query. */
    private Script script;

    /** Script definition: language, source code and custom parameters. */
    @Getter
    @Builder
    public static class Script implements Serializable {

        private static final long serialVersionUID = -9222431184650362263L;

        /** Script language; documented default is painless. */
        private String lang;

        /** Source code of the script. */
        private String source;

        /** Custom parameters. */
        private Map<String, Object> params;

        /**
         * Registers one parameter, creating the backing map on first use.
         *
         * @param key   parameter name
         * @param value parameter value
         * @return this script, for chaining
         */
        public Script addParam(String key, Object value) {
            Map<String, Object> target = params;
            if (target == null) {
                target = Maps.newHashMap();
                params = target;
            }
            target.put(key, value);
            return this;
        }
    }
}
2、组合查询
2.1 bool 组合查询
评分为所有子查询评分之和;
/**
 * Composite (bool-style) query that groups sub-queries into must / mustNot / filter /
 * should clauses.
 *
 * <p>The self type parameter and the fluent return types name this class itself, so that
 * {@code return this} is type-correct and calls can be chained.
 *
 * @author <a href="mailto:974064580@163.com">lee possible</a>
 * @date 2021-02-06 17:17
 */
@Data
@Builder
@EqualsAndHashCode(callSuper = true)
public class CombineQuery extends BaseQuery<CombineQuery> {

    private static final long serialVersionUID = -3096588829428648257L;

    /** Queries a document must match. */
    public List<BaseQuery> must;

    /** Queries a document must not match. */
    public List<BaseQuery> mustNot;

    /** Queries that filter documents without affecting the match score. */
    public List<BaseQuery> filter;

    /** Queries that raise the relevance score when matched; equivalent to OR. */
    public List<BaseQuery> should;

    /** Percentage of tokens that must match, e.g. 80%. */
    private String minimumShouldMatch;

    /**
     * Adds a query to the must clause.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public CombineQuery addMust(BaseQuery query) {
        must = append(must, query);
        return this;
    }

    /**
     * Adds a query to the mustNot clause.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public CombineQuery addMustNot(BaseQuery query) {
        mustNot = append(mustNot, query);
        return this;
    }

    /**
     * Adds a query to the filter clause.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public CombineQuery addFilter(BaseQuery query) {
        filter = append(filter, query);
        return this;
    }

    /**
     * Adds a query to the should clause. Public like the other add methods; it was
     * previously private and therefore unreachable for callers.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public CombineQuery addShould(BaseQuery query) {
        should = append(should, query);
        return this;
    }

    /** Appends {@code query} to {@code clause}, allocating the list when it is still null. */
    private static List<BaseQuery> append(List<BaseQuery> clause, BaseQuery query) {
        List<BaseQuery> target = clause;
        if (target == null) {
            target = Lists.newArrayList();
        }
        target.add(query);
        return target;
    }
}
2.2 dis_max 组合查询
评分为与高权重子查询的打分高度相关;
/**
 * Dis-max style composite query: a tie breaker plus must / mustNot / filter / should
 * clause lists.
 */
@Data
@Builder
@EqualsAndHashCode(callSuper = true)
public class DisMaxQuery extends BaseQuery<DisMaxQuery> {

    private static final long serialVersionUID = -3096588829428648257L;

    /** Tie breaker: also takes the scores of the other matching fields into account. */
    private Float tieBreaker;

    /** Queries a document must match. */
    public List<BaseQuery> must;

    /** Queries a document must not match. */
    public List<BaseQuery> mustNot;

    /** Queries that filter documents without affecting the match score. */
    public List<BaseQuery> filter;

    /** Queries that raise the relevance score when matched; equivalent to OR. */
    public List<BaseQuery> should;

    /** Percentage of a document's tokens that must match, e.g. 80%. */
    private String minimumShouldMatch;

    /**
     * Adds a query to the must clause.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public DisMaxQuery addMust(BaseQuery query) {
        must = append(must, query);
        return this;
    }

    /**
     * Adds a query to the mustNot clause.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public DisMaxQuery addMustNot(BaseQuery query) {
        mustNot = append(mustNot, query);
        return this;
    }

    /**
     * Adds a query to the filter clause.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public DisMaxQuery addFilter(BaseQuery query) {
        filter = append(filter, query);
        return this;
    }

    /**
     * Adds a query to the should clause. Public like the other add methods; it was
     * previously private and therefore unreachable for callers.
     *
     * @param query query to add
     * @return this query, for chaining
     */
    public DisMaxQuery addShould(BaseQuery query) {
        should = append(should, query);
        return this;
    }

    /** Appends {@code query} to {@code clause}, allocating the list when it is still null. */
    private static List<BaseQuery> append(List<BaseQuery> clause, BaseQuery query) {
        List<BaseQuery> target = clause;
        if (target == null) {
            target = Lists.newArrayList();
        }
        target.add(query);
        return target;
    }
}
3、OkHttp使用
3.1 构建OkHttp客户端
/** Holder for the lazily built OkHttp client shared through this static field. */
private static final AtomicReference<OkHttpClient> INSTANCE = new AtomicReference<>();

/**
 * Returns the shared OkHttp client, building it on first use.
 *
 * <p>Publication goes through a compare-and-set on {@link #INSTANCE}: when several threads
 * build a client at the same time only one of them is stored, and the others pick up the
 * stored one on the next loop iteration.
 *
 * @return the shared client
 */
public OkHttpClient getClient() {
    for (; ; ) {
        OkHttpClient current = INSTANCE.get();
        if (current != null) {
            return current;
        }
        OkHttpClient candidate = newClientFromPoolProperties();
        if (INSTANCE.compareAndSet(null, candidate)) {
            return candidate;
        }
        // Lost the race: drop the candidate and re-read the published instance.
    }
}

/** Builds a client from the HTTP pool properties; each timeout is applied only when configured. */
private OkHttpClient newClientFromPoolProperties() {
    OkHttpClient.Builder builder = new OkHttpClient.Builder();
    // Project-defined properties; adapt to your own configuration.
    HttpPoolProperties pool = properties.getHttpPool();
    if (Objects.nonNull(pool.getCallTimeout())) {
        builder.callTimeout(pool.getCallTimeout(), TimeUnit.MILLISECONDS);
    }
    if (Objects.nonNull(pool.getReadTimeout())) {
        builder.readTimeout(pool.getReadTimeout(), TimeUnit.MILLISECONDS);
    }
    if (Objects.nonNull(pool.getWriteTimeout())) {
        builder.writeTimeout(pool.getWriteTimeout(), TimeUnit.MILLISECONDS);
    }
    if (Objects.nonNull(pool.getConnectTimeout())) {
        builder.connectTimeout(pool.getConnectTimeout(), TimeUnit.MILLISECONDS);
    }
    builder.connectionPool(new ConnectionPool(pool.getMaxIdle(), pool.getKeepAlive(), TimeUnit.MILLISECONDS));
    return builder.build();
}
3.2 OkHttp客户端调用
/**
 * Executes an OkHttp request and wraps the outcome in an {@code HttpResponse}.
 *
 * <p>The OkHttp response is closed via try-with-resources once the body has been read.
 * When a {@link ConnectException} occurs, the request's host is disconnected from the
 * target pool before the exception is rethrown.
 *
 * @param request request to execute
 * @return the response data (url, status code, body text)
 * @throws IOException when the call or reading the body fails
 * @throws RuntimeException when the elapsed time exceeds {@code REQUEST_TIMEOUT}
 */
public HttpResponse call(Request request) throws IOException {
    long startTime = System.currentTimeMillis();
    try (Response response = client().newCall(request).execute()) {
        return toHttpResponse(request, response, startTime);
    } catch (ConnectException e) {
        // Evict the lost node.
        httpTargetPool.disconnect(request.url().host());
        throw e;
    }
}

/** Copies url, status code and body into an HttpResponse and enforces the elapsed-time limit. */
private HttpResponse toHttpResponse(Request request, Response response, long startTime) throws IOException {
    try {
        HttpResponse resp = new HttpResponse();
        resp.setUrl(request.url());
        resp.setCode(response.code());
        resp.setBody(null == response.body() ? null : response.body().string());
        // Elapsed time in whole seconds (integer division); REQUEST_TIMEOUT is presumably in seconds - confirm.
        long cost = (System.currentTimeMillis() - startTime) / 1000;
        if (cost > REQUEST_TIMEOUT) {
            log.error("[ES-Client]执行Http耗时太长,节点:{},耗时:{}", request.url().host(), cost);
            throw new RuntimeException("ES执行操作耗时太长,请检查!");
        }
        return resp;
    } catch (Exception e) {
        log.error(String.format("请求[ES-Client]发生异常,节点:%s,%d", request.url().host(), request.url().port()), e);
        throw e;
    }
}
本节仅叙述Elasticsearch的各种查询定义,以及查询客户端的选择,后续将逐步展开,逐步带大家探索如何构建搜索引擎系统。