Elasticsearchを使ってタグ画像の統計を行ったのはずいぶん前のことで、最近あらためて触ってみたところ、使い方をかなり忘れていました。現在はバージョン7.12を使用しています。以前はTransportClientを利用していましたが、現在は公式に推奨されているRestHighLevelClientを使用しています。
最近RestHighLevelClientを使ってみて便利だと感じたので、基本的な機能をいくつか記録しておきます。
1. クライアントの初期化と終了
/**
 * Creates a {@link RestHighLevelClient} that connects to a single node over plain
 * HTTP, stores it in the {@code client} field and returns it.
 *
 * @param host host name or address of the Elasticsearch node
 * @param port HTTP port of the node
 * @return the newly created client (the same instance kept in {@code client})
 */
public static RestHighLevelClient getClient(String host, int port) {
    LOGGER.info("Elasticsearchの初期化");
    HttpHost node = new HttpHost(host, port, "http");
    client = new RestHighLevelClient(RestClient.builder(node));
    return client;
}
/**
 * Closes the client held in the {@code client} field, if one has been created.
 * An {@link IOException} raised while closing is printed and otherwise ignored.
 */
public static void closeES() {
    LOGGER.info("Elasticsearchのクローズ");
    if (client == null) {
        // Nothing was ever initialised, so there is nothing to release.
        return;
    }
    try {
        client.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
2. インデックスとマッピングの作成
/**
 * Creates an index with one shard, zero replicas and an explicit mapping for three
 * fields: two analysed text fields and one date field.
 *
 * <p>The text fields reference the analyzers {@code ik_max_word} and {@code ik_smart};
 * presumably these come from an analysis plugin that must be installed on the
 * cluster (TODO confirm).
 *
 * <p>NOTE(review): the mapped field names differ from the {@code title}/{@code content}
 * keys written by the bulk snippets in this file; confirm which names are intended.
 *
 * @param client    client used to send the request
 * @param indexName name of the index to create
 * @return {@code true} if the cluster acknowledged the creation, {@code false} if it
 *         did not or an {@link IOException} occurred
 */
public boolean createIndexMapping(RestHighLevelClient client, String indexName) {
    LOGGER.info("インデックスとマッピングの作成...");

    CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName);
    createIndexRequest.settings(Settings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 0));

    try {
        XContentBuilder mapping = XContentFactory.jsonBuilder();
        mapping.startObject();
        mapping.startObject("properties");

        // Analysed text field.
        mapping.startObject("コンテンツ");
        mapping.field("type", "text");
        mapping.field("index", "true");
        mapping.field("analyzer", "ik_max_word");
        mapping.field("search_analyzer", "ik_smart");
        mapping.endObject();

        // Date field.
        mapping.startObject("日付");
        mapping.field("type", "date");
        mapping.field("index", "true");
        mapping.endObject();

        // Second analysed text field, same analyzers as the first.
        mapping.startObject("タイトル");
        mapping.field("type", "text");
        mapping.field("index", "true");
        mapping.field("analyzer", "ik_max_word");
        mapping.field("search_analyzer", "ik_smart");
        mapping.endObject();

        mapping.endObject(); // properties
        mapping.endObject(); // root

        createIndexRequest.mapping(mapping);
        CreateIndexResponse created = client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
        return created.isAcknowledged();
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
3. ドキュメントの取得
/**
 * Fetches a single document by id and returns its source.
 *
 * @param client     client used to send the request
 * @param index      index to read from
 * @param documentId id of the document
 * @return the document source, or {@code null} if the document does not exist or an
 *         {@link IOException} occurred
 */
public Map fetchDocumentById(RestHighLevelClient client, String index, String documentId) {
    GetRequest lookup = new GetRequest(index, documentId);
    try {
        GetResponse found = client.get(lookup, RequestOptions.DEFAULT);
        return found.isExists() ? found.getSource() : null;
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
4. 複数ドキュメントの削除
/**
 * Deletes every document whose id is in {@code documentIds} using a delete-by-query
 * request with an ids query.
 *
 * @param client      client used to send the request
 * @param index       index to delete from
 * @param documentIds ids of the documents to delete
 * @return the delete-by-query response converted with {@code JSONObject.toJSON};
 *         the conversion is applied to {@code null} when an {@link IOException} occurred
 */
public Object deleteDocumentsByIds(RestHighLevelClient client, String index, List<String> documentIds) {
    // addIds takes varargs, so the whole list can be handed over in one call.
    IdsQueryBuilder idsQuery = new IdsQueryBuilder();
    idsQuery.addIds(documentIds.toArray(new String[0]));

    DeleteByQueryRequest request = new DeleteByQueryRequest(index);
    request.setQuery(idsQuery);

    BulkByScrollResponse outcome;
    try {
        outcome = client.deleteByQuery(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        e.printStackTrace();
        outcome = null;
    }
    return JSONObject.toJSON(outcome);
}
5. ドキュメントのバルクインサート
/**
 * Indexes all given documents with a single bulk request (10s timeout). Each document
 * is stored under its own id with the fields {@code id}, {@code title} and
 * {@code content}.
 *
 * @param client    client used to send the request
 * @param index     target index
 * @param documents documents to index; {@code null} or empty means there is nothing to do
 * @return {@code true} if every item was indexed without failure (or there was nothing
 *         to index), {@code false} if any item failed or an {@link IOException} occurred
 */
public boolean bulkInsertDocuments(RestHighLevelClient client, String index, List<Document> documents) {
    if (documents == null || documents.isEmpty()) {
        // A BulkRequest without actions is rejected by request validation; an empty
        // batch is treated as trivially successful instead.
        return true;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("10s");
    for (Document doc : documents) {
        Map<String, Object> data = new HashMap<>();
        data.put("id", doc.getId());
        data.put("title", doc.getTitle());
        data.put("content", doc.getContent());
        bulkRequest.add(new IndexRequest(index).id(String.valueOf(doc.getId())).source(data));
    }

    try {
        BulkResponse responses = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        // BulkResponse.status() is always OK regardless of item outcomes, so comparing
        // it with CREATED never matched; per-item failures are the real success signal.
        return !responses.hasFailures();
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
6. ドキュメントのバルクアップデート
/**
 * Partially updates all given documents with a single bulk request (10s timeout).
 * Each update targets the document's own id and sets the fields {@code id},
 * {@code title} and {@code content}.
 *
 * @param client    client used to send the request
 * @param index     target index
 * @param documents documents to update; {@code null} or empty means there is nothing to do
 * @return {@code true} if every item was updated without failure (or there was nothing
 *         to update), {@code false} if any item failed or an {@link IOException} occurred
 */
public boolean bulkUpdateDocuments(RestHighLevelClient client, String index, List<Document> documents) {
    if (documents == null || documents.isEmpty()) {
        // A BulkRequest without actions is rejected by request validation; an empty
        // batch is treated as trivially successful instead.
        return true;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("10s");
    for (Document doc : documents) {
        Map<String, Object> data = new HashMap<>();
        data.put("id", doc.getId());
        data.put("title", doc.getTitle());
        data.put("content", doc.getContent());
        bulkRequest.add(new UpdateRequest().index(index).id(String.valueOf(doc.getId())).doc(data));
    }

    try {
        BulkResponse responses = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        // BulkResponse.status() is always OK, even when individual updates failed
        // (e.g. missing document), so the item results must be inspected instead.
        return !responses.hasFailures();
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
7. 全文検索
7.1 スクロールを使用した検索
/**
 * Runs a highlighted full-text search and collects every hit by paging through the
 * result set with the scroll API (one-minute keep-alive per page).
 *
 * <p>The query is a bool query in which {@code field2} must match {@code query} and
 * {@code field1} should match it; highlighting is applied to {@code field2}.
 *
 * <p>If an {@link IOException} occurs, paging stops and the hits collected so far are
 * returned. The scroll context is cleared in all cases once a scroll id is known.
 *
 * @param client  client used to send the requests
 * @param index   index to search
 * @param field1  field used in the optional (should) match clause
 * @param field2  field used in the required (must) match clause and for highlighting
 * @param query   text to search for
 * @param size    number of hits per scroll page
 * @param include source fields to include
 * @param exclude source fields to exclude
 * @return all hits retrieved, possibly empty, never {@code null}
 */
public List<SearchHit> searchWithScroll(RestHighLevelClient client, String index, String field1, String field2, String query, int size, String[] include, String[] exclude) {
    List<SearchHit> results = new ArrayList<>();
    Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L));

    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.preTags("<a style='color: #e4393c'>");
    highlightBuilder.postTags("</a>");
    highlightBuilder.field(field2);

    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.size(size);
    sourceBuilder.highlighter(highlightBuilder);
    sourceBuilder.query(QueryBuilders.boolQuery()
            .should(QueryBuilders.matchQuery(field1, query))
            .must(QueryBuilders.matchQuery(field2, query)));
    sourceBuilder.fetchSource(include, exclude);

    SearchRequest searchRequest = new SearchRequest(index);
    searchRequest.scroll(scroll);
    searchRequest.source(sourceBuilder);

    String scrollId = null;
    try {
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        scrollId = searchResponse.getScrollId();
        SearchHit[] hits = searchResponse.getHits().getHits();
        while (hits != null && hits.length > 0) {
            for (SearchHit hit : hits) {
                results.add(hit);
            }
            SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
            scrollRequest.scroll(scroll);
            // A failure here leaves the loop via the catch below; retrying with the
            // previous response would re-add the same page forever.
            searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT);
            scrollId = searchResponse.getScrollId();
            hits = searchResponse.getHits().getHits();
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the server-side scroll context even when paging was interrupted.
        if (scrollId != null) {
            ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
            clearScrollRequest.addScrollId(scrollId);
            try {
                client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return results;
}
7.2 search_afterを使用した検索
/**
 * Runs a highlighted full-text search and collects every hit by paging with
 * {@code search_after}, sorted by {@code _id} in descending order.
 *
 * <p>The query is a bool query in which {@code field2} must match {@code query} and
 * {@code field1} should match it; highlighting is applied to {@code field2}. If an
 * {@link IOException} occurs, the hits collected so far are returned.
 *
 * @param client  client used to send the requests
 * @param index   index to search
 * @param field1  field used in the optional (should) match clause
 * @param field2  field used in the required (must) match clause and for highlighting
 * @param query   text to search for
 * @param size    number of hits per page
 * @param include source fields to include
 * @param exclude source fields to exclude
 * @return all hits retrieved, possibly empty
 */
public List<SearchHit> searchWithSearchAfter(RestHighLevelClient client, String index, String field1, String field2, String query, int size, String[] include, String[] exclude) {
    HighlightBuilder highlighter = new HighlightBuilder();
    highlighter.preTags("<a style='color: #e4393c'>");
    highlighter.postTags("</a>");
    highlighter.field(field2);

    QueryBuilder textQuery = QueryBuilders.boolQuery()
            .should(QueryBuilders.matchQuery(field1, query))
            .must(QueryBuilders.matchQuery(field2, query));

    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(textQuery);
    sourceBuilder.fetchSource(include, exclude);
    sourceBuilder.size(size);
    sourceBuilder.sort("_id", SortOrder.DESC);
    sourceBuilder.highlighter(highlighter);

    SearchRequest request = new SearchRequest(index);
    request.source(sourceBuilder);

    List<SearchHit> results = new ArrayList<>();
    try {
        while (true) {
            SearchHit[] page = client.search(request, RequestOptions.DEFAULT).getHits().getHits();
            if (page.length == 0) {
                break;
            }
            for (SearchHit hit : page) {
                results.add(hit);
            }
            // The request holds a reference to sourceBuilder, so updating the builder
            // moves the next search past the last hit of this page.
            SearchHit tail = page[page.length - 1];
            sourceBuilder.searchAfter(tail.getSortValues());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return results;
}
8. 統計分析
public static Map> analyzeCounts(RestHighLevelClient client, String index, String query, String field1, String field2, String... aggFields) {
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.fetchSource(false);
SearchRequest request = new SearchRequest(index);
QueryBuilder queryBuilder = QueryBuilders.boolQuery()
.should(QueryBuilders.matchQuery(field1, query))
.must(QueryBuilders.matchQuery(field2, query));
sourceBuilder.query(queryBuilder);
Map> fieldAggMap = new LinkedHashMap<>();
TermsAggregationBuilder aggregationBuilder;
SearchResponse response = null;
for(String fieldName : aggFields) {
aggregationBuilder = AggregationBuilders.terms("agg_name").field(fieldName);
sourceBuilder.aggregation(aggregationBuilder);
request.source(sourceBuilder);
sourceBuilder.size(0);
try {
response = client.search(request, RequestOptions.DEFAULT);
} catch (IOException e) {
e.printStackTrace();
}
Aggregations aggregations = response.getAggregations();
Terms byTopicAggregation = aggregations.get("agg_name");
List extends Terms.Bucket> buckets = byTopicAggregation.getBuckets();
Map bucketsFieldsAgg = new LinkedHashMap<>();
buckets.forEach(bucket ->
bucketsFieldsAgg.put(bucket.getKeyAsString(), bucket.getDocCount())
);
fieldAggMap.put(fieldName, bucketsFieldsAgg);
}
return fieldAggMap;
}