Elasticsearch検索と統計

Elasticsearchを使ってタグ画像の統計を行ったのはかなり前のことで、最近あらためて触ってみると、すっかり勘が鈍っていました。現在はバージョン7.12を使用しています。以前はTransportClientを利用していましたが、現在は公式に推奨されているRestHighLevelClientを使用しています。

最近RestHighLevelClientを使ってみて便利だと感じたので、基本的な機能をいくつか記録しておきます。

1. クライアントの初期化と終了

/**
 * Creates a {@link RestHighLevelClient} that reaches a single node over plain HTTP,
 * stores it in the shared {@code client} field and returns it.
 *
 * @param host host name or address of the Elasticsearch node
 * @param port HTTP port of the node
 * @return the newly created client (the same instance now held in {@code client})
 */
public static RestHighLevelClient getClient(String host, int port) {
    LOGGER.info("Elasticsearchの初期化");
    HttpHost node = new HttpHost(host, port, "http");
    client = new RestHighLevelClient(RestClient.builder(node));
    return client;
}

/**
 * Closes the shared {@code client} if one has been created.
 *
 * <p>An {@link IOException} raised while closing is printed to standard error and
 * otherwise ignored, so this method never throws it to the caller.
 */
public static void closeES() {
    LOGGER.info("Elasticsearchのクローズ");
    if (client == null) {
        // Nothing was ever opened; nothing to release.
        return;
    }
    try {
        client.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

2. インデックスとマッピングの作成

/**
 * Creates an index with one shard, no replicas and an explicit mapping for the
 * {@code content}, {@code date} and {@code title} fields.
 *
 * <p>The field names match the keys written by the bulk insert/update helpers
 * ({@code title}, {@code content}); with mismatched names the analyzers configured
 * here would never be applied to the indexed documents.
 *
 * @param client    client used to send the create-index request
 * @param indexName name of the index to create
 * @return {@code true} if the cluster acknowledged the request; {@code false} if it
 *         was not acknowledged or an {@link IOException} occurred
 */
public boolean createIndexMapping(RestHighLevelClient client, String indexName) {
    LOGGER.info("インデックスとマッピングの作成...");
    CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName);

    createIndexRequest.settings(Settings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 0));

    try {
        XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("properties")
                .startObject("content")
                .field("type", "text") // data type
                .field("index", "true") // this is the default
                .field("analyzer", "ik_max_word")
                .field("search_analyzer", "ik_smart")
                .endObject()
                .startObject("date")
                .field("type", "date") // data type
                .field("index", "true") // this is the default
                .endObject()
                .startObject("title")
                .field("type", "text") // data type
                .field("index", "true") // this is the default
                .field("analyzer", "ik_max_word")
                .field("search_analyzer", "ik_smart")
                .endObject()
                .endObject()
                .endObject();

        createIndexRequest.mapping(xContentBuilder);
        CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
        return createIndexResponse.isAcknowledged();
    } catch (IOException e) {
        e.printStackTrace();
    }

    return false;
}

3. ドキュメントの取得

/**
 * Fetches a single document by id and returns its source.
 *
 * @param client     client used to send the get request
 * @param index      index to read from
 * @param documentId id of the document
 * @return the document source, or {@code null} when the document does not exist or
 *         an {@link IOException} occurred (the exception is printed to standard error)
 */
public Map fetchDocumentById(RestHighLevelClient client, String index, String documentId) {
    GetRequest lookup = new GetRequest(index, documentId);
    try {
        GetResponse found = client.get(lookup, RequestOptions.DEFAULT);
        return found.isExists() ? found.getSource() : null;
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}

4. 複数ドキュメントの削除

/**
 * Deletes every document whose id is in {@code documentIds} using a delete-by-query
 * request with an ids query.
 *
 * @param client      client used to send the request
 * @param index       index to delete from
 * @param documentIds ids of the documents to delete
 * @return the delete-by-query response converted with {@code JSONObject.toJSON};
 *         when an {@link IOException} occurs the exception is printed and
 *         {@code null} is passed to the conversion instead
 */
public Object deleteDocumentsByIds(RestHighLevelClient client, String index, List<String> documentIds) {
    DeleteByQueryRequest request = new DeleteByQueryRequest(index);
    IdsQueryBuilder idsQuery = new IdsQueryBuilder();
    // addIds is varargs, so the whole list can be handed over in one call.
    idsQuery.addIds(documentIds.toArray(new String[0]));
    request.setQuery(idsQuery);

    BulkByScrollResponse outcome;
    try {
        outcome = client.deleteByQuery(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        e.printStackTrace();
        outcome = null;
    }
    return JSONObject.toJSON(outcome);
}

5. ドキュメントのバルクインサート

/**
 * Indexes all given documents with a single bulk request (10 second timeout).
 *
 * <p>Each document is written with its {@code id}, {@code title} and {@code content}
 * and uses the document id as the Elasticsearch {@code _id}.
 *
 * <p>Success is decided by {@link BulkResponse#hasFailures()}. The overall status of
 * a bulk response does not reflect per-item results, so comparing it with
 * {@code RestStatus.CREATED} never reports success.
 *
 * @param client    client used to send the bulk request
 * @param index     target index
 * @param documents documents to index
 * @return {@code true} if every item succeeded (or the list was empty);
 *         {@code false} if any item failed or an {@link IOException} occurred
 */
public boolean bulkInsertDocuments(RestHighLevelClient client, String index, List<Document> documents) {
    if (documents.isEmpty()) {
        // A bulk request without actions is rejected by request validation; nothing to do.
        return true;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("10s");
    for (Document doc : documents) {
        Map<String, Object> data = new HashMap<>();
        data.put("id", doc.getId());
        data.put("title", doc.getTitle());
        data.put("content", doc.getContent());

        bulkRequest.add(new IndexRequest(index).id(String.valueOf(doc.getId())).source(data));
    }

    try {
        BulkResponse responses = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !responses.hasFailures();
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}

6. ドキュメントのバルクアップデート

/**
 * Partially updates all given documents with a single bulk request (10 second timeout).
 *
 * <p>Each update targets the document whose {@code _id} equals the document id and
 * merges in its {@code id}, {@code title} and {@code content}.
 *
 * <p>Success is decided by {@link BulkResponse#hasFailures()}. The overall status of
 * a bulk response is {@code OK} even when individual items fail (for example when a
 * document to update does not exist), so checking the status alone hides failures.
 *
 * @param client    client used to send the bulk request
 * @param index     target index
 * @param documents documents carrying the new field values
 * @return {@code true} if every item succeeded (or the list was empty);
 *         {@code false} if any item failed or an {@link IOException} occurred
 */
public boolean bulkUpdateDocuments(RestHighLevelClient client, String index, List<Document> documents) {
    if (documents.isEmpty()) {
        // A bulk request without actions is rejected by request validation; nothing to do.
        return true;
    }

    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("10s");
    for (Document doc : documents) {
        Map<String, Object> data = new HashMap<>();
        data.put("id", doc.getId());
        data.put("title", doc.getTitle());
        data.put("content", doc.getContent());

        bulkRequest.add(new UpdateRequest().index(index).id(String.valueOf(doc.getId())).doc(data));
    }

    try {
        BulkResponse responses = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !responses.hasFailures();
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}

7. 全文検索

7.1 スクロールを使用した検索

/**
 * Runs a full-text query and collects every hit by paging with the scroll API.
 *
 * <p>The query requires a match on {@code field2} and optionally matches
 * {@code field1}; matches in {@code field2} are highlighted.
 *
 * <p>If an {@link IOException} occurs on the initial search or on any scroll page,
 * the exception is printed, paging stops, and the hits collected so far are returned.
 * The scroll context is cleared in all cases once a scroll id has been obtained.
 *
 * @param client  client used to send the requests
 * @param index   index to search
 * @param field1  optional ("should") match field
 * @param field2  required ("must") match field, also the highlighted field
 * @param query   text to search for
 * @param size    number of hits per scroll page
 * @param include source fields to include
 * @param exclude source fields to exclude
 * @return all hits retrieved before the scroll was exhausted or an I/O error occurred
 */
public List<SearchHit> searchWithScroll(RestHighLevelClient client, String index, String field1, String field2, String query, int size, String[] include, String[] exclude) {
    List<SearchHit> results = new ArrayList<>();
    Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L));

    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.preTags("<a style='color: #e4393c'>");
    highlightBuilder.postTags("</a>");
    highlightBuilder.field(field2);

    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.size(size);
    sourceBuilder.highlighter(highlightBuilder);
    sourceBuilder.query(QueryBuilders.boolQuery()
            .should(QueryBuilders.matchQuery(field1, query))
            .must(QueryBuilders.matchQuery(field2, query)));
    sourceBuilder.fetchSource(include, exclude);

    SearchRequest searchRequest = new SearchRequest(index);
    searchRequest.scroll(scroll);
    searchRequest.source(sourceBuilder);

    String scrollId = null;
    try {
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        scrollId = searchResponse.getScrollId();
        SearchHit[] hits = searchResponse.getHits().getHits();
        while (hits != null && hits.length > 0) {
            for (SearchHit hit : hits) {
                results.add(hit);
            }
            SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
            scrollRequest.scroll(scroll);
            // An IOException here leaves the loop; re-reading the previous response
            // would otherwise append the same page forever.
            searchResponse = client.scroll(scrollRequest, RequestOptions.DEFAULT);
            scrollId = searchResponse.getScrollId();
            hits = searchResponse.getHits().getHits();
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (scrollId != null) {
            // Release the server-side scroll context even when paging was interrupted.
            ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
            clearScrollRequest.addScrollId(scrollId);
            try {
                client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return results;
}

7.2 search_afterを使用した検索

/**
 * Runs a full-text query and collects every hit by paging with {@code search_after}.
 *
 * <p>The query requires a match on {@code field2} and optionally matches
 * {@code field1}; matches in {@code field2} are highlighted. Results are sorted by
 * {@code _id} descending and the sort values of the last hit of each page are used
 * as the cursor for the next page.
 *
 * <p>If an {@link IOException} occurs, it is printed and the hits collected so far
 * are returned.
 *
 * @param client  client used to send the requests
 * @param index   index to search
 * @param field1  optional ("should") match field
 * @param field2  required ("must") match field, also the highlighted field
 * @param query   text to search for
 * @param size    number of hits per page
 * @param include source fields to include
 * @param exclude source fields to exclude
 * @return all hits retrieved before the result set was exhausted or an I/O error occurred
 */
public List<SearchHit> searchWithSearchAfter(RestHighLevelClient client, String index, String field1, String field2, String query, int size, String[] include, String[] exclude) {
    List<SearchHit> results = new ArrayList<>();
    SearchRequest request = new SearchRequest(index);
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    HighlightBuilder highlighter = new HighlightBuilder();
    highlighter.preTags("<a style='color: #e4393c'>");
    highlighter.postTags("</a>");
    highlighter.field(field2);

    QueryBuilder matchBoth = QueryBuilders.boolQuery()
            .should(QueryBuilders.matchQuery(field1, query))
            .must(QueryBuilders.matchQuery(field2, query));

    sourceBuilder.query(matchBoth);
    sourceBuilder.fetchSource(include, exclude);
    sourceBuilder.size(size);
    sourceBuilder.sort("_id", SortOrder.DESC);
    sourceBuilder.highlighter(highlighter);
    request.source(sourceBuilder);

    try {
        // The request keeps a reference to sourceBuilder, so updating the cursor on
        // the builder is enough to move the same request to the next page.
        for (SearchHit[] page = client.search(request, RequestOptions.DEFAULT).getHits().getHits();
             page.length > 0;
             page = client.search(request, RequestOptions.DEFAULT).getHits().getHits()) {
            for (SearchHit hit : page) {
                results.add(hit);
            }
            SearchHit cursor = page[page.length - 1];
            sourceBuilder.searchAfter(cursor.getSortValues());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return results;
}

8. 統計分析

public static Map> analyzeCounts(RestHighLevelClient client, String index, String query, String field1, String field2, String... aggFields) {
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.fetchSource(false);
    SearchRequest request = new SearchRequest(index);
    QueryBuilder queryBuilder = QueryBuilders.boolQuery()
            .should(QueryBuilders.matchQuery(field1, query))
            .must(QueryBuilders.matchQuery(field2, query));
    sourceBuilder.query(queryBuilder);
    Map> fieldAggMap = new LinkedHashMap<>();
    TermsAggregationBuilder aggregationBuilder;
    SearchResponse response = null;
    for(String fieldName : aggFields) {
        aggregationBuilder = AggregationBuilders.terms("agg_name").field(fieldName);
        sourceBuilder.aggregation(aggregationBuilder);
        request.source(sourceBuilder);
        sourceBuilder.size(0);
        try {
            response = client.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        Aggregations aggregations = response.getAggregations();
        Terms byTopicAggregation = aggregations.get("agg_name");
        List buckets = byTopicAggregation.getBuckets();
        Map bucketsFieldsAgg = new LinkedHashMap<>();
        buckets.forEach(bucket ->
                bucketsFieldsAgg.put(bucket.getKeyAsString(), bucket.getDocCount())
        );
        fieldAggMap.put(fieldName, bucketsFieldsAgg);
    }
    return fieldAggMap;
}

タグ: Elasticsearch RestHighLevelClient Java 全文検索 統計分析

5月18日 22:22 投稿