Environment set up

Create a SpringBoot project.

configuration

<properties>
    <java.version>1.8</java.version>
    <!-- Override the ES version so that it matches the locally installed Elasticsearch -->
    <elasticsearch.version>7.6.2</elasticsearch.version>
</properties>

<dependencies>
    <!-- Parse web pages. Can only parse web pages -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
    <!-- fastJson -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.62</version>
    </dependency>
    <!-- Elasticsearch -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-thymeleaf</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-devtools</artifactId>
        <scope>runtime</scope>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-configuration-processor</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <optional>true</optional>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
        <exclusions>
            <exclusion>
                <groupId>org.junit.vintage</groupId>
                <artifactId>junit-vintage-engine</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
Copy the code

## Disable the Thymeleaf cache
spring.thymeleaf.cache=false
Copy the code

Import the front-end page resources. A Baidu Cloud link to them is given in the Elasticsearch overview.

controller

@Controller
public class IndexController {

    @RequestMapping({"/","/index"})
    public String index(a){
        return "index"; }}Copy the code

Visit http://localhost:8080/

Jsoup parsing

<!-- Parse web pages. Can only parse web pages -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
Copy the code

Create the utils package and create the HtmlParseUtil class

After inspecting the HTML structure of the JD search page, you should be able to understand the following code

@Component// Use @autoWired injection. Use new it instead of handing it over to Spring
public class HtmlParseUtil {

    public static void main(String[] args) throws IOException {
        // Get the request. Need to connected to the Internet
        String url = "https://search.jd.com/Search?keyword=java";
        // Parse the page. The Document object returned by Jsoup is the browser's Document object
        Document document = Jsoup.parse(new URL(url), 30000);
        // All the operations that Document can do in js can be performed in the next operation
        Element element = document.getElementById("J_goodsList");
        System.out.println(element.html());// Print the HTML source code under the J_goodList tag
        // Get all the li elements
        Elements elements = element.getElementsByTag("li");
        // Get all the contents of the element
        for (Element e1 : elements) {
            // Image lazy loading
            String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e1.getElementsByClass("p-price").eq(0).text();
            String title = e1.getElementsByClass("p-name").eq(0).text();
            System.out.println("= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = ="); System.out.println(img); System.out.println(price); System.out.println(title); }}}Copy the code

The corresponding information is successfully obtained. Next, we encapsulate it. Start by creating a POJO Content object

Then encapsulate it into a parseJD method.

@Component// Use @autoWired injection. Use new it instead of handing it over to Spring
public class HtmlParseUtil {

    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().parseJD("Vue").forEach(System.out::println);
    }


    public ArrayList<Content> parseJD(String keywords) throws IOException {
        // Get the request. Need to connected to the Internet
        String url = "https://search.jd.com/Search?keyword=" + keywords;
        System.out.println(url);
        // Parse the page. The Document object returned by Jsoup is the browser's Document object
        Document document = Jsoup.parse(new URL(url), 30000);
        // All the operations that Document can do in js can be performed in the next operation
        Element element = document.getElementById("J_goodsList");
        //System.out.println(element.html());
        // Get all the li elements
        Elements elements = element.getElementsByTag("li");

        ArrayList<Content> goodsList = new ArrayList<>();
        // Get all the contents of the element
        for (Element e1 : elements) {
            String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e1.getElementsByClass("p-price").eq(0).text();
            String title = e1.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setImg(img);
            content.setTitle(title);
            content.setPrice(price);
            goodsList.add(content);
        }
        returngoodsList; }}Copy the code

Business writing

The Elasticsearch client configuration class is still required.

@Configuration
public class ElasticSearchClientConfig {

    @Bean
    public RestHighLevelClient restHighLevelClient(a) {
        RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(
                        new HttpHost("localhost".9200."http")));
        returnclient; }}Copy the code

Write the Service business class

@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    //1. Place parsed data into es index
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);

        // Put the query data into es
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");// The expiration time is two minutes

        for (int i = 0; i < contents.size(); i++){ System.out.println(JSON.toJSONString(contents.get(i))); bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return! bulk.hasFailures();// Returns whether the insertion was successful}}Copy the code

Write the controller

@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean result = contentService.parseContent(keyword);
        returnresult; }}Copy the code

Start the project and test http://localhost:8080/parse/java

The Java-related product information was successfully added.

Then we continued to write the Service, adding a paging search for data in ES.

@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    //1. Place parsed data into es index
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);

        // Put the query data into es
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");// The expiration time is two minutes

        for (int i = 0; i < contents.size(); i++){ System.out.println(JSON.toJSONString(contents.get(i))); bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return! bulk.hasFailures();// Returns whether the insertion was successful
    }

    //2. Obtain these data for search function
    public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
        if (pageNo<=1){
            pageNo = 1;
        }

        // Conditional search
        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        / / paging
        sourceBuilder.from(pageNo);// Start data
        sourceBuilder.size(pageSize);// Page size

        // Precisely match the keyword
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));// Timeout control

        // Perform a search
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        ArrayList<Map<String,Object>> list = new ArrayList<>();

        SearchHit[] hits = searchResponse.getHits().getHits();// Get the hits array object
        for (SearchHit documentFields : hits){
            list.add(documentFields.getSourceAsMap());// Add to list
        }
        returnlist; }}Copy the code

Then add a request to the Controller

@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean result = contentService.parseContent(keyword);
        return result;
    }

    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String,Object>> search(@PathVariable("keyword") String keyword,
                                           @PathVariable("pageNo") int pageNo,
                                           @PathVariable("pageSize") int pageSize) throws IOException {

        List<Map<String, Object>> list = contentService.searchPage(keyword, pageNo, pageSize);
        returnlist; }}Copy the code

Start the project and test http://localhost:8080/search/java/1/20

The front page

Import Vue and Axios. I’m using the online (CDN) versions here.

<script src="https://cdn.staticfile.org/vue/2.6.2/vue.min.js"></script>

<script src="https://unpkg.com/axios/dist/axios.min.js"></script>

Modify our Index page.

Start the project to see the effect. (I have parsed the VUE data and added it to ES)

Highlighting function

Let’s modify the code in the business class Service.

Replace the title in _source with the highlighted version of the field

@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    //1. Place parsed data into es index
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);

        // Put the query data into es
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");// The expiration time is two minutes

        for (int i = 0; i < contents.size(); i++){ System.out.println(JSON.toJSONString(contents.get(i))); bulkRequest.add(new IndexRequest("jd_goods").source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return! bulk.hasFailures();// Returns whether the insertion was successful
    }

    //2. Obtain these data for search function
    public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
        if (pageNo<=1){
            pageNo = 1;
        }

        // Conditional search
        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        / / paging
        sourceBuilder.from(pageNo);// Start data
        sourceBuilder.size(pageSize);// Page size

        // Precisely match the keyword
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));// Timeout control

        / / highlight
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");// Highlight the field
        highlightBuilder.requireFieldMatch(false);// Turn off multiple highlights. For example, if there are multiple Vues in the title, highlight only one
        highlightBuilder.preTags("<span style='color:red'>");// Pre-label
        highlightBuilder.postTags("</span>");// Back label
        sourceBuilder.highlighter(highlightBuilder);// Add highlight


        // Perform a search
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        ArrayList<Map<String,Object>> list = new ArrayList<>();

        SearchHit[] hits = searchResponse.getHits().getHits();// Get the hits array object
        for (SearchHit hit : hits){
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();// The original result
            HighlightField title = highlightFields.get("title");
            // Parse the highlighted field, replacing the original field with the highlighted field
            if(title! =null){
                Text[] fragments = title.fragments();
                String hTitle = "";
                for (Text text : fragments) {
                     hTitle += text;
                }
                sourceAsMap.put("title",hTitle);// Replace the highlighted field with the original content
            }
            list.add(sourceAsMap);
        }
        returnlist; }}Copy the code

<p class="productTitle">
    <a v-html="result.title">  </a>
</p>
Copy the code

Restart the service and access the test. http://localhost:8080/

Done!