博客信息

lucene案例

发布时间:『 2019-01-27 22:27』  博客类别:索引框架  阅读(985)

对博客表(Blog)进行 Lucene 索引操作(新增、删除、更新、重建、搜索)的帮助类

package com.javaxl.p1.component;

import com.javaxl.p1.entity.Blog;
import com.javaxl.p1.service.BlogService;
import com.javaxl.p1.utils.DateUtil;
import com.javaxl.p1.utils.LuceneUtil;
import com.javaxl.p1.utils.PropertiesUtil;
import com.javaxl.p1.utils.StringUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.store.Directory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.*;

/**
 * Maintains and queries the Lucene index of blog posts.
 * <p>
 * Every blog is stored as one Lucene document with the fields {@code bid},
 * {@code title}, {@code releaseDate} and {@code content}. The index location
 * is read from the {@code indexPath} property through {@code PropertiesUtil}.
 * <p>
 * All Lucene resources are opened in try-with-resources blocks so that the
 * writer/reader and the directory are released even when an operation fails;
 * otherwise a failed write would leave the index write lock held.
 *
 * @author 小李飞刀
 * @site www.xiaomage.com
 * @company xxx
 * @create  2019-01-27 21:16
 */
@Component
public class BlogIndex {
    /** Property key whose value is the index directory path. */
    private static final String INDEX_PATH_KEY = "indexPath";
    /** Maximum number of hits fetched for one search. */
    private static final int MAX_HITS = 100;
    /** Length of the plain content summary used when no highlighted fragment is available. */
    private static final int SUMMARY_LENGTH = 200;

    @Autowired
    private BlogService blogService;

    /**
     * Adds one blog to the index (called when a blog is published).
     *
     * @param blog the blog to index
     * @throws Exception if the index cannot be opened or written
     */
    public void addIndex(Blog blog) throws Exception {
        // Build the document first so a bad blog cannot fail while the writer is open.
        Document doc = toDocument(blog);
        try (Directory directory = openDirectory();
             IndexWriter writer = LuceneUtil.getIndexWriter(directory, new SmartChineseAnalyzer())) {
            writer.addDocument(doc);
        }
    }

    /**
     * Removes the document of the given blog from the index.
     *
     * @param bid blog id, matched exactly against the {@code bid} field
     * @throws Exception if the index cannot be opened or written
     */
    public void deleteIndex(String bid) throws Exception {
        try (Directory directory = openDirectory();
             IndexWriter writer = LuceneUtil.getIndexWriter(directory, new SmartChineseAnalyzer())) {
            writer.deleteDocuments(new Term("bid", bid));
            // Merge segments containing deletions so the deleted document is physically removed.
            writer.forceMergeDeletes();
            writer.commit();
        }
    }

    /**
     * Replaces the indexed document of the given blog with a freshly built one.
     *
     * @param blog the blog whose index entry is refreshed; matched by its {@code bid}
     * @throws Exception if the index cannot be opened or written
     */
    public void updateIndex(Blog blog) throws Exception {
        Document doc = toDocument(blog);
        try (Directory directory = openDirectory();
             IndexWriter writer = LuceneUtil.getIndexWriter(directory, new SmartChineseAnalyzer())) {
            writer.updateDocument(new Term("bid", String.valueOf(blog.getBid())), doc);
        }
    }

    /**
     * Rebuilds the whole index from the blogs returned by {@code BlogService}.
     * Intended for recovery when the index files are lost.
     * <p>
     * The blogs are loaded before the existing index directory is deleted, so a
     * failing query does not destroy the current index.
     *
     * @throws Exception if loading the blogs, deleting the old index or writing the new one fails
     */
    public void indexBLogs() throws Exception {
        String path = PropertiesUtil.getValue(INDEX_PATH_KEY);
        List<Blog> blogs = blogService.queryBlogsPager(null, null);

        FileUtils.deleteDirectory(new File(path));
        try (Directory directory = LuceneUtil.getDirectory(path);
             IndexWriter writer = LuceneUtil.getIndexWriter(directory, new SmartChineseAnalyzer())) {
            for (Blog blog : blogs) {
                writer.addDocument(toDocument(blog));
            }
        }
    }

    /**
     * Searches blog titles and contents for the given keywords.
     * <p>
     * The query text is parsed once against {@code title} and once against
     * {@code content}; a document matching either is a hit. At most
     * {@value #MAX_HITS} hits are returned. Titles and contents are replaced by
     * a highlighted fragment when the highlighter produces one.
     *
     * @param q query text in Lucene classic query syntax; {@code null} or blank yields an empty list
     * @return the matching blogs in the order Lucene returned them, never {@code null}
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public List<Blog> searchBlog(String q) throws Exception {
        if (q == null || q.trim().isEmpty()) {
            // The request parameter is optional; parsing null would fail with an NPE.
            return new ArrayList<>();
        }
        try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             Directory directory = openDirectory();
             DirectoryReader reader = LuceneUtil.getDirectoryReader(directory)) {
            IndexSearcher searcher = LuceneUtil.getIndexSearcher(reader);

            Query titleQuery = new QueryParser("title", analyzer).parse(q);
            Query contentQuery = new QueryParser("content", analyzer).parse(q);
            BooleanQuery.Builder eitherField = new BooleanQuery.Builder();
            eitherField.add(titleQuery, BooleanClause.Occur.SHOULD);
            eitherField.add(contentQuery, BooleanClause.Occur.SHOULD);

            // The highlighter is built from the title query and reused for the content.
            Highlighter highlighter = LuceneUtil.getHighlighter(titleQuery, "title");

            TopDocs topDocs = searcher.search(eitherField.build(), MAX_HITS);
            List<Blog> blogList = new ArrayList<>(topDocs.scoreDocs.length);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                blogList.add(toBlog(searcher.doc(scoreDoc.doc), analyzer, highlighter));
            }
            return blogList;
        }
    }

    /** Opens the index directory configured under {@code indexPath}. */
    private Directory openDirectory() throws Exception {
        return LuceneUtil.getDirectory(PropertiesUtil.getValue(INDEX_PATH_KEY));
    }

    /** Builds the Lucene document (bid, title, releaseDate, plain-text content) for one blog. */
    private Document toDocument(Blog blog) {
        Document doc = new Document();
        doc.add(new StringField("bid", String.valueOf(blog.getBid()), Field.Store.YES));
        doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
        // NOTE(review): this stores the indexing date, not the blog's own release date,
        // so a full rebuild stamps every blog with today's date - confirm this is intended.
        doc.add(new StringField("releaseDate", DateUtil.formatDate(new Date(), "yyyy-MM-dd"), Field.Store.YES));
        doc.add(new TextField("content", StringUtils.html2Text(blog.getContent()), Field.Store.YES));
        return doc;
    }

    /** Converts one search hit into a {@code Blog} carrying highlighted title and content. */
    private Blog toBlog(Document doc, SmartChineseAnalyzer analyzer, Highlighter highlighter) throws Exception {
        Blog blog = new Blog();
        blog.setBid(Integer.parseInt(doc.get("bid")));
        blog.setReleaseDateStr(doc.get("releaseDate"));

        String title = doc.get("title");
        if (title != null) {
            TokenStream titleTokens = analyzer.tokenStream("title", new StringReader(title));
            String highlighted = highlighter.getBestFragment(titleTokens, title);
            blog.setTitle(StringUtils.isBlank(highlighted) ? title : highlighted);
        }

        // The content is HTML-escaped before highlighting, so only the highlighter's markup stays unescaped.
        String content = StringEscapeUtils.escapeHtml4(doc.get("content"));
        if (content != null) {
            TokenStream contentTokens = analyzer.tokenStream("content", new StringReader(content));
            String highlighted = highlighter.getBestFragment(contentTokens, content);
            if (!StringUtils.isBlank(highlighted)) {
                blog.setContent(highlighted);
            } else if (content.length() <= SUMMARY_LENGTH) {
                blog.setContent(content);
            } else {
                // No keyword in the content: fall back to the leading characters as a summary.
                blog.setContent(content.substring(0, SUMMARY_LENGTH));
            }
        }
        return blog;
    }
}


对应的controller层代码

/**
     * Saves a new blog and adds it to the search index.
     *
     * @param blog    the blog bound from the request
     * @param request the current HTTP request (not used by this method)
     * @return a map whose {@code success} entry is {@code true} when both the
     *         insert and the indexing succeeded, {@code false} otherwise
     */
    @ResponseBody
    @RequestMapping("/add")
    public Map add(Blog blog, HttpServletRequest request){
        Map<String, Object> map = new HashMap<>();
        try {
            this.blogService.insert(blog);
            blogIndex.addIndex(blog);
            map.put("success",true);
        } catch (Exception e) {
            // NOTE(review): prefer the project's logger over printStackTrace.
            e.printStackTrace();
            // Report the failure explicitly instead of returning an empty map.
            map.put("success",false);
        }
        return map;
    }

    /**
     * Updates an existing blog and refreshes its entry in the search index.
     *
     * @param blog    the blog bound from the request
     * @param request the current HTTP request (not used by this method)
     * @return a map whose {@code success} entry is {@code true} when both the
     *         update and the re-indexing succeeded, {@code false} otherwise
     */
    @ResponseBody
    @RequestMapping("/edit")
    public Map edit(Blog blog,HttpServletRequest request){
        Map<String, Object> map = new HashMap<>();
        try {
            this.blogService.updateByPrimaryKeySelective(blog);
            blogIndex.updateIndex(blog);
            map.put("success",true);
        } catch (Exception e) {
            // NOTE(review): prefer the project's logger over printStackTrace.
            e.printStackTrace();
            // Report the failure explicitly instead of returning an empty map.
            map.put("success",false);
        }
        return map;
    }

    /**
     * Deletes a blog and removes its entry from the search index.
     *
     * @param blog the blog bound from the request; only its {@code bid} is read
     * @return a map whose {@code success} entry is {@code true} when both the
     *         delete and the index removal succeeded, {@code false} otherwise
     */
    @ResponseBody
    @RequestMapping("/del")
    public Map del(Blog blog){
        Map<String, Object> map = new HashMap<>();
        try {
            this.blogService.deleteByPrimaryKey(blog.getBid());
            blogIndex.deleteIndex(String.valueOf(blog.getBid()));
            map.put("success",true);
        } catch (Exception e) {
            // NOTE(review): prefer the project's logger over printStackTrace.
            e.printStackTrace();
            // Report the failure explicitly instead of returning an empty map.
            map.put("success",false);
        }
        return map;
    }

    /**
     * Regenerates the index files for all blogs.
     *
     * @return a map whose {@code success} entry is {@code true} when the rebuild
     *         succeeded, {@code false} otherwise
     */
    @ResponseBody
    @RequestMapping("/indexBLogs")
    public Map indexBLogs(){
        Map<String, Object> map = new HashMap<>();
        try {
            blogIndex.indexBLogs();
            map.put("success",true);
        } catch (Exception e) {
            // NOTE(review): prefer the project's logger over printStackTrace.
            e.printStackTrace();
            // Report the failure explicitly instead of returning an empty map.
            map.put("success",false);
        }
        return map;
    }

    /**
     * Searches blogs by keyword and renders one page of the result.
     * <p>
     * All hits are fetched from the index and the requested page is cut out of
     * that list. The page window is clamped to the list bounds, so a partial
     * last page or a page number past the end yields a shorter (possibly empty)
     * list instead of an {@code IndexOutOfBoundsException}.
     *
     * @param q       the search keywords; optional, a missing or blank value yields no results
     * @param request the current HTTP request, handed to the {@code PageBean}
     * @return the model and view for the result page
     * @throws Exception if the index search fails
     */
    @RequestMapping("/q")
    public ModelAndView search(@RequestParam(value="q",required=false) String q, HttpServletRequest request)throws Exception{
        PageBean pageBean = new PageBean();
        // presumably reads the page number and page size from the request - verify against PageBean
        pageBean.setRequest(request);
        ModelAndView mav=new ModelAndView();
        mav.addObject("pageTitle", "搜索关键字'"+q+"'结果页面_博客系统");
        mav.addObject("mainPage", "foreground/blogModule/blog/result.jsp");

        // Query the index; without keywords there is nothing to parse or search.
        List<Blog> blogList;
        if (q == null || q.trim().isEmpty()) {
            blogList = java.util.Collections.<Blog>emptyList();
        } else {
            blogList = blogIndex.searchBlog(q);
        }

        int total = blogList.size();
        pageBean.setTotal(total);

        // Cut the requested page out of the full hit list, clamped to [0, total].
        int page = pageBean.getPage();
        int rows = pageBean.getRows();
        int fromIndex = Math.min(Math.max((page - 1) * rows, 0), total);
        int toIndex = Math.min(Math.max(page * rows, fromIndex), total);
        mav.addObject("blogList", blogList.subList(fromIndex, toIndex));

        // Values needed by the view, e.g. for the previous/next page links.
        mav.addObject("q", q);
        mav.addObject("resultTotal", pageBean.getTotal());
        mav.addObject("pageBean",pageBean);
        mav.setViewName("index");
        return mav;
    }




关键字:     lucene  

备案号:湘ICP备19000029号

Copyright © 2018-2019 javaxl晓码阁 版权所有