hebwxw 发表于 2015-8-1 14:16:34

Apache Lucene 评分原理及代码分析

  在IndexSearcher类中有一个用于解释Lucene文档得分情况的方法,如下所示:



/**
 * Returns the Explanation produced by the given weight for the given document.
 *
 * This method performs no computation of its own: it forwards {@code doc}
 * together with {@code reader} to {@code weight.explain} and returns the result.
 * NOTE(review): {@code reader} is not declared in this excerpt; presumably it is
 * a field of the enclosing IndexSearcher -- confirm against the full class.
 *
 * @param weight the Weight that is asked to explain the score
 * @param doc    the document number passed through to {@code weight.explain}
 * @return whatever {@code weight.explain(reader, doc)} returns
 * @throws IOException if {@code weight.explain} throws it
 */
1 public Explanation explain(Weight weight, int doc) throws IOException {
2   return weight.explain(reader, doc);
3 }
  返回的这个Explanation实例解释了Lucene中Document的得分情况。我们可以写个测试,直观地感受一下这个Explanation实例到底记录了一个Document的哪些信息。
  写一个测试类,如下所示:



1 package org.shirdrn.lucene.learn;
2
3 import java.io.IOException;
4 import java.util.Date;
5
6 import net.teamhot.lucene.ThesaurusAnalyzer;
7
8 import org.apache.lucene.document.Document;
9 import org.apache.lucene.document.Field;
10 import org.apache.lucene.index.CorruptIndexException;
11 import org.apache.lucene.index.IndexWriter;
12 import org.apache.lucene.index.Term;
13 import org.apache.lucene.index.TermDocs;
14 import org.apache.lucene.search.Explanation;
15 import org.apache.lucene.search.Hits;
16 import org.apache.lucene.search.IndexSearcher;
17 import org.apache.lucene.search.Query;
18 import org.apache.lucene.search.TermQuery;
19 import org.apache.lucene.store.LockObtainFailedException;
20
21 public class AboutLuceneScore {
22
23 private String path = "E:\\Lucene\\index";
24
25 public void createIndex(){
26    IndexWriter writer;
27    try {
28   writer = new IndexWriter(path,new ThesaurusAnalyzer(),true);
29   
30   Field fieldA = new Field("contents","一人",Field.Store.YES,Field.Index.TOKENIZED);
31   Document docA = new Document();
32   docA.add(fieldA);
33   
34   Field fieldB = new Field("contents","一人 之交 一人之交",Field.Store.YES,Field.Index.TOKENIZED);
35   Document docB = new Document();
36   docB.add(fieldB);
37   
38   Field fieldC = new Field("contents","一人 之下 一人之下",Field.Store.YES,Field.Index.TOKENIZED);
39   Document docC = new Document();
40   docC.add(fieldC);
41   
42   Field fieldD = new Field("contents","一人 做事 一人当 一人做事一人当",Field.Store.YES,Field.Index.TOKENIZED);
43   Document docD = new Document();
44   docD.add(fieldD);
45   
46   Field fieldE = new Field("contents","一人 做事 一人當 一人做事一人當",Field.Store.YES,Field.Index.TOKENIZED);
47   Document docE = new Document();
48   docE.add(fieldE);
49
50   writer.addDocument(docA);
51   writer.addDocument(docB);
52   writer.addDocument(docC);
53   writer.addDocument(docD);
54   writer.addDocument(docE);
55   
56   writer.close();
57    } catch (CorruptIndexException e) {
58   e.printStackTrace();
59    } catch (LockObtainFailedException e) {
60   e.printStackTrace();
61    } catch (IOException e) {
62   e.printStackTrace();
63    }
64 }
65
66 public static void main(String[] args) {
67    AboutLuceneScore aus = new AboutLuceneScore();
68    aus.createIndex();    // 建立索引
69    try {
70   String keyword = "一人";
71   Term term = new Term("contents",keyword);
72   Query query = new TermQuery(term);
73   IndexSearcher searcher = new IndexSearcher(aus.path);
74   Date startTime = new Date();
75   Hits hits = searcher.search(query);
76   TermDocs termDocs = searcher.getIndexReader().termDocs(term);
77   while(termDocs.next()){
78      System.out.print("搜索关键字在编号为 "+termDocs.doc());
79      System.out.println(" 的Document中出现过 "+termDocs.freq()+" 次");
80   }
81   System.out.println("********************************************************************");
82   for(int i=0;i
页: [1]
查看完整版本: Apache Lucene 评分原理及代码分析