solr源码类里的一些方法

linghaiyan · 发表于 2016-12-16 10:50:14

//SolrIndexSearcher.java
/**
* Resolves the document ids for a query command, using the query result
* cache when possible, and stores the result on {@code qr}.
*
* Flow, as implemented below:
* <ol>
* <li>A fresh DocListAndSet is attached to {@code qr}.</li>
* <li>If queryResultCache is non-null and the command carries no DocSet
*     filter, a QueryResultKey is built from the query, filter list, sort and
*     flags. Unless NO_CHECK_QCACHE is set the cache is probed; when a usable
*     entry yields a non-null window via subset(), the method returns early
*     (first computing the DocSet as well if GET_DOCSET is set).</li>
* <li>Still inside the cache-enabled branch, the number of documents to
*     collect is rounded up to a multiple of queryResultWindowSize so that the
*     generated superset can serve neighbouring pages.</li>
* <li>The result is generated either from the filter cache plus sortDocSet
*     (only when scores are not needed and the sort does not use score), or by
*     delegating to getDocListAndSetNC / getDocListNC.</li>
* <li>The superset is put into queryResultCache when a key was built, its
*     size is at most queryResultMaxDocsCached, and the result is not
*     partial.</li>
* </ol>
*
* @param qr  result holder; receives the DocListAndSet created here
* @param cmd the query command (query, filters, sort, offset, len, flags)
* @throws IOException declared by this method; presumably propagated from
*         the underlying index search calls
*/
private void getDocListC(QueryResult qr, QueryCommand cmd) throws IOException {
// old parameters: DocListAndSet out, Query query, List<Query> filterList, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
DocListAndSet out = new DocListAndSet();
qr.setDocListAndSet(out);
QueryResultKey key=null;
// Highest result position the request asks for (offset + len).
// NOTE(review): the original annotation says the request defaults to 10 rows;
// that default is not visible in this method - confirm against the caller.
int maxDocRequested = cmd.getOffset() + cmd.getLen();
// check for overflow, and check for # docs in index
if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc();
int supersetMaxDoc= maxDocRequested;
DocList superset;
// we can try and look up the complete query in the cache.
// we can't do that if filter!=null though (we don't want to
// do hashCode() and equals() for a big DocSet).
if (queryResultCache != null && cmd.getFilter()==null) {
// all of the current flags can be reused during warming,
// so set all of them on the cache key.
/**
* Key built from the query, filter list, sort and flags of the command;
* it is used both to look up and to store entries in queryResultCache.
*/
key = new QueryResultKey(cmd.getQuery(), cmd.getFilterList(), cmd.getSort(), cmd.getFlags());
if ((cmd.getFlags() & NO_CHECK_QCACHE)==0) {
superset = (DocList)queryResultCache.get(key);
if (superset != null) {
// check that the cache entry has scores recorded if we need them
if ((cmd.getFlags() & GET_SCORES)==0 || superset.hasScores()) {
// NOTE: subset() returns null if the DocList has fewer docs than
// requested
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
}
}
if (out.docList != null) {
// found the docList in the cache... now check if we need the docset too.
// OPT: possible future optimization - if the doclist contains all the matches,
// use it to make the docset instead of rerunning the query.
if (out.docSet==null && ((cmd.getFlags() & GET_DOCSET)!=0) ) {
if (cmd.getFilterList()==null) {
out.docSet = getDocSet(cmd.getQuery());
} else {
// main query first, then every filter query, resolved as one list
List<Query> newList = new ArrayList<Query>(cmd.getFilterList()
.size()+1);
newList.add(cmd.getQuery());
newList.addAll(cmd.getFilterList());
out.docSet = getDocSet(newList);
}
}
// cache hit: nothing is generated and nothing is re-cached
return;
}
}
// If we are going to generate the result, bump up to the
// next resultWindowSize for better caching.
// handle 0 special case as well as avoid idiv in the common case.
if (maxDocRequested < queryResultWindowSize) {
supersetMaxDoc=queryResultWindowSize;
} else {
supersetMaxDoc = ((maxDocRequested -1)/queryResultWindowSize + 1)*queryResultWindowSize;
// a negative value means the rounding overflowed int; fall back to the unrounded count
if (supersetMaxDoc < 0) supersetMaxDoc=maxDocRequested;
}
}

// OK, so now we need to generate an answer.
// One way to do that would be to check if we have an unordered list
// of results for the base query.  If so, we can apply the filters and then
// sort by the resulting set.  This can only be used if:
// - the sort doesn't contain score
// - we don't want score returned.
// check if we should try and use the filter cache
boolean useFilterCache=false;
if ((cmd.getFlags() & (GET_SCORES|NO_CHECK_FILTERCACHE))==0 && useFilterForSortedQuery && cmd.getSort() != null && filterCache != null) {
useFilterCache=true;
SortField[] sfields = cmd.getSort().getSort();
for (SortField sf : sfields) {
// any sort on score rules out the filter-cache path
if (sf.getType() == SortField.SCORE) {
useFilterCache=false;
break;
}
}
}
if (useFilterCache) {
// now actually use the filter cache.
// for large filters that match few documents, this may be
// slower than simply re-executing the query.
if (out.docSet == null) {
out.docSet = getDocSet(cmd.getQuery(),cmd.getFilter());
DocSet bigFilt = getDocSet(cmd.getFilterList());
if (bigFilt != null) out.docSet = out.docSet.intersection(bigFilt);
}
// todo: there could be a sortDocSet that could take a list of
// the filters instead of anding them first...
// perhaps there should be a multi-docset-iterator
superset = sortDocSet(out.docSet,cmd.getSort(),supersetMaxDoc);
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
} else {
// do it the normal way...
cmd.setSupersetMaxDoc(supersetMaxDoc);
if ((cmd.getFlags() & GET_DOCSET)!=0) {
DocSet qDocSet = getDocListAndSetNC(qr,cmd);
// cache the docSet matching the query w/o filtering
if (filterCache!=null && !qr.isPartialResults()) filterCache.put(cmd.getQuery(),qDocSet);
} else {
/**
* Collects the matching document ids; the superset is read back from
* out.docList right after this branch.
*/
getDocListNC(qr,cmd);
//Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
}
// keep the full superset for caching, then expose only the requested window
superset = out.docList;
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
}
// lastly, put the superset in the cache if the size is less than or equal
// to queryResultMaxDocsCached
if (key != null && superset.size() <= queryResultMaxDocsCached && !qr.isPartialResults()) {
// store the superset of doc ids under the key built from the user's query
queryResultCache.put(key, superset);
}
}
--------------------------------------------------------------------
/**
 * Runs the query of {@code cmd} against the index without consulting the query
 * result cache ("NC") and stores the collected document ids on {@code qr} as a
 * DocSlice.
 *
 * <p>Three collection strategies are used:
 * <ul>
 *   <li>zero documents requested: only the hit count and top score are gathered;</li>
 *   <li>explicit sort: hits are ranked through a FieldSortedHitQueue;</li>
 *   <li>no sort: hits are ranked by score through a ScorePriorityQueue.</li>
 * </ul>
 * In every case a hit is skipped when a filter DocSet exists and does not
 * contain the document. When the command's time allowance is positive the
 * collector is wrapped in a TimeLimitedCollector; on timeout a warning is logged
 * and {@code qr} is flagged as holding partial results.
 *
 * @param qr  result holder; receives the DocSlice and the partial-results flag
 * @param cmd query command supplying query, filter(s), sort, superset size,
 *            flags and time allowance
 * @throws IOException declared by this method; presumably propagated from the
 *         index search
 */
private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException {
  //Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
  //Query query, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
  DocSet filter = cmd.getFilter()!=null ? cmd.getFilter() : getDocSet(cmd.getFilterList());
  final long timeAllowed = cmd.getTimeAllowed();
  int len = cmd.getSupersetMaxDoc();
  int last = len;
  // clamp on overflow (negative) and to the number of docs in the index
  if (last < 0 || last > maxDoc()) last=maxDoc();
  final int lastDocRequested = last;
  int nDocsReturned;
  int totalHits;
  float maxScore;
  int[] ids;
  float[] scores;

  // turn the command's query into one that can be executed
  Query query = QueryUtils.makeQueryable(cmd.getQuery());

  // handle zero case...
  if (lastDocRequested<=0) {
    final DocSet filt = filter;
    // single-element arrays so the anonymous collector can mutate them
    final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
    final int[] numHits = new int[1];
    HitCollector hc = new HitCollector() {
      public void collect(int doc, float score) {
        if (filt!=null && !filt.exists(doc)) return;
        numHits[0]++;
        if (score > topscore[0]) topscore[0]=score;
      }
    };
    if( timeAllowed > 0 ) {
      hc = new TimeLimitedCollector( hc, timeAllowed );
    }
    try {
      searcher.search(query, hc );
    }
    catch( TimeLimitedCollector.TimeExceededException x ) {
      log.warning( "Query: " + query + "; " + x.getMessage() );
      qr.setPartialResults(true);
    }
    nDocsReturned=0;
    ids = new int[nDocsReturned];
    scores = new float[nDocsReturned];
    totalHits = numHits[0];
    maxScore = totalHits>0 ? topscore[0] : 0.0f;
  } else if (cmd.getSort() != null) {
    // can't use TopDocs if there is a sort since it
    // will do automatic score normalization.
    // NOTE: this changed late in Lucene 1.9
    final DocSet filt = filter;
    final int[] numHits = new int[1];
    final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, cmd.getSort().getSort(), len);
    HitCollector hc = new HitCollector() {
      public void collect(int doc, float score) {
        if (filt!=null && !filt.exists(doc)) return;
        numHits[0]++;
        hq.insert(new FieldDoc(doc, score));
      }
    };
    if( timeAllowed > 0 ) {
      hc = new TimeLimitedCollector( hc, timeAllowed );
    }
    try {
      searcher.search(query, hc );
    }
    catch( TimeLimitedCollector.TimeExceededException x ) {
      log.warning( "Query: " + query + "; " + x.getMessage() );
      qr.setPartialResults(true);
    }
    totalHits = numHits[0]; // total number of hits
    maxScore = totalHits>0 ? hq.getMaxScore() : 0.0f;
    nDocsReturned = hq.size();
    ids = new int[nDocsReturned];
    scores = (cmd.getFlags()&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
    // the arrays are filled back to front, one slot per popped entry
    for (int i = nDocsReturned -1; i >= 0; i--) {
      FieldDoc fieldDoc = (FieldDoc)hq.pop();
      // fillFields is the point where score normalization happens
      // hq.fillFields(fieldDoc)
      ids[i] = fieldDoc.doc;
      if (scores != null) scores[i] = fieldDoc.score;
    }
  } else {
    // No Sort specified (sort by score descending)
    // This case could be done with TopDocs, but would currently require
    // getting a BitSet filter from a DocSet which may be inefficient.
    final DocSet filt = filter;
    final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
    final int[] numHits = new int[1];
    HitCollector hc = new HitCollector() {
      float minScore=Float.NEGATIVE_INFINITY;  // minimum score in the priority queue
      public void collect(int doc, float score) {
        if (filt!=null && !filt.exists(doc)) return;
        if (numHits[0]++ < lastDocRequested || score >= minScore) {
          // TODO: if docs are always delivered in order, we could use "score>minScore"
          // instead of "score>=minScore" and avoid tiebreaking scores
          // in the priority queue.
          // but might BooleanScorer14 might still be used and deliver docs out-of-order?
          hq.insert(new ScoreDoc(doc, score));
          minScore = ((ScoreDoc)hq.top()).score;
        }
      }
    };
    if( timeAllowed > 0 ) {
      hc = new TimeLimitedCollector( hc, timeAllowed );
    }
    try {
      // run the search; accepted hits accumulate in hq
      searcher.search(query, hc );
    }
    catch( TimeLimitedCollector.TimeExceededException x ) {
      log.warning( "Query: " + query + "; " + x.getMessage() );
      qr.setPartialResults(true);
    }
    totalHits = numHits[0];
    nDocsReturned = hq.size();
    ids = new int[nDocsReturned];
    scores = (cmd.getFlags()&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
    ScoreDoc sdoc =null;
    // the arrays are filled back to front, one slot per popped entry
    for (int i = nDocsReturned -1; i >= 0; i--) {
      sdoc = (ScoreDoc)hq.pop();
      ids[i] = sdoc.doc;
      if (scores != null) scores[i] = sdoc.score;
    }
    // sdoc now holds the entry popped last (the one stored at index 0), if any
    maxScore = sdoc ==null ? 0.0f : sdoc.score;
  }

  int sliceLen = Math.min(lastDocRequested,nDocsReturned);
  if (sliceLen < 0) sliceLen=0;
  qr.setDocList(new DocSlice(0,sliceLen,ids,scores,totalHits,maxScore));

  /**************** older implementation using TopDocs *******************

  Filter lfilter=null;
  if (filter != null) {
    final BitSet bits = filter.getBits(); // avoid if possible
    lfilter = new Filter() {
      public BitSet bits(IndexReader reader)  {
        return bits;
      }
    };
  }
  int lastDocRequested=offset+len;
  // lucene doesn't allow 0 to be passed for nDocs
  if (lastDocRequested==0) lastDocRequested=1;
  // TopFieldDocs sortedDocs;  // use TopDocs so both versions can use it
  TopDocs sortedDocs;
  if (lsort!=null) {
    sortedDocs = searcher.search(query, lfilter, lastDocRequested, lsort);
  } else {
    sortedDocs = searcher.search(query, lfilter, lastDocRequested);
  }
  int nDocsReturned = sortedDocs.scoreDocs.length;
  int[] docs = new int[nDocsReturned];
  for (int i=0; i<nDocsReturned; i++) {
    docs[i] = sortedDocs.scoreDocs[i].doc;
  }
  float[] scores=null;
  float maxScore=0.0f;
  if ((flags & GET_SCORES) != 0) {
    scores = new float[nDocsReturned];
    for (int i=0; i<nDocsReturned; i++) {
      scores[i] = sortedDocs.scoreDocs[i].score;
    }
    if (nDocsReturned>0) {
      maxScore=sortedDocs.scoreDocs[0].score;
    }
  }
  int sliceLen = Math.min(offset+len,nDocsReturned) - offset;
  if (sliceLen < 0) sliceLen=0;
  return new DocSlice(offset,sliceLen,docs,scores,sortedDocs.totalHits, maxScore);
  **********************************************************************************/
}

//SolrIndexSearcher.java
/**
 * Retrieve the {@link Document} instance corresponding to the document id.
 *
 * <p>Lookup order: when a document cache is configured it is consulted first and
 * a hit is returned as-is. On a miss (or with no cache) the document is read
 * from the index reader and, if a cache is configured, stored in it before
 * being returned.
 *
 * <p>Note: the document will have all fields accessible, but if lazy field
 * loading is enabled and a field filter is provided, only the provided fields
 * are loaded through a SetNonLazyFieldSelector.
 *
 * @param i      the document id
 * @param fields field names to load eagerly, or {@code null} to load the whole document
 * @return the document, either from the cache or freshly read from the index
 * @throws IOException declared by this method; presumably raised by the index reader
 */
public Document doc(int i, Set<String> fields) throws IOException {
  log.info("docId: " + i);

  // Fast path: serve from the document cache when it already holds this id.
  if (documentCache != null) {
    Document cached = (Document)documentCache.get(i);
    if (cached != null) {
      return cached;
    }
  }

  // Cache miss (or no cache): read the document from the index.
  final Document loaded;
  if (enableLazyFieldLoading && fields != null) {
    loaded = searcher.getIndexReader().document(i, new SetNonLazyFieldSelector(fields));
  } else {
    loaded = searcher.getIndexReader().document(i);
  }

  // Remember the document for later lookups.
  if (documentCache != null) {
    documentCache.put(i, loaded);
  }
  return loaded;
}

账号		自动登录	找回密码
密码			立即注册

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

c++ size_t 和 int 的区别

[经验分享] solr源码类里的一些方法

浏览过的版块

扫码加入运维网微信交流群