设为首页 收藏本站
查看: 669|回复: 0

[经验分享] constellio——基于solr的开源搜索引擎系统源码研究(三)

[复制链接]

尚未签到

发表于 2015-7-17 10:47:06 | 显示全部楼层 |阅读模式
  先看搜索功能的实现源码,下面是搜索功能接口SearchServices.java



/**
* Constellio, Open Source Enterprise Search
* Copyright (C) 2010 DocuLibre inc.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA  02110-1301  USA
*/
package com.doculibre.constellio.services;
import org.apache.solr.client.solrj.response.QueryResponse;
import com.doculibre.constellio.entities.ConstellioUser;
import com.doculibre.constellio.entities.search.SimpleSearch;
/**
 * Search entry point: runs a {@link SimpleSearch} and returns the raw SolrJ
 * {@link QueryResponse}.
 */
public interface SearchServices {
/**
 * Runs the search for the given window of results.
 *
 * @param simpleSearch the search to execute
 * @param start        start offset requested for the result window
 * @param rows         number of rows requested for the result window
 * @param user         the user on whose behalf the search runs
 * @return the SolrJ response for the search
 */
QueryResponse search(SimpleSearch simpleSearch, int start, int rows, ConstellioUser user);
/**
 * Same as the four-argument overload, with caller-supplied {@link SearchParams}.
 *
 * @param simpleSearch the search to execute
 * @param start        start offset requested for the result window
 * @param rows         number of rows requested for the result window
 * @param searchParams additional search parameters
 * @param user         the user on whose behalf the search runs
 * @return the SolrJ response for the search
 */
QueryResponse search(SimpleSearch simpleSearch, int start, int rows, SearchParams searchParams, ConstellioUser user);
}
  搜索功能接口实现类SearchServicesImpl.java



/**
* Constellio, Open Source Enterprise Search
* Copyright (C) 2010 DocuLibre inc.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA  02110-1301  USA
*/
package com.doculibre.constellio.services;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import com.doculibre.constellio.entities.ConstellioUser;
import com.doculibre.constellio.entities.CopyField;
import com.doculibre.constellio.entities.IndexField;
import com.doculibre.constellio.entities.Record;
import com.doculibre.constellio.entities.RecordCollection;
import com.doculibre.constellio.entities.search.CloudKeyword;
import com.doculibre.constellio.entities.search.FacetValue;
import com.doculibre.constellio.entities.search.SearchableFacet;
import com.doculibre.constellio.entities.search.SearchedFacet;
import com.doculibre.constellio.entities.search.SimpleSearch;
import com.doculibre.constellio.indexing.IndexingManager;
import com.doculibre.constellio.search.SynonymUtils;
import com.doculibre.constellio.solr.context.SolrCoreContext;
import com.doculibre.constellio.solr.handler.ConstellioSolrQueryParams;
import com.doculibre.constellio.utils.ConstellioSpringUtils;
public class SearchServicesImpl implements SearchServices {
/** java.util.logging logger named after this class. */
private static final Logger LOGGER = Logger.getLogger(SearchServicesImpl.class.getName());
/**
 * Convenience overload: runs the search with a freshly constructed
 * {@link SearchParams} instance.
 *
 * @param simpleSearch the search to execute
 * @param start        start offset passed to the Solr query
 * @param rows         number of rows passed to the Solr query
 * @param user         the user on whose behalf the search runs; may be {@code null}
 * @return whatever the five-argument overload returns for the same arguments
 */
@Override
public QueryResponse search(SimpleSearch simpleSearch, int start, int rows, ConstellioUser user) {
    SearchParams defaultParams = new SearchParams();
    return search(simpleSearch, start, rows, defaultParams, user);
}
@Override
public QueryResponse search(SimpleSearch simpleSearch, int start, int rows, SearchParams searchParams,
ConstellioUser user) {
QueryResponse queryResponse;
//System.out.println("查询字符串2:"+simpleSearch.getQuery());
//simpleSearch.setQuery("公司");
//System.out.println(simpleSearch.getQuery());
String collectionName = simpleSearch.getCollectionName();
if (collectionName != null) {
RecordCollectionServices collectionServices = ConstellioSpringUtils.getRecordCollectionServices();
RecordServices recordServices = ConstellioSpringUtils.getRecordServices();
RecordCollection collection = collectionServices.get(collectionName);
SolrServices solrServices = ConstellioSpringUtils.getSolrServices();
Boolean usesDisMax = solrServices.usesDisMax(collection);
SolrQuery query;
if (!collection.isOpenSearch()) {
query = toSolrQuery(simpleSearch, usesDisMax, true, true, user);
} else {
query = toSolrQuery(simpleSearch, usesDisMax, false, true, user);
}
// displayQuery(query);

String luceneQuery = simpleSearch.getLuceneQuery();
query.setParam(ConstellioSolrQueryParams.LUCENE_QUERY, luceneQuery);
query.setParam(ConstellioSolrQueryParams.COLLECTION_NAME, collectionName);
if (user != null) {
query.setParam(ConstellioSolrQueryParams.USER_ID, "" + user.getId());
}
String queryString = query.getQuery();
if (StringUtils.isEmpty(queryString)) {
queryString = SimpleSearch.SEARCH_ALL;
}
List pendingExclusions = recordServices.getPendingExclusions(collection);
while (!pendingExclusions.isEmpty()) {
IndexingManager indexingManager = IndexingManager.get(collection);
if (indexingManager.isActive()) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
pendingExclusions = recordServices.getPendingExclusions(collection);
} else {
return null;
}
}
// SolrQuery query = new SolrQuery();
query.set("collectionName", simpleSearch.getCollectionName());
// query.setQuery(luceneQuery);
// nb résultats par page
            query.setRows(rows);
// page de début
            query.setStart(start);
query.setHighlight(searchParams.isHighlightingEnabled());
if (searchParams.isHighlightingEnabled()) {
query.setHighlightFragsize(searchParams.getFragsize());
query.setHighlightSnippets(searchParams.getSnippets());
}
if (simpleSearch.getSortField() != null) {
ORDER order = SimpleSearch.SORT_DESCENDING.equals(simpleSearch.getSortOrder()) ? ORDER.desc
: ORDER.asc;
IndexFieldServices indexFieldServices = ConstellioSpringUtils.getIndexFieldServices();
IndexField indexField = indexFieldServices.get(simpleSearch.getSortField(), collection);
if (indexField != null) {
IndexField sortIndexField = indexFieldServices.getSortFieldOf(indexField);
if (sortIndexField != null) {
query.setSortField(sortIndexField.getName(), order);
}
}
}
if (collection.isOpenSearch()) {
query.setParam("openSearchURL", collection.getOpenSearchURL());
Locale locale = simpleSearch.getSingleSearchLocale();
if (locale != null) {
query.setParam("lang", locale.getLanguage());
}
}
if (searchParams.getHighlightedFields() == null) {
IndexField defaultSearchField = collection.getDefaultSearchIndexField();
query.addHighlightField(defaultSearchField.getName());
for (CopyField copyFieldDest : defaultSearchField.getCopyFieldsDest()) {
IndexField copyIndexFieldSource = copyFieldDest.getIndexFieldSource();
if (copyIndexFieldSource != null && !copyIndexFieldSource.isTitleField()
&& copyIndexFieldSource.isHighlighted()) {
query.addHighlightField(copyIndexFieldSource.getName());
}
}
IndexField titleField = collection.getTitleIndexField();
if (titleField != null && titleField.isHighlighted()) {
query.addHighlightField(titleField.getName());
}
} else {
for (String highlightedField : searchParams.getHighlightedFields()) {
IndexField field = collection.getIndexField(highlightedField);
if (field != null) {
query.addHighlightField(highlightedField);
}
}
}
SolrServer server = SolrCoreContext.getSolrServer(collectionName);
if (server != null) {
try {
// displayQuery(query);
queryResponse = server.query(query);
} catch (SolrServerException e) {
queryResponse = null;
e.printStackTrace();
}
} else {
queryResponse = null;
}
} else {
queryResponse = null;
}
// improveQueryResponse(collectionName, queryResponse);
// System.out.println("Response size" + queryResponse.getResults().getNumFound());
return queryResponse;
}
// private void displayQuery(SolrQuery query) {
// System.out.println("text : " + query.getQuery());
// System.out.println("operateur : " + query.getParams("q.op"));
//        
// String[] filters = query.getFilterQueries();
// if (filters != null){
// System.out.println("filtres");
// for (String filtre : filters){
// System.out.println("\t" + filtre);
// }
// }
//        
// }
/**
 * Builds the Solr query for a search with {@code notIncludedOnly} set to {@code false}.
 *
 * @param simpleSearch           the search to translate
 * @param useDismax              forwarded to the query-text builders
 * @param withMultiValuedFacets  whether included values of multi-valued facets are added
 * @param withSingleValuedFacets whether included values of single-valued facets are added
 * @param user                   forwarded unchanged to the six-argument overload
 * @return the query built by the six-argument overload
 */
public static SolrQuery toSolrQuery(SimpleSearch simpleSearch, boolean useDismax,
        boolean withMultiValuedFacets, boolean withSingleValuedFacets, ConstellioUser user) {
    final boolean notIncludedOnly = false;
    return toSolrQuery(simpleSearch, useDismax, withMultiValuedFacets, withSingleValuedFacets,
            notIncludedOnly, user);
}
public static SolrQuery toSolrQuery(SimpleSearch simpleSearch, boolean useDismax,
boolean withMultiValuedFacets, boolean withSingleValuedFacets, boolean notIncludedOnly, ConstellioUser user) {
SolrQuery query = new SolrQuery();
boolean addSynonyms = !SolrServices.synonymsFilterActivated;
if (addSynonyms) {
addQueryTextAndOperatorWithSynonyms(simpleSearch, query, useDismax);
} else {
addQueryTextAndOperatorWithoutSynonyms(simpleSearch, query, useDismax);
}
// FIXME confirmer avec Vincent:
// 1. que les tags sont vraiment a ajouter par defaut (meme pour openSearch)
// 2. separer les tags par des AND et non des OU
        addTagsTo(simpleSearch, query);
boolean addFacets = withMultiValuedFacets || withSingleValuedFacets;
if (addFacets) {
addFacetsTo(simpleSearch, query, withMultiValuedFacets, withSingleValuedFacets, notIncludedOnly);
}
return query;
}
private static void addQueryTextAndOperatorWithSynonyms(SimpleSearch simpleSearch, SolrQuery query,
boolean useDismax) {
String collectionName = simpleSearch.getCollectionName();
if (StringUtils.isNotEmpty(collectionName)) {
if (simpleSearch.getAdvancedSearchRule() == null) {
String textQuery = getTextQuery(simpleSearch);
if (StringUtils.isNotEmpty(textQuery)) {
String searchType = simpleSearch.getSearchType();
StringBuffer sb = new StringBuffer();
// sb.append("(");
if (SimpleSearch.SEARCH_ALL.equals(textQuery)) {
sb.append(textQuery);
//                        if (useDismax) {
//                            // Non valide avec disMax => disMax doit etre desactivee
//                            query.setQueryType(SolrServices.DEFAULT_DISTANCE_NAME);
//                            LOGGER
//                                .warning("Dismax is replaced by the default distance since the former does not allow wildcard");
//                        }
} else if (SimpleSearch.EXACT_EXPRESSION.equals(searchType)) {
// FIXME a corriger : si "n" terms avec chacun "m" synonyms => traiter les combinaison
// de
// synonymes
// Sinon solution simple: synonymes de l'expression (solution prise pour l'instant)
String textAndSynonyms = SynonymUtils.addSynonyms(textQuery, collectionName, true);
sb.append(textAndSynonyms);// SynonymUtils.addSynonyms(textQuery,
// collectionName)
} else {
// TOUS_LES_MOTS OU AU_MOINS_UN_MOT
                        String operator;
if (SimpleSearch.AT_LEAST_ONE_WORD.equals(searchType)) {
operator = "OR";
} else {
operator = "AND";
}
String[] terms = simpleSearch.getQueryTerms();
boolean hardCodedOperator = false;
loop1: for (String term : terms) {
for (String luceneOperator : SimpleSearch.LUCENE_OPERATORS) {
if (term.equalsIgnoreCase(luceneOperator)) {
hardCodedOperator = true;
break loop1;
}
}
}
for (int i = 0; i < terms.length; i++) {
String term = terms;
String termAndSynonyms = SynonymUtils.addSynonyms(term, collectionName, false);
if (term.equals(termAndSynonyms)) {
sb.append(term);
} else {
sb.append("(" + termAndSynonyms + ")");
}
if (i < terms.length - 1) {
if (hardCodedOperator) {
sb.append(" ");
} else {
// sb.append(operator);
sb.append(" " + operator + " ");
}
}
}
}
// sb.append(")");
                    query.setQuery(sb.toString());
}
} else {
query.setQuery(simpleSearch.getLuceneQuery());
}
}
}
/**
 * Returns the escaped query of the search, never {@code null}.
 *
 * @param simpleSearch the search to read
 * @return {@code simpleSearch.getEscapedQuery()}, or the empty string when that is {@code null}
 */
private static String getTextQuery(SimpleSearch simpleSearch) {
    String escaped = simpleSearch.getEscapedQuery();
    return escaped != null ? escaped : "";
}
private static void addQueryTextAndOperatorWithoutSynonyms(SimpleSearch simpleSearch, SolrQuery query,
boolean useDismax) {
String collectionName = simpleSearch.getCollectionName();
if (StringUtils.isNotEmpty(collectionName)) {
if (simpleSearch.getAdvancedSearchRule() == null) {
String textQuery = getTextQuery(simpleSearch);
if (StringUtils.isNotEmpty(textQuery)) {
String searchType = simpleSearch.getSearchType();
if (SimpleSearch.SEARCH_ALL.equals(textQuery)) {
query.setQuery(textQuery);
// FIXME : AND ou Operateur par defaut?
query.setParam("q.op", "AND");
//                        if (useDismax) {
//                            // Non valide avec disMax => disMax doit etre desactivee
//                            query.setQueryType(SolrServices.DEFAULT_DISTANCE_NAME);
//                            LOGGER
//                                .warning("Dismax is replaced by the default distance since the former does not allow wildcard");
//                        }
} else if (SimpleSearch.AT_LEAST_ONE_WORD.equals(searchType)) {
query.setQuery(textQuery);
// Operateur OR
query.setParam("q.op", "OR");
if (useDismax) {
query.setParam("mm", "0");
}
} else if (SimpleSearch.EXACT_EXPRESSION.equals(searchType)) {
query.setQuery("\"" + textQuery + "\"");
if (useDismax) {
// FIXME il faut faire quoi avec dismax?
                        }
} else {
if (SimpleSearch.ALL_WORDS.equals(searchType)) {
query.setQuery(textQuery);
// Operateur AND
query.setParam("q.op", "AND");
if (useDismax) {
query.setParam("mm", "100");
}
} else {
throw new RuntimeException("Invalid searchType " + searchType);
}
}
}
} else {
query.setQuery(simpleSearch.getLuceneQuery());
}
}
}
/**
 * Adds one filter query restricting results to documents carrying every tag of the search.
 *
 * <p>Each tag becomes {@code (freeTextField:tag OR thesaurusField:tag)}; the per-tag clauses
 * are joined with {@code AND} and wrapped in parentheses.
 *
 * @param simpleSearch the search providing the tags
 * @param query        the query receiving the filter
 */
private static void addTagsTo(SimpleSearch simpleSearch, SolrQuery query) {
    StringBuilder sb = new StringBuilder();
    // Typed collection/iterator: with raw types, it.next() is an Object and cannot be
    // assigned to a String.
    Set<String> tags = simpleSearch.getTags();
    if (!tags.isEmpty()) {
        sb.append("(");
        for (Iterator<String> it = tags.iterator(); it.hasNext();) {
            String tag = it.next();
            sb.append("(");
            sb.append(IndexField.FREE_TEXT_TAGGING_FIELD + ":" + tag);
            sb.append(" OR ");
            sb.append(IndexField.THESAURUS_TAGGING_FIELD + ":" + tag);
            sb.append(")");
            if (it.hasNext()) {
                sb.append(" AND ");
            }
        }
        sb.append(")");
    }
    // NOTE(review): when there are no tags this still adds an empty filter query; kept as is
    // because callers are not visible from here - confirm whether it can be skipped.
    query.addFilterQuery(sb.toString());
}
private static void addFacetsTo(SimpleSearch simpleSearch, SolrQuery query,
boolean withMultiValuedFacets, boolean withSingleValuedFacets, boolean notIncludedOnly) {
List searchedFacets = simpleSearch.getSearchedFacets();
for (SearchedFacet searchedFacet : searchedFacets) {
SearchableFacet searchableFacet = searchedFacet.getSearchableFacet();
if ((searchableFacet.isMultiValued() && withMultiValuedFacets)
|| (!searchableFacet.isMultiValued() && withSingleValuedFacets)) {
if (!searchableFacet.isCluster()) {
if (searchableFacet.isQuery()) {
if (!searchedFacet.getIncludedValues().isEmpty()) {
StringBuffer sb = new StringBuffer("");
if (notIncludedOnly) {
sb.append("{!tag=dt}");
// sb.append("{!tag=");
// boolean first = true;
// for (String includedValue : searchedFacet.getIncludedValues()) {
// if (first) {
// first = false;
// } else {
// sb.append(",");
// }
// sb.append(includedValue);
// }
// sb.append("}");
                            }
sb.append("(");
boolean first = true;
for (String includedValue : searchedFacet.getIncludedValues()) {
if (first) {
first = false;
} else {
sb.append(" OR ");
}
sb.append(includedValue);
}
sb.append(")");
query.addFilterQuery(sb.toString());
}
} else {
String facetName = searchableFacet.getName();
if (!searchedFacet.getIncludedValues().isEmpty()) {
StringBuffer sb = new StringBuffer();
if (notIncludedOnly) {
sb.append("{!tag=dt}");
// StringBuffer sbTag = new StringBuffer();
// sbTag.append("{!tag=");
// boolean first = true;
// for (String includedValue : searchedFacet.getIncludedValues()) {
// if (first) {
// first = false;
// } else {
// sbTag.append(",");
// }
// sbTag.append(includedValue);
// }
// sbTag.append("}");
// sb.append(sbTag);
                            }
sb.append(facetName + ":(");
boolean first = true;
for (String includedValue : searchedFacet.getIncludedValues()) {
if (first) {
first = false;
} else {
sb.append(" OR ");
}
sb.append("\"");
sb.append(SimpleSearch.correctFacetValue(includedValue));
sb.append("\"");
}
sb.append(")");
query.addFilterQuery(sb.toString());
}
}
}
}
}
// valeurs exclues
for (SearchedFacet searchedFacet : searchedFacets) {
SearchableFacet searchableFacet = searchedFacet.getSearchableFacet();
if (!searchableFacet.isCluster() && !searchedFacet.getExcludedValues().isEmpty()) {
StringBuffer sb = new StringBuffer();
String facetName = searchableFacet.getName();
for (String excludedValue : searchedFacet.getExcludedValues()) {
sb.append("NOT ");
if (searchableFacet.isQuery()) {
sb.append(SimpleSearch.correctFacetValue(excludedValue));
} else {
sb.append(facetName);
sb.append(":\"");
sb.append(SimpleSearch.correctFacetValue(excludedValue));
sb.append("\"");
}
}
String sbToString = sb.toString();
if (!sbToString.isEmpty()) {
query.addFilterQuery(sb.toString());
}
}
}
SearchedFacet cluster = simpleSearch.getCluster();
if (cluster != null) {
RecordCollectionServices collectionServices = ConstellioSpringUtils.getRecordCollectionServices();
RecordCollection collection = collectionServices.get(simpleSearch.getCollectionName());
IndexField uniqueKeyIndexField = collection.getUniqueKeyIndexField();
if (!cluster.getIncludedValues().isEmpty()) {
StringBuilder sb = new StringBuilder(uniqueKeyIndexField.getName() + ":(");
for (String includedValue : cluster.getIncludedValues()) {
boolean first = true;
StringTokenizer st = new StringTokenizer(includedValue, FacetValue.CONCAT_DELIM);
while (st.hasMoreTokens()) {
String docId = st.nextToken();
if (first) {
first = false;
} else {
sb.append(" OR ");
}
sb.append("\"");
sb.append(docId);
sb.append("\"");
}
}
sb.append(")");
query.addFilterQuery(sb.toString());
}
if (!cluster.getExcludedValues().isEmpty()) {
StringBuilder sb = new StringBuilder();
for (String excludedValue : cluster.getExcludedValues()) {
StringTokenizer st = new StringTokenizer(excludedValue, FacetValue.CONCAT_DELIM);
while (st.hasMoreTokens()) {
String docId = st.nextToken();
sb.append("NOT ");
sb.append(uniqueKeyIndexField.getName());
sb.append(":\"");
sb.append(docId);
sb.append("\"");
if (st.hasMoreTokens()) {
sb.append(" ");
}
}
}
query.addFilterQuery(sb.toString());
}
}
CloudKeyword cloudKeyword = simpleSearch.getCloudKeyword();
if (cloudKeyword != null) {
query.addFilterQuery("keyword:\"" + cloudKeyword.getKeyword() + "\"");
}
Locale singleSearchLocale = simpleSearch.getSingleSearchLocale();
if (singleSearchLocale != null && StringUtils.isNotBlank(singleSearchLocale.getLanguage())) {
query.addFilterQuery(IndexField.LANGUAGE_FIELD + ":\"" + singleSearchLocale.getLanguage() + "\"");
}
}
}

运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦:870801961 群⑧:679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-87590-1-1.html 上篇帖子: solr集成mmseg4j分词 下篇帖子: Solr data import 中XML/HTTP 数据源的使用
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表