设为首页 收藏本站
查看: 1419|回复: 0

[经验分享] coreseek-3.2.14 配置,已经在PHP中实现,还包括了实时更新搜索字段

[复制链接]
累计签到:1 天
连续签到:1 天
发表于 2015-1-30 09:03:49 | 显示全部楼层 |阅读模式
刚刚用上,做个测试,也算写个笔记,留着以后查看,嗯也是第一篇博文。。。。
#

# Sphinx configuration file sample
#
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#
#############################################################################
## data source definition
#############################################################################
#################################################
##向本地索引添加文档源,可以出现多次,必选项。
#################################################
source main
{
        # data source type. mandatory, no default value
        # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
        type                                        = mysql  # use MySQL as the data source
        #####################################################################
        ## SQL settings (for 'mysql' and 'pgsql' types)
        #####################################################################
        # some straightforward parameters for SQL source types
        sql_host                                = localhost
        # NOTE: the next three values are placeholders (username / password /
        # database name) — replace them with real credentials before use
        sql_user                                = 用户名
        sql_pass                                = 密码
        sql_db                                = 数据库
        sql_port                                = 3306        # optional, default is 3306
        # UNIX socket name
        # optional, default is empty (reuse client library defaults)
        # usually '/var/lib/mysql/mysql.sock' on Linux
        # usually '/tmp/mysql.sock' on FreeBSD
        #
        sql_sock                                = /tmp/mysql.sock

        # MySQL specific client connection flags
        # optional, default is 0
        # data-transfer options for the MySQL connection (e.g. compression)
        # mysql_connect_flags        = 32 # enable compression
        # MySQL specific SSL certificate settings
        # optional, defaults are empty
        # SSL connection settings
        # mysql_ssl_cert                = /etc/ssl/client-cert.pem
        # mysql_ssl_key                = /etc/ssl/client-key.pem
        # mysql_ssl_ca                = /etc/ssl/cacert.pem
        # MS SQL specific Windows authentication mode flag
        # MUST be in sync with charset_type index-level setting
        # optional, default is 0
        #
        # mssql_winauth                        = 1 # use currently logged on user credentials

        # MS SQL specific Unicode indexing flag
        # optional, default is 0 (request SBCS data)
        #
        # mssql_unicode                        = 1 # request Unicode data from server

        # ODBC specific DSN (data source name)
        # mandatory for odbc source type, no default value
        #
        # odbc_dsn                                = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
        # sql_query                                = SELECT id, data FROM documents.csv

        # pre-query, executed before the main fetch query
        # multi-value, optional, default is empty list of queries
        # pre-queries run once before the main fetch query:
        # 1) force UTF-8 on the connection, 2) disable the query cache for this
        # session, 3) snapshot MAX(id) of bbs into the counter table so that the
        # delta source can later index only rows added after this point
        sql_query_pre                        = SET NAMES utf8
        sql_query_pre                        = SET SESSION query_cache_type=OFF
        sql_query_pre                        = REPLACE INTO counter select 1,MAX(id) from bbs
        # main document fetch query
        # mandatory, integer document ID field MUST be the first selected column
        # main fetch query — the table whose rows get indexed
        sql_query=select* from bbwx_rules where rid<=(select max_id from counter where counter.id=1)
        # NOTE(review): this reads bbwx_rules.rid, but the counter snapshot
        # above, sql_query_info below, and the delta source all use the bbs
        # table (keyed on id). This line looks like it should be
        # "select * from bbs where id<=(select max_id from counter where counter.id=1)"
        # — confirm against the actual schema before reindexing.
        # when several sources feed one index, field order, count and types
        # must all match exactly, otherwise indexing fails
        # range query setup, query that must return min and max ID values
        # optional, default is empty
        #
        # sql_query will need to reference $start and $end boundaries
        # if using ranged query:
        #
        # sql_query                                = \
        #        SELECT doc.id, doc.id AS group, doc.title, doc.data \
        #        FROM documents doc \
        #        WHERE id>=$start AND id<=$end
        #
        # sql_query_range                = SELECT MIN(id),MAX(id) FROM documents

        # range query step
        # optional, default is 1024
        #
        # sql_range_step                = 1000

        # unsigned integer attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # optional bit size can be specified, default is 32
        #
        # sql_attr_uint                        = author_id
        # sql_attr_uint                        = forum_id:9 # 9 bits for forum_id
        # sql_attr_uint                        = group_id
        # boolean attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # equivalent to sql_attr_uint with 1-bit size
        #
        # sql_attr_bool                        = is_deleted

        # bigint attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # declares a signed (unlike uint!) 64-bit attribute
        #
        # sql_attr_bigint                        = my_bigint_id

        # UNIX timestamp attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # similar to integer, but can also be used in date functions
        #
        # sql_attr_timestamp        = posted_ts
        # sql_attr_timestamp        = last_edited_ts
        # sql_attr_timestamp                = date_added
        # string ordinal attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # sorts strings (bytewise), and stores their indexes in the sorted list
        # sorting by this attr is equivalent to sorting by the original strings
        #
        # sql_attr_str2ordinal        = author_name

        # floating point attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # values are stored in single precision, 32-bit IEEE 754 format
        #
        # sql_attr_float = lat_radians
        # sql_attr_float = long_radians

        # multi-valued attribute (MVA) attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # MVA values are variable length lists of unsigned 32-bit integers
        #
        # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
        # ATTR-TYPE is 'uint' or 'timestamp'
        # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
        # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
        # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
        #
        # sql_attr_multi        = uint tag from query; SELECT id, tag FROM tags
        # sql_attr_multi        = uint tag from ranged-query; \
        #        SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
        #        SELECT MIN(id), MAX(id) FROM tags

        # post-query, executed on sql_query completion
        # optional, default is empty
        #
        # sql_query_post                =
       
        # post-index-query, executed on successful indexing completion
        # optional, default is empty
        # $maxid expands to max document ID actually fetched from DB
        #
        # sql_query_post_index = REPLACE INTO counters ( id, val ) \
        #        VALUES ( 'max_indexed_id', $maxid )

        # ranged query throttling, in milliseconds
        # optional, default is 0 which means no delay
        # enforces given delay before each query step
        sql_ranged_throttle        = 0
        # document info query, ONLY for CLI search (ie. testing and debugging)
        # optional, default is empty
        # must contain $id macro and must fetch the document by that id
        # document fields displayed by the CLI "search" tool (testing only)
        sql_query_info                = SELECT * FROM bbs WHERE id=$id
        # kill-list query, fetches the document IDs for kill-list
        # k-list will suppress matches from preceding indexes in the same query
        # optional, default is empty
        #
        # sql_query_killlist        = SELECT id FROM documents WHERE edited>=@last_reindex

        # columns to unpack on indexer side when indexing
        # multi-value, optional, default is empty list
        #
        # unpack_zlib = zlib_column
        # unpack_mysqlcompress = compressed_column
        # unpack_mysqlcompress = compressed_column_2

        # maximum unpacked length allowed in MySQL COMPRESS() unpacker
        # optional, default is 16M
        #
        # unpack_mysqlcompress_maxsize = 16M

        #####################################################################
        ## xmlpipe settings
        #####################################################################
        # type                                = xmlpipe
        # shell command to invoke xmlpipe stream producer
        # mandatory
        #
        # xmlpipe_command        = cat /usr/local/coreseek/var/test.xml
        #####################################################################
        ## xmlpipe2 settings
        #####################################################################
        # type                                = xmlpipe2
        # xmlpipe_command        = cat /usr/local/coreseek/var/test2.xml

        # xmlpipe2 field declaration
        # multi-value, optional, default is empty
        #
        # xmlpipe_field                                = subject
        # xmlpipe_field                                = content

        # xmlpipe2 attribute declaration
        # multi-value, optional, default is empty
        # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
        #
        # xmlpipe_attr_timestamp        = published
        # xmlpipe_attr_uint                        = author_id

        # perform UTF-8 validation, and filter out incorrect codes
        # avoids XML parser choking on non-UTF-8 documents
        # optional, default is 0
        #
        # xmlpipe_fixup_utf8                = 1
}

# inherited source example
# Inherited ("delta") data source: every parameter is copied from the parent
# source 'main', and any parameter set here overrides the inherited value.
# all the parameters are copied from the parent source,
# and may then be overridden in this source definition
source delta : main
{
#        sql_ranged_throttle                        = 100
        # Overriding sql_query_pre REPLACES the parent's entire pre-query list:
        # the parent's "SET SESSION query_cache_type=OFF" and the counter
        # REPLACE are NOT run here. Skipping the counter update is presumably
        # deliberate for a delta source (the snapshot should only advance when
        # the main index is rebuilt) — confirm this is intended.
        sql_query_pre = set names utf8
        # Index only the rows added since the last main rebuild.
        # NOTE(review): this filters on bbs.rid while the counter stores
        # MAX(id) of bbs — confirm whether rid and id are the same column;
        # it looks like this should be "where id > ...".
        sql_query = select * from bbs where rid > (select max_id from counter where counter.id=1)
}
#############################################################################
## index definition
#############################################################################
# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index main
{
        # document source(s) to index
        # multi-value, mandatory
        # document IDs must be globally unique across all sources
        source                        = main
        # index files path and file name, without extension
        # mandatory, path must be writable, extensions will be auto-appended
        path                        = /usr/local/coreseek/var/data/main
        # document attribute values (docinfo) storage mode
        # optional, default is 'extern'
        # known values are 'none', 'extern' and 'inline'
        docinfo                        = extern
        # memory locking for cached data (.spa and .spi), to prevent swapping
        # optional, default is 0 (do not mlock)
        # requires searchd to be run from root
        mlock                        = 0
        # a list of morphology preprocessors to apply
        # optional, default is empty
        #
        # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
        # 'soundex', and 'metaphone'; additional preprocessors available from
        # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
        # (see libstemmer_c/libstemmer/modules.txt)
        #
        # morphology         = stem_en, stem_ru, soundex
        # morphology        = libstemmer_german
        # morphology        = libstemmer_sv
        morphology                = none
        # minimum word length at which to enable stemming
        # optional, default is 1 (stem everything)
        #
        # min_stemming_len        = 1

        # stopword files list (space separated)
        # optional, default is empty
        # contents are plain text, charset_table and stemming are both applied
        #
        # stopwords                        = G:\data\stopwords.txt

        # wordforms file, in "mapfrom > mapto" plain text format
        # optional, default is empty
        #
        #wordforms                        = G:\data\wordforms.txt

        # tokenizing exceptions file
        # optional, default is empty
        #
        # plain text, case sensitive, space insensitive in map-from part
        # one "Map Several Words => ToASingleOne" entry per line
        #
        # exceptions                = /data/exceptions.txt

        # minimum indexed word length
        # default is 1 (index everything)
        min_word_len                = 1
        # charset encoding type
        # optional, default is 'sbcs'
        # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
        # 'zh_cn.utf-8' is a coreseek extension (not stock Sphinx): UTF-8 text
        # plus Chinese word segmentation via the bundled mmseg library
        charset_type                = zh_cn.utf-8
        # coreseek-only option: directory containing mmseg's segmentation
        # dictionary files used for the Chinese tokenizer
        charset_dictpath        = /usr/local/mmseg/etc
        # charset definition and case folding rules "table"
        # optional, default value depends on charset_type
        #
        # defaults are configured to include English and Russian characters only
        # you need to change the table to include additional ones
        # this behavior MAY change in future versions
        #
        # 'sbcs' default value is
        # charset_table                = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
        #
        # 'utf-8' default value is
        # charset_table                = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F

        # ignored characters list
        # optional, default value is empty
        #
        # ignore_chars                = U+00AD

        # minimum word prefix length to index
        # optional, default is 0 (do not index prefixes)
        #
        # min_prefix_len        = 0

        # minimum word infix length to index
        # optional, default is 0 (do not index infixes)
        #
        # min_infix_len                = 0

        # list of fields to limit prefix/infix indexing to
        # optional, default value is empty (index all fields in prefix/infix mode)
        #
        # prefix_fields                = filename
        # infix_fields                = url, domain

        # enable star-syntax (wildcards) when searching prefix/infix indexes
        # known values are 0 and 1
        # optional, default is 0 (do not use wildcard syntax)
        #
        # enable_star                = 1

        # n-gram length to index, for CJK indexing
        # only supports 0 and 1 for now, other lengths to be implemented
        # optional, default is 0 (disable n-grams)
        # (left disabled here — presumably because mmseg segmentation via
        # charset_dictpath above already handles CJK; confirm if switching
        # tokenizers)
        #
        # ngram_len                                = 1

        # n-gram characters list, for CJK indexing
        # optional, default is empty
        #
        # ngram_chars                        = U+3000..U+2FA1F

        # phrase boundary characters list
        # optional, default is empty
        #
        # phrase_boundary                = ., ?, !, U+2026 # horizontal ellipsis

        # phrase boundary word position increment
        # optional, default is 0
        #
        # phrase_boundary_step        = 100

        # whether to strip HTML tags from incoming documents
        # known values are 0 (do not strip) and 1 (do strip)
        # optional, default is 0
        html_strip                                = 0
        # what HTML attributes to index if stripping HTML
        # optional, default is empty (do not index anything)
        #
        # html_index_attrs                = img=alt,title; a=title;

        # what HTML elements contents to strip
        # optional, default is empty (do not strip element contents)
        #
        # html_remove_elements        = style, script

        # whether to preopen index data files on startup
        # optional, default is 0 (do not preopen), searchd-only
        #
        # preopen                                        = 1

        # whether to keep dictionary (.spi) on disk, or cache it in RAM
        # optional, default is 0 (cache in RAM), searchd-only
        #
        # ondisk_dict                                = 1

        # whether to enable in-place inversion (2x less disk, 90-95% speed)
        # optional, default is 0 (use separate temporary files), indexer-only
        #
        # inplace_enable                        = 1

        # in-place fine-tuning options
        # optional, defaults are listed below
        #
        # inplace_hit_gap                        = 0                # preallocated hitlist gap size
        # inplace_docinfo_gap                = 0                # preallocated docinfo gap size
        # inplace_reloc_factor        = 0.1        # relocation buffer size within arena
        # inplace_write_factor        = 0.1        # write buffer size within arena

        # whether to index original keywords along with stemmed versions
        # enables "=exactform" operator to work
        # optional, default is 0
        #
        # index_exact_words                = 1

        # position increment on overshort (less that min_word_len) words
        # optional, allowed values are 0 and 1, default is 1
        #
        # overshort_step                        = 1

        # position increment on stopword
        # optional, allowed values are 0 and 1, default is 1
        #
        # stopword_step                        = 1
}

# inherited index example
#
# all the parameters are copied from the parent index,
# and may then be overridden in this index definition
index delta : main
{
        # The delta index uses its own document source and index files; all
        # tokenization settings (charset_type, mmseg dictionary, morphology,
        # etc.) are inherited from 'main' and must stay compatible with it.
        source                         = delta
        path                        = /usr/local/coreseek/var/data/delta
#        morphology                = stem_en
}

# distributed index example
# 分布式的索引
# this is a virtual index which can NOT be directly indexed,
# and only contains references to other local and/or remote indexes
#index dist1
#{
#        # 'distributed' index type MUST be specified
#        type                                = distributed
#
#        # local index to be searched
#        # there can be many local indexes configured
#        local                                = test1
#        local                                = test1stemmed
#
#        # remote agent
#        # multiple remote agents may be specified
#        # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
#        # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
#        agent                                = localhost:9313:remote1
#        agent                                = localhost:9314:remote2,remote3
#        # agent                                = /var/run/searchd.sock:remote4
#
#        # blackhole remote agent, for debugging/testing
#        # network errors and search results will be ignored
#        #
#        # agent_blackhole                = testbox:9312:testindex1,testindex2
#
#
#        # remote agent connection timeout, milliseconds
#        # optional, default is 1000 ms, ie. 1 sec
#        agent_connect_timeout        = 1000
#
#        # remote agent query timeout, milliseconds
#        # optional, default is 3000 ms, ie. 3 sec
#        agent_query_timeout                = 3000
#}
#############################################################################
## indexer settings
#############################################################################
#索引器
indexer
{
        # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
        # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
        mem_limit                        = 32M
        # maximum IO calls per second (for I/O throttling)
        # optional, default is 0 (unlimited)
        #
        # max_iops                        = 40

        # maximum IO call size, bytes (for I/O throttling)
        # optional, default is 0 (unlimited)
        #
        # max_iosize                = 1048576

        # maximum xmlpipe2 field length, bytes
        # optional, default is 2M
        #
        # max_xmlpipe2_field        = 4M

        # write buffer size, bytes
        # several (currently up to 4) buffers will be allocated
        # write buffers are allocated in addition to mem_limit
        # optional, default is 1M
        #
        # write_buffer                = 1M
}
#############################################################################
## searchd settings
#############################################################################
#服务器进程
searchd
{
        # hostname, port, or hostname:port, or /unix/socket/path to listen on
        # multi-value, multiple listen points are allowed
        # optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
        # (no listen line set here, so the default port 9312 is used — this
        # matches the SetServer("localhost", 9312) call in the PHP client)
        #
        # listen                                = 127.0.0.1
        # listen                                = 192.168.0.1:9312
        # listen                                = 9312
        # listen                                = /var/run/searchd.sock

        # log file, searchd run info is logged here
        # optional, default is 'searchd.log'
        log                                        = /usr/local/coreseek/var/log/searchd.log
        # query log file, all search queries are logged here
        # optional, default is empty (do not log queries)
        query_log                        = /usr/local/coreseek/var/log/query.log
        # client read timeout, seconds
        # optional, default is 5
        read_timeout                = 5
        # request timeout, seconds
        # optional, default is 5 minutes
        client_timeout                = 300
        # maximum amount of children to fork (concurrent searches to run)
        # optional, default is 0 (unlimited)
        max_children                = 30
        # PID file, searchd process ID file name
        # mandatory
        pid_file                        = /usr/local/coreseek/var/log/searchd.pid
        # max amount of matches the daemon ever keeps in RAM, per-index
        # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
        # default is 1000 (just like Google)
        max_matches                        = 1000
        # seamless rotate, prevents rotate stalls if precaching huge datasets
        # optional, default is 1
        seamless_rotate                = 1
        # whether to forcibly preopen all indexes on startup
        # optional, default is 0 (do not preopen)
        preopen_indexes                = 0
        # whether to unlink .old index copies on successful rotation.
        # optional, default is 1 (do unlink)
        unlink_old                        = 1
        # attribute updates periodic flush timeout, seconds
        # updates will be automatically dumped to disk this frequently
        # optional, default is 0 (disable periodic flush)
        #
        # attr_flush_period        = 900

        # instance-wide ondisk_dict defaults (per-index value take precedence)
        # optional, default is 0 (precache all dictionaries in RAM)
        #
        # ondisk_dict_default        = 1

        # MVA updates pool size
        # shared between all instances of searchd, disables attr flushes!
        # optional, default size is 1M
        mva_updates_pool        = 1M
        # max allowed network packet size
        # limits both query packets from clients, and responses from agents
        # optional, default size is 8M
        max_packet_size                = 8M
        # crash log path
        # searchd will (try to) log crashed query to 'crash_log_path.PID' file
        # optional, default is empty (do not create crash logs)
        #
        # crash_log_path                = /usr/local/coreseek/var/log/crash

        # max allowed per-query filter count
        # optional, default is 256
        max_filters                        = 256
        # max allowed per-filter values count
        # optional, default is 4096
        max_filter_values        = 4096

        # socket listen queue length
        # optional, default is 5
        #
        # listen_backlog                = 5

        # per-keyword read buffer size
        # optional, default is 256K
        #
        # read_buffer                        = 256K

        # unhinted read size (currently used when reading hits)
        # optional, default is 32K
        #
        # read_unhinted                = 32K
}
# --eof--
--------------------------------- PHP 端代码
index.php —— 搜索首页(只包含一个把关键字提交到 search.php 的搜索表单)
<html>
<head>
        <meta http-equiv="Content-Type" Content="text/html;charset=utf-8">
</head>
<body>
<!-- Minimal search page: posts the keyword (field "key") to search.php,
     which runs the Sphinx query and renders highlighted results. -->
<form action="search.php" method="post">
        <input type="text" name="key" />
        <input type="submit" value="百度一下,我就知道" />
</form>
</body>
</html>
--------- search.php —— 搜索处理页(接收表单关键字,查询 Sphinx 并输出高亮结果)
<?php
        // search.php — runs the posted keyword against the Sphinx "main"
        // index, fetches the matching rows from MySQL, and prints excerpts
        // with the matched keywords highlighted.
        header("Content-Type:text/html;charset=utf-8");

        // Keyword comes straight from the form on index.php.
        $keyword = isset($_POST['key']) ? trim($_POST['key']) : '';
        if ($keyword === '') {
                echo "请输入关键字";
                exit;
        }

        // Create the Sphinx client; searchd listens on the default port 9312.
        $sphinx = new SphinxClient();
        $sphinx->SetServer("localhost", 9312);

        // Full-text query against the "main" index.
        $result = $sphinx->query($keyword, "main");

        // Guard against a failed query or zero matches: without this,
        // array_keys() emits warnings and the SQL below degenerates to
        // "... where id in()" — a syntax error.
        if ($result === false || empty($result['matches'])) {
                echo "没有找到相关结果";
                exit;
        }

        // Sphinx match keys are the document IDs. Cast each to int before
        // interpolating into SQL (mysql_* has no placeholders) so nothing
        // non-numeric can reach the query.
        $ids = array_map('intval', array_keys($result['matches']));
        $ids = join(",", $ids);

        // Connect to MySQL (credentials are placeholders — fill in real ones).
        $conn = mysql_connect("localhost", "用户名", "密码");
        if ($conn === false) {
                echo "数据库连接失败";
                exit;
        }
        mysql_select_db("test");
        mysql_set_charset("utf8");

        // Fetch the matched documents by ID.
        $sql = "select id,title,content from bbs where id in(" . $ids . ")";
        $result = mysql_query($sql);

        // Excerpt options: wrap matched keywords in a bold red span
        // (Sphinx defaults are <b> ... </b>).
        $opts = array(
                "before_match" => "<span style='font-weight:bold;color:red'>",
                "after_match"  => "</span>"
        );

        while ($row = mysql_fetch_assoc($result)) {
                // buildExcerpts() returns a numerically indexed array in the
                // same order as $row's values: [0] => id, [1] => title,
                // [2] => content.
                $res = $sphinx->buildExcerpts($row, "main", $keyword, $opts);
                echo "标题:" . $res['1'] . "<br />";
                echo "内容:" . $res['2'] . "<br />";
                echo "<hr>";
        }

        // Release the database connection.
        mysql_close($conn);

只是为了自己看看。。。
计划任务
delta.sh 和 main.sh都放在coreseek的bin目录下
#!/bin/bash
#delta.sh
# Rebuild the "delta" index; --rotate lets a running searchd swap in the new
# index files without downtime. Output is appended to a log (cron-friendly).
/usr/local/coreseek/bin/indexer delta --rotate >> /usr/local/coreseek/var/log/delta.log
main.sh
#!/bin/bash
#main.sh
# Full rebuild of the "main" index (also refreshes the counter snapshot via
# the source's sql_query_pre); --rotate hot-swaps the index under searchd.
/usr/local/coreseek/bin/indexer main --rotate >> /usr/local/coreseek/var/log/main.log



运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦870801961 群⑧679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-41712-1-1.html 上篇帖子: centos下coreseek安装及使用教程 下篇帖子: drbd中文应用指南
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表