安装es
# Pull the Elasticsearch 7.14.0 image.
docker pull elasticsearch:7.14.0

# Start a detached container named "es", publishing the HTTP (9200) and transport (9300) ports.
# discovery.type=single-node runs Elasticsearch as a single node.
# ES_JAVA_OPTS sets the JVM heap to 256 MB (min and max).
# NOTE: assumes the user-defined Docker network "mynetwork" already exists and its subnet contains 172.18.0.7.
docker run --name es -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -e ES_JAVA_OPTS="-Xms256m -Xmx256m" --network mynetwork --ip 172.18.0.7 -v /d/docker/docker-php/elasticsearch/data:/usr/share/elasticsearch/data -d elasticsearch:7.14.0
下载IK中文分词插件
# Download the IK analysis plugin that matches the Elasticsearch version (7.14.0).
# The file is saved under its release name so the later cp/unzip/rm steps refer to the same file.
curl -L -o elasticsearch-analysis-ik-7.14.0.zip https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.14.0/elasticsearch-analysis-ik-7.14.0.zip

# Alternatively, download the zip to the host with a browser and copy it into the es container:
#   https://github.com/medcl/elasticsearch-analysis-ik/releases
#   https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.14.0/elasticsearch-analysis-ik-7.14.0.zip
# The Windows path is double-quoted so the backslashes survive in bash as well as in cmd/PowerShell.
docker cp "C:\Users\Administrator\Desktop\docker\elasticsearch-analysis-ik-7.14.0.zip" es:/usr/share/elasticsearch/plugins

# Open a shell inside the container; the remaining commands run there.
docker exec -it es bash

cd /usr/share/elasticsearch/plugins
# Each plugin must live in its own sub-directory; extract into ./ik.
unzip elasticsearch-analysis-ik-7.14.0.zip -d ./ik
# Remove the archive so only the plugin directory remains in plugins/.
rm elasticsearch-analysis-ik-7.14.0.zip
# Verify that the plugin is listed.
elasticsearch-plugin list
配置es,让其可以被head插件访问到
# Run inside the es container.
cd /usr/share/elasticsearch/config
vi elasticsearch.yml

# Add the following two settings to elasticsearch.yml so the head plugin
# (served from a different origin) is allowed to call the REST API:
#
#   http.cors.enabled: true
#   http.cors.allow-origin: "*"
#
# These are read at startup; restart the container afterwards (from the host: docker restart es).
如果我们不小心把配置文件改错了,启动失败进不了容器中,我们可以把配置文件copy出来修改
# Copy the configuration file out of the (possibly stopped) container to the host.
# Windows paths are double-quoted so the backslashes survive in bash as well as in cmd/PowerShell.
docker cp es:/usr/share/elasticsearch/config/elasticsearch.yml "C:\Users\Administrator\Desktop\elasticsearch.yml"

# Fix the file on the host with any editor, then copy it back into the container.
docker cp "C:\Users\Administrator\Desktop\elasticsearch.yml" es:/usr/share/elasticsearch/config/elasticsearch.yml

# Start the container again with the corrected configuration.
docker restart es
安装head插件
# Pull the elasticsearch-head image.
docker pull mobz/elasticsearch-head:5

# Start a detached container named "es_head" and publish its web UI on port 9100.
# NOTE: assumes the user-defined Docker network "mynetwork" already exists and its subnet contains 172.18.0.8.
docker run -d --name es_head -p 9100:9100 --network mynetwork --ip 172.18.0.8 mobz/elasticsearch-head:5

# Confirm that the container is running.
docker ps
http://localhost:9100/
解决head插件406:Content-Type header [application/x-www-form-urlencoded] is not supported
# Copy vendor.js out of the head container into the host directory Desktop\docker.
docker cp es_head:/usr/src/app/_site/vendor.js "C:\Users\Administrator\Desktop\docker"

# Edit the copied file on the host:
#   Line 6886: change application/x-www-form-urlencoded to application/json;charset=UTF-8
#   Line 7573: change application/x-www-form-urlencoded to application/json;charset=UTF-8

# Copy the edited file back. The source path must be the file that was copied out above
# (Desktop\docker\vendor.js), otherwise the unmodified or a non-existent file is used.
docker cp "C:\Users\Administrator\Desktop\docker\vendor.js" es_head:/usr/src/app/_site/vendor.js
复合查询
http://localhost:9200/_analyze POST { "analyzer": "ik_max_word", "text": [ "我是中国人" ] }
自定义词组
我们先来看看关键词 周廷泽 的检索结果
# Run inside the es container.
cd /usr/share/elasticsearch/plugins/ik/config

# Create the dictionary inside custom/, because IKAnalyzer.cfg.xml below refers to
# custom/single_word.dic (relative to this config directory).
mkdir -p custom
touch custom/single_word.dic
vi custom/single_word.dic
# Dictionary content, one word per line:
#   周廷泽
#   周泽

cd /usr/share/elasticsearch/plugins/ik/config
vi IKAnalyzer.cfg.xml
# Content of IKAnalyzer.cfg.xml (ext_dict points at the custom dictionary):
#
#   <?xml version="1.0" encoding="UTF-8"?>
#   <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
#   <properties>
#       <comment>IK Analyzer 扩展配置</comment>
#       <!--用户可以在这里配置自己的扩展字典 -->
#       <entry key="ext_dict">custom/single_word.dic</entry>
#       <!--用户可以在这里配置自己的扩展停止词字典-->
#       <entry key="ext_stopwords"></entry>
#       <!--用户可以在这里配置远程扩展字典 -->
#       <!-- <entry key="remote_ext_dict">words_location</entry> -->
#       <!--用户可以在这里配置远程扩展停止词字典-->
#       <!-- <entry key="remote_ext_stopwords">words_location</entry> -->
#   </properties>

# Leave the container shell, then restart Elasticsearch from the host so the dictionary is loaded.
docker restart es
安装拼音分词
https://github.com/medcl/elasticsearch-analysis-pinyin/tags
安装方式跟IK分词一样
<?php
/**
 * Elasticsearch PHP client demo for the "shop" index: index definition
 * (IK tokenizer + pinyin token filter), one sample document, a completion
 * suggester query and a match query.
 *
 * https://www.elastic.co/guide/en/elasticsearch/client/php-api/current/index_management.html
 * https://open.163.com/newview/movie/free?pid=XGG8PCIUJ&mid=HGG8PFDRB
 */

use Elasticsearch\ClientBuilder;

require './vendor/autoload.php';

/**
 * Build a string of random Chinese characters.
 *
 * @param int $num number of Chinese characters to generate
 * @return string the generated characters, encoded as UTF-8
 */
function getChar($num)
{
    $b = '';
    for ($i = 0; $i < $num; $i++) {
        // Assemble one double-byte character with chr(): the first chr() is
        // the high byte, the second one is the low byte.
        $a = chr(mt_rand(0xB0, 0xD0)) . chr(mt_rand(0xA1, 0xF0));
        // Convert the GB2312 byte pair to UTF-8.
        $b .= iconv('GB2312', 'UTF-8', $a);
    }
    return $b;
}

// Client talking to the local node; a failed request is retried up to 2 times.
$client = ClientBuilder::create()
    ->setHosts(['127.0.0.1:9200'])
    ->setRetries(2)
    ->build();

// Index definition: settings (shards, custom analyzers) and field mappings.
$index = [
    'index' => 'shop',
    'body' => [
        'settings' => [
            'number_of_shards' => 5,
            'number_of_replicas' => 0,
            'analysis' => [
                'analyzer' => [
                    // Index-time analyzer for full-text fields: IK tokens + pinyin filter.
                    'text_analyzer' => [
                        'tokenizer' => 'ik_max_word',
                        'filter' => 'py'
                    ],
                    // Analyzer for the completion field: whole entry as one token + pinyin filter.
                    'completion_analyzer' => [
                        'tokenizer' => 'keyword',
                        'filter' => 'py'
                    ]
                ],
                'filter' => [
                    'py' => [
                        "type" => "pinyin",
                        "keep_full_pinyin" => false,
                        "keep_joined_full_pinyin" => true,
                        "keep_original" => true,
                        "limit_first_letter_length" => 16,
                        "none_chinese_pinyin_tokenize" => false,
                        "remove_duplicated_term" => true
                    ]
                ]
            ]
        ],
        'mappings' => [
            '_source' => [
                'enabled' => true
            ],
            'properties' => [
                'article_id' => [
                    'type' => 'integer' // or long
                ],
                'title' => [
                    'type' => 'text',
                    'analyzer' => 'text_analyzer',
                    'search_analyzer' => 'ik_smart',
                ],
                'title_completion' => [
                    'type' => 'completion',
                    'analyzer' => 'completion_analyzer',
                    // 'search_analyzer' => 'ik_smart',
                    // Example of same-pinyin words: 狮子 / 虱子; search text: 我掉到了狮子笼里怎么办
                ],
                'content' => [
                    'type' => 'text',
                    'analyzer' => 'text_analyzer',
                    'search_analyzer' => 'ik_smart',
                ]
            ]
        ]
    ]
];

// Step 1 (run once): uncomment to create the index.
//$response = $client->indices()->create($index);
//print_r($response);
//exit;

// Sample document.
$doc = [
    'index' => 'shop',
    'id' => 1,
    'body' => [
        'article_id' => 1,
        'title' => '人民日报再次点名阿里,不要妄想大而不倒',
        // The completion field may be a single string ...
        //'title_completion' => '人民日报再次点名阿里,不要妄想大而不倒',
        // ... or an array of entries.
        'title_completion' => ['人民日报', '阿里', '点名', '妄想', '不倒'],
        'content' =>
            '据吉林省委政法委官方微信公众号“平安吉林”消息:8月31日,省委召开省委政法委员会干部会议,宣布省委政法委主要领导任职决定。省委副书记高广滨,省委常委、省委政法委书记范锐平出席会议并讲话。省委常委、省委组织部部长张恩惠主持会议并宣布决定。副省长、省公安厅厅长刘金波,省法院院长徐家新,省检察院检察长尹伊君,省武警总队司令员赵洪炜出席会议。'
    ]
];

// Step 2 (run once): uncomment to index the sample document.
//$client->index($doc);
//exit;

// Search suggestions (prefix lookup) via the completion suggester.
$params = [
    'index' => 'shop',
    'body' => [
        'suggest' => [
            'my-suggest' => [
                'text' => '人民',
                'completion' => [
                    'field' => 'title_completion',
                    'skip_duplicates' => true,
                    'size' => 5
                ],
            ]
        ],
    ]
];
$result = $client->search($params);
var_dump($result);
echo "<hr/>";
//exit;

// Full-text match query on the title field.
$params = [
    'index' => 'shop',
    'body' => [
        'query' => [
            'match' => [
                'title' => '阿里'
            ]
        ],
        // 'stored_fields' => ['title']
    ]
];
/* Rough SQL analogy: select * from article where title like '%国际%' or title like '%test%' */
$result = $client->search($params);
var_dump($result);
访客评论