elasticsearch

shawroad · shawroad · commit 01c35aeedec0 · 2020-08-04T09:18:00.000+08:00
diff --git a/elasticsearch/001-&#21019;&#24314;&#24211;&#24182;&#25554;&#20837;&#25968;&#25454;.py b/elasticsearch/001-&#21019;&#24314;&#24211;&#24182;&#25554;&#20837;&#25968;&#25454;.py
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+# @Time &nbsp; &nbsp;: 2020/7/30 15:04
+# @Author &nbsp;: xiaolu
+# @FileName: 001-&#21019;&#24314;&#24211;&#24182;&#25554;&#20837;&#25968;&#25454;.py
+# @Software: PyCharm
+from elasticsearch import Elasticsearch
+
+
+es = Elasticsearch()  # client constructed with no arguments
+
+# result = es.indices.delete(index='point_type', ignore=[400, 404])  # delete the index (database)
+# exit()
+
+
+mapping = {  # index body: custom analysis settings plus the field mappings
+    "settings": {
+        "analysis": {
+            "filter": {
+                "jieba_stop": {
+                    "type": "stop",
+                    "stopwords_path": "stopwords/stopwords.txt"
+                },
+                "jieba_synonym": {
+                    "type": "synonym",
+                    "synonyms_path": "synonyms/synonyms.txt"
+                },
+                "my_shingle_filter": {  # two-token shingles only, unigrams suppressed
+                    "type": "shingle",
+                    "min_shingle_size": 2,
+                    "max_shingle_size": 2,
+                    "output_unigrams": False
+                }
+            },
+            "analyzer": {
+                "word_ana": {  # name must match the analyzer referenced by the "content" field
+                    "tokenizer": "jieba_search",   # use the jieba tokenizer
+                    "filter": "jieba_stop"      # apply the jieba stopword filter
+                },
+                "char_ana": {
+                    "tokenizer": "standard",   # character-level: the standard tokenizer splits character by character
+                    "filter": "jieba_stop"    # also apply the jieba stopword filter
+                },
+                "char_bigram_ana": {
+                    "type": "custom",
+                    "tokenizer": "standard",
+                    "filter": [
+                        "jieba_stop",
+                        "my_shingle_filter"
+                    ]
+                },
+                "word_bigram_ana": {
+                    "type": "custom",
+                    "tokenizer": "jieba_search",
+                    "filter": [
+                        "jieba_stop",
+                        "my_shingle_filter"
+                    ]
+                }
+            }
+        }
+    },
+    "mappings": {
+        "properties": {
+            "title": {
+                "type": "keyword"
+            },
+            "author": {
+                "type": "keyword"
+            },
+            "dynasty": {
+                "type": "keyword"
+            },
+            "words": {
+                "type": "integer"
+            },
+            "content": {
+                "analyzer": "word_ana",
+                "search_analyzer": "word_ana",
+                "type": "text"
+            }
+        }
+    }
+}
+# In other words: when content is indexed it is tokenized and then passed through the jieba stopword filter. When searching by content, the query is likewise tokenized first, then stopword-filtered, and only then matched.
+
+# es.indices.create(index='point_type', body=mapping)
+
+# Then insert the data
+data = [  # sample documents; each carries title, author, dynasty, words and content
+    {
+        "title": "&#38745;&#22812;&#24605;",
+        "author": "&#26446;&#30333;",
+        "dynasty": "&#21776;",
+        "words": "20",  # written as a string here although the mapping declares "words" as integer
+        "content": "&#24202;&#21069;&#26126;&#26376;&#20809;&#65292;&#30097;&#26159;&#22320;&#19978;&#38684;&#12290;&#20030;&#22836;&#26395;&#26126;&#26376;&#65292;&#20302;&#22836;&#24605;&#25925;&#20065;&#12290;"
+    },
+
+    {
+        "title": "&#35266;&#27815;&#28023;",
+        "author": "&#26361;&#25805;",
+        "dynasty": "&#19996;&#27721;&#26411;&#24180;",
+        "words": "56",
+        "content": "&#19996;&#20020;&#30883;&#30707;&#65292;&#20197;&#35266;&#27815;&#28023;&#12290;&#27700;&#20309;&#28601;&#28601;&#65292;&#23665;&#23707;&#31462;&#23769;&#12290;&#26641;&#26408;&#19995;&#29983;&#65292;&#30334;&#33609;&#20016;&#33538;&#12290;&#31179;&#39118;&#33831;&#29791;&#65292;&#27946;&#27874;&#28044;&#36215;&#12290;&#26085;&#26376;&#20043;&#34892;&#65292;&#33509;&#20986;&#20854;&#20013;&#12290;&#26143;&#27721;&#28799;&#28866;&#65292;&#33509;&#20986;&#20854;&#37324;&#12290;&#24184;&#29978;&#33267;&#21705;&#65292;&#27468;&#20197;&#21647;&#24535;&#12290;"
+    },
+
+    {
+        "title": "&#21647;&#40517;",
+        "author": "&#39558;&#23486;&#29579;",
+        "dynasty": "&#21776;",
+        "words": "18",
+        "content": "&#40517;&#40517;&#40517;&#65292;&#26354;&#39033;&#21521;&#22825;&#27468;&#12290;&#30333;&#27611;&#28014;&#32511;&#27700;&#65292;&#32418;&#25484;&#25320;&#28165;&#27874;&#12290;"
+    },
+
+    {
+        "title": "&#23558;&#36827;&#37202;",
+        "author": "&#38472;&#38518;",
+        "dynasty": "&#21776;",
+        "words": "14",
+        "content": "&#38134;&#40493;&#37329;&#40517;&#35328;&#24453;&#35841;&#65292;&#38539;&#23478;&#23731;&#28174;&#30343;&#23478;&#26377;"
+    },
+
+    {
+        "title": "&#26149;&#38634;",
+        "author": "&#30333;&#23621;&#26131;",
+        "dynasty": "&#21776;",
+        "words": "10",
+        "content": "&#22823;&#20284;&#33853;&#40517;&#27611;&#65292;&#23494;&#22914;&#39128;&#29577;&#23633;"
+    }
+]
+for poem in data:  # send each sample document to the 'point_type' index, one request per document
+    es.index(body=poem, index='point_type')
diff --git a/elasticsearch/002-es&#20013;&#30340;&#25628;&#32034;.py b/elasticsearch/002-es&#20013;&#30340;&#25628;&#32034;.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# @Time &nbsp; &nbsp;: 2020/7/30 15:52
+# @Author &nbsp;: xiaolu
+# @FileName: 002-es&#20013;&#30340;&#25628;&#32034;.py
+# @Software: PyCharm
+from elasticsearch import Elasticsearch
+
+
+if __name__ == '__main__':
+    # Run a match query on the title field of the 'point_type' index and
+    # print the stored documents of every hit.
+    es = Elasticsearch()
+    querys = '&#19996;&#20020;&#30883;&#30707;'
+    dsl = {'query': {'match': {'title': '&#21647;&#40517;'}}}
+    response = es.search(index='point_type', body=dsl)
+    # When the search matches several documents this is a list with several entries.
+    results = response['hits']['hits']
+
+    # Keep only the original document (_source) of each hit.
+    res = [
+        hit['_source']
+        for hit in results
+    ]
+    print(res)
+