Elasticsearch 应用开发(1)-- 安装部署
2017-08-02
1. 介绍 Elasticsearch
Elasticsearch 是一个扩展性强、开源的全文检索引擎。它可以存储大量的数据,并且能进行实时检索和分析操作。
1.1 它的几个简单应用场景
- 存储销售的产品信息,给用户提供搜索,搜索词自动完成功能
- Elasticsearch/Logstash/Kibana 整合,收集应用程序/服务日志,进行日志的聚合,分析处理,挖掘你感兴趣的信息
- 商品价格提醒功能,把用户感兴趣的商品价格信息保存到 Elasticsearch,使用 reverse-search(即 Percolator)检查价格变动,给用户推送通知
- 商业信息分析系统(BI)。保存商业(广告)数据到 Elasticsearch,通过 Kibana 快速定制 dashboards,可视化商业数据,分析其中的信息,使用 Elasticsearch 的聚合功能进行复杂的商业数据查询
2. 安装部署
2.1 运行环境
- centos7 / debian8
- java1.8 (jdk)
- python3.6 (示例程序)
下载安装 jdk
jdk 下载地址,选择 jdk-8u144-linux-x64.tar.gz 对应的下载地址
安装
# Unpack into /opt (run from the directory that holds the download)
tar xvf jdk-8u144-linux-x64.tar.gz -C /opt
# Create a version-independent symlink
# (-n replaces an existing /opt/jdk link instead of creating a link inside it)
ln -sfn /opt/jdk1.8.0_144 /opt/jdk
# Verify; use the full path because /opt/jdk/bin has not been added to PATH
/opt/jdk/bin/java -version
输出: java version "1.8.0_144"
2.2 Elasticsearch 安装
下载 elasticsearch-5.5.0.tar.gz(可从 Elasticsearch 官网的下载页获取)
安装
# Unpack into /opt (run from the directory that holds the download)
tar xvf elasticsearch-5.5.0.tar.gz -C /opt
# Create a version-independent symlink
# (-n replaces an existing /opt/elasticsearch link instead of creating a link inside it)
ln -sfn /opt/elasticsearch-5.5.0 /opt/elasticsearch
# Create the "elastic" system user that will own and run Elasticsearch
adduser --system --no-create-home elastic
# Hand the install tree to that user.
# Both variants must run from /opt; run only the chown line for your distribution.
cd /opt
# debian
chown -R elastic:nogroup elasticsearch*
# centos7
chown -R elastic:elastic elasticsearch*
# Start Elasticsearch as the elastic user, with the JDK added to PATH
cd /opt/elasticsearch/bin
sudo -u elastic PATH="$PATH:/opt/jdk/bin" ./elasticsearch
2.3 配置文件
文件 | 描述 |
---|---|
elasticsearch.yml | 配置数据路径,日志路径,端口等 |
jvm.options | 配置 java 环境,内存堆栈等 |
log4j2.properties | 配置日志记录参数 |
配置 elasticsearch.yml
# Cluster name
cluster.name: test-es
# Node name
node.name: node-es-1
# Host address
# NOTE(review): the Python examples in section 3 connect to http://localhost:9200 —
# confirm that address is reachable with this network.host value.
network.host: 192.168.0.1
# HTTP port
http.port: 9200
# Data path
path.data: /opt/elasticsearch/data
# Log path
path.logs: /opt/elasticsearch/logs
配置 java 参数
# Initial heap size
-Xms4g
# Maximum heap size
-Xmx4g
3. 用 Python 程序访问 Elasticsearch 示例
3.1 python 环境
- python3.6
3.2 安装 elasticsearch 库
pip install elasticsearch
3.3 插入数据,查询数据示例代码
#!/usr/bin/env python
# coding: utf-8
import pprint
from datetime import datetime
from elasticsearch import Elasticsearch
def main():
    """Index five sample documents into ``test-index`` and print all hits.

    Connects to the node at http://localhost:9200, writes documents with
    ids 0-4 (type ``tweet``), refreshes the index, then runs a
    ``match_all`` search and prints the total plus each returned hit.
    """
    # Defaults to the local machine, port 9200.
    es = Elasticsearch(['http://localhost:9200'])

    def create_id(cur_id):
        """Index one sample document under ``cur_id`` and read it back."""
        doc = {
            'author': 'test-zsz',
            'text': 'Elasticsearch: cool. bonsai cool.',
            'timestamp': datetime.now(),
        }
        res = es.index(index="test-index", doc_type='tweet', id=cur_id, body=doc)
        # Prints the 'created' flag from the index response.
        # NOTE(review): presumably False when the id already existed — confirm
        # against the Index API docs for your Elasticsearch version.
        print(res['created'])
        res = es.get(index="test-index", doc_type='tweet', id=cur_id)
        print(res['_source'])

    # Insert the documents in a loop.
    for i in range(5):
        # es.delete(index="test-index", doc_type='tweet', id=i)
        create_id(cur_id=i)

    # Refresh the index before searching it.
    es.indices.refresh(index="test-index")

    # Search by keyword:
    # res = es.search(index="test-index", body={"query": {"term": {'author': 'test-zy'}}})
    # Search for all documents.
    res = es.search(index="test-index", body={"query": {"match_all": {}}})
    print("Got %d Hits:" % res['hits']['total'])
    for hit in res['hits']['hits']:
        print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])


if __name__ == '__main__':
    main()
结果(示例输出:created 为 False 表示该 id 的文档此前已存在,本次写入为覆盖更新;search 默认只返回前 10 条,所以总数为 20 时只打印了 10 条)
False
{'timestamp': '2017-08-02T17:16:12.212797', 'author': 'test-zy', 'text': 'Elasticsearch: cool. bonsai cool.'}
False
{'timestamp': '2017-08-02T17:16:12.226318', 'author': 'test-zy', 'text': 'Elasticsearch: cool. bonsai cool.'}
False
{'timestamp': '2017-08-02T17:16:12.238793', 'author': 'test-zy', 'text': 'Elasticsearch: cool. bonsai cool.'}
False
{'timestamp': '2017-08-02T17:16:12.262015', 'author': 'test-zy', 'text': 'Elasticsearch: cool. bonsai cool.'}
False
{'timestamp': '2017-08-02T17:16:12.275738', 'author': 'test-zy', 'text': 'Elasticsearch: cool. bonsai cool.'}
Got 20 Hits:
2017-08-02T17:15:55.950035 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:56.016564 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:16:12.212797 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:55.811200 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:55.847475 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:55.859777 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:55.873198 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:55.915344 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:55.824415 test-zy: Elasticsearch: cool. bonsai cool.
2017-08-02T17:15:55.962373 test-zy: Elasticsearch: cool. bonsai cool.
3.4 查询集群状态示例
#!/usr/bin/env python
# coding: utf-8
import pprint
from datetime import datetime
from elasticsearch import Elasticsearch
def es_status():
    """Pretty-print the cluster state, statistics and settings.

    Connects to the node at http://localhost:9200 and dumps the results of
    ``cluster.state()``, ``cluster.stats()`` and ``cluster.get_settings()``
    with ``pprint``.
    """
    # Connect to the ES server. The sniff_* options turn on the client's
    # node sniffing: at start-up, after a connection failure, and with a
    # sniffer timeout of 60 (see the elasticsearch-py connection docs).
    es = Elasticsearch(
        ['http://localhost:9200'],
        sniff_on_start=True,
        sniff_on_connection_fail=True,
        sniffer_timeout=60,
    )

    # Cluster state information.
    state = es.cluster.state()
    pprint.pprint(state)

    # Cluster statistics.
    stats = es.cluster.stats()
    pprint.pprint(stats)

    # Cluster settings.
    settings = es.cluster.get_settings()
    pprint.pprint(settings)


if __name__ == '__main__':
    es_status()
结果(输出有删节)
{'blocks': {},
'cluster_name': 'yq-es',
'master_node': '73De-u8yT8GwKrP_MD0yhw',
'metadata': {'cluster_uuid': 'Uy_OOALVQjyF8KWr_eQ1rw',
'index-graveyard': {'tombstones': []},
'indices': {'test-index': {'aliases': [],
'in_sync_allocations': {'0': ['niazcE2iTK6xXJpvF4Ho9A'],
'1': ['rOhpGDFTRwCpJi5ROBit-A'],
'routing_table': {'indices': {'test-index': {'shards': {'0': [{'allocation_id': {'id': 'niazcE2iTK6xXJpvF4Ho9A'},
'index': 'test-index',
'node': '73De-u8yT8GwKrP_MD0yhw',
'primary': True,
'relocating_node': None,
'shard': 0,
'state': 'STARTED'},
{'index': 'test-index',
'node': None,
'primary': False,
'recovery_source': {'type': 'PEER'},
'relocating_node': None,
'shard': 0,
'state': 'UNASSIGNED',
'unassigned_info': {'allocation_status': 'no_attempt',
'at': '2017-08-02T05:52:21.770Z',
'delayed': False,
'reason': 'CLUSTER_RECOVERED'}}],
'state_uuid': 'Z4Sb6jcXTmqmgXjFqaaY0A',
'version': 6}
{'_nodes': {'failed': 0, 'successful': 1, 'total': 1},
'cluster_name': 'yq-es',
'indices': {'completion': {'size_in_bytes': 0},
'count': 1,
'docs': {'count': 20, 'deleted': 0},
'fielddata': {'evictions': 0, 'memory_size_in_bytes': 0},
......
'name': 'analysis-ik',
'version': '5.5.0'}],
'process': {'cpu': {'percent': 0},
'open_file_descriptors': {'avg': 187,
'max': 187,
'min': 187}},
'versions': ['5.5.0']},
'status': 'yellow',
'timestamp': 1501666900999}
4. supervisor 配置自动启动 Elasticsearch
[program:elastic]
; Run the Elasticsearch launcher from its bin directory as the "elastic" user.
command=/opt/elasticsearch/bin/elasticsearch
directory=/opt/elasticsearch/bin
stdout_logfile=/var/log/supervisor/elastic.log
; Put the JDK on PATH for the launcher. The value is quoted because it
; contains non-alphanumeric characters; the duplicated /opt/jdk/bin entry and
; the unrelated /root/go/bin entry were dropped.
environment=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/jdk/bin"
autostart=true
autorestart=true
; Send stderr to the same log file as stdout.
redirect_stderr=true
; Stop with SIGTERM: a JVM answers SIGQUIT with a thread dump and keeps
; running, so QUIT would never shut the process down.
stopsignal=TERM
user=elastic