OnionIngestor/onioningestor/databases/elasticsearch.py

84 lines
2.5 KiB
Python
Raw Normal View History

import sys
import traceback
from elasticsearch import Elasticsearch, helpers
2020-07-19 21:56:54 +00:00
from onioningestor.databases import PastieStorage
class Plugin(PastieStorage):
    """Storage plugin that indexes onion records into Elasticsearch.

    On construction the plugin connects to the configured Elasticsearch
    node and tries to create the target index with the mapping held in
    ``self.mapping``. Each saved record is indexed as one document, after
    which the current document count of the index is logged.
    """

    def __init__(self, logger, **kwargs):
        """Connect to Elasticsearch and create the index.

        Args:
            logger: Logger-like object; ``info`` and ``error`` are called.
            **kwargs: Plugin configuration. ``index``, ``host`` and ``port``
                are required; ``name`` is optional.

        Raises:
            SystemExit: If the client cannot be created or the index
                creation request fails (the error is logged first).
        """
        self.name = kwargs.get('name')
        self.logger = logger
        self.logger.info('Creating Elasticsearch mapping')
        self.config = kwargs
        # Index body sent to ``indices.create``.
        # NOTE(review): the typed ``_doc`` level is the pre-7.x mapping
        # layout; Elasticsearch 7+ presumably rejects it with HTTP 400,
        # which ``ignore=400`` below would hide -- confirm against the
        # deployed server version.
        self.mapping = """
        {
            "mappings": {
                "_doc": {
                    "properties": {
                        "hiddenService": {
                            "type": "text"
                        },
                        "blacklist": {
                            "type": "keyword"
                        },
                        "monitor": {
                            "type": "boolean",
                            "null_value": "false"
                        },
                        "simple-html": {
                            "type": "nested",
                            "properties": {
                                "HTML": {
                                    "type": "long"
                                },
                                "title": {
                                    "type": "text"
                                },
                                "language": {
                                    "type": "text"
                                },
                                "status":{
                                    "type":"text"
                                },
                                "date-indexed": {
                                    "type": "date"
                                },
                                "interestingKeywords":{
                                    "type": "keyword"
                                }
                            }
                        }
                    }
                }
            }
        }
        """
        self.index = self.config['index']
        try:
            self.es = Elasticsearch([{
                'host': self.config['host'],
                'port': self.config['port']}])
            # ignore=400 keeps start-up idempotent: an "index already
            # exists" response is not raised as an exception.
            self.es.indices.create(
                index=self.index,
                body=self.mapping,
                ignore=400)
        except Exception as e:
            self.logger.error(e)
            self.logger.error(traceback.format_exc())
            # Without a usable index the plugin cannot work; exit with a
            # non-zero status so the failure is visible to the caller.
            sys.exit(1)

    def count(self):
        """Refresh the index and log how many documents it holds.

        The count is logged at info level when no shard failed and at
        least one shard answered; otherwise the raw response is logged
        as an error.
        """
        # Refresh first so documents indexed just before are counted.
        self.es.indices.refresh(index=self.index)
        status = self.es.count(index=self.index)
        shards = status['_shards']
        # An index may have any number of shards, so test for "no
        # failures" instead of "exactly one successful shard".
        if shards['successful'] >= 1 and not shards.get('failed'):
            self.logger.info('Successful Indexed Item on Elasticsearch')
            self.logger.info('Current Items Count:%d', status['count'])
        else:
            self.logger.error(status)

    def __save_pastie__(self, onion):
        """Index ``onion`` as a document, then log the index count.

        Args:
            onion: Record exposing ``asdict()``; falsy values are skipped.
        """
        if not onion:
            return
        self.es.index(index=self.index, body=onion.asdict())
        self.count()