2015-01-06 109 views
0

我正在使用 Django、Haystack 和 Elasticsearch。问题:如何用 Elasticsearch + Haystack 搜索多个单词。

我的 search_index.py:

from haystack import indexes 
from models import Advertisement 



class AdvertisementIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index describing how ``Advertisement`` objects are indexed."""

    # Primary document field; its content is rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    # NOTE(review): these three fields declare neither ``model_attr`` nor a
    # visible ``prepare_<name>`` method -- confirm how they are populated.
    make = indexes.CharField()
    section = indexes.CharField()
    subcategory = indexes.CharField()
    content = indexes.CharField(model_attr='content')
    images = indexes.CharField(model_attr='images')

    def get_model(self):
        """Return the model class covered by this index."""
        return Advertisement

    def index_queryset(self, using=None):
        """Return the queryset of objects to index.

        Only rows with ``is_published=True`` are included, and the ``make``,
        ``section`` and ``subcategory`` relations are fetched in the same query.

        :param using: accepted for interface compatibility; not used here.
        """
        # A single multi-argument select_related() is used on purpose: before
        # Django 1.7, chaining select_related('a').select_related('b') kept
        # only the last relation.
        return (
            self.get_model().objects
            .filter(is_published=True)
            .select_related('make', 'section', 'subcategory')
        )

搜索表单:

<!-- Search box: submits the typed text as the "q" GET parameter to /search. -->
<form action="/search" method="get">
    <!-- "text-search" is not a valid input type; "search" is the HTML5 type for search boxes. -->
    <input type="search" name="q">
    <input type="submit" value="">
</form>

模板:

{% block content %}
{# Prints six attributes of result.object for every entry in page.object_list; shows "No result." when the list is empty. #}

{% for result in page.object_list %}
    <p>{{ result.object.title }}</p>
    <p>{{ result.object.content }}</p>
    <p>{{ result.object.images }}</p>
    <p>{{ result.object.make }}</p>
    <p>{{ result.object.section }}</p>
    <p>{{ result.object.subcategory }}</p>
{% empty %}
    <p>No result.</p>
{% endfor %}

{% endblock %}

查看 curl -XGET "http://localhost:9200/_search?q=fender+boss" 的输出,我能得到所有包含“boss”和“fender”的结果。

但当我在搜索框中输入“boss fender”时,没有任何结果。通过搜索表单,我只能得到单个单词(例如“boss”)的结果。如何才能支持多个单词的搜索?

回答

2

这个月我也遇到了同样的问题。

为了执行正确的查询,你需要重写(override)一些 Haystack 对象。我发现这篇文章非常有帮助:Extending Haystack’s Elasticsearch backend。一开始相当复杂,但一旦明白了它的工作方式……它就能正常工作了 :-)

这篇博客文章讲解了如何实现 Elasticsearch 的嵌套查询……而我实现的是一个基本的 multi_match 查询:

# -*- coding: utf-8 -*- 
from __future__ import absolute_import 

from django.conf import settings 

from haystack.backends.elasticsearch_backend import (
    ElasticsearchSearchBackend, ElasticsearchSearchEngine, ElasticsearchSearchQuery) 
from haystack.query import SearchQuerySet 


class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with configurable analyzer/index settings and
    support for ``multi_match`` queries.

    Two optional Django settings are honoured:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` -- replaces ``DEFAULT_SETTINGS``.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` -- replaces ``DEFAULT_ANALYZER``.
    """

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply the optional overrides
        found in the Django settings."""
        super(ElasticsearchEngineBackendCustom, self).__init__(connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

        user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if user_analyzer:
            self.DEFAULT_ANALYZER = user_analyzer

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the search body, optionally swapping in a ``multi_match`` query.

        All arguments except ``multi_match`` are forwarded unchanged to the
        parent implementation.

        :param multi_match: optional dict with the keys ``query``, ``fields``,
            ``tie_breaker`` and ``minimum_should_match``. When truthy, the
            text query produced by the parent is replaced by a ``multi_match``
            clause built from these values.
        :returns: the search kwargs dict produced by the parent, possibly with
            its query replaced.
        """
        out = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string, sort_by, start_offset,
            end_offset,
            fields, highlight, facets,
            date_facets, query_facets,
            narrow_queries, spelling_query,
            within, dwithin, distance_point,
            models, limit_to_registered_models,
            result_class)

        if multi_match:
            clause = {
                'multi_match': {
                    'query': multi_match['query'],
                    'fields': multi_match['fields'],
                    'tie_breaker': multi_match['tie_breaker'],
                    'minimum_should_match': multi_match['minimum_should_match'],
                }
            }

            current = out.get('query')
            if isinstance(current, dict) and 'filtered' in current:
                # Keep the filter part (e.g. narrow queries / model
                # restriction) and replace only the text query.
                # NOTE(review): assumes the parent backend nests filters as
                # {'filtered': {'query': ..., 'filter': ...}} -- confirm for
                # the haystack version in use.
                current['filtered']['query'] = clause
            else:
                out['query'] = clause

        return out

    def build_schema(self, fields):
        """Build the mapping and attach an analyzer to eligible string fields.

        The analyzer is set on every indexed ``string`` field that either has
        no ``facet_for`` attribute or whose ``field_type`` is ``ngram`` /
        ``edge_ngram``. A per-field ``analyzer`` attribute takes precedence
        over ``DEFAULT_ANALYZER``.

        :returns: ``(content_field_name, mapping)`` as returned by the parent,
            with the mapping updated in place.
        """
        content_field_name, mapping = super(ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field_name, field_class in fields.items():
            # The dict is mutated in place, so no write-back into ``mapping``
            # is required.
            field_mapping = mapping[field_class.index_fieldname]

            if field_mapping['type'] == 'string' and field_class.indexed:
                if not hasattr(field_class, 'facet_for') or field_class.field_type in ('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Execute a ``multi_match`` query through elasticsearch-dsl.

        :param query: the text to search for.
        :param fields: list of field names (optionally boosted, e.g. ``title^8``).
        :param minimum_should_match: forwarded to ``MultiMatch``.
        :param tie_breaker: forwarded to ``MultiMatch``.
        :returns: whatever ``self._process_results`` returns for the response.
        """
        # Imported lazily so elasticsearch_dsl is only required when this
        # method is actually used.
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        # Restrict the search to this backend's own index; an unscoped
        # Search() would query every index on the cluster.
        raw = Search(using=self.conn, index=self.index_name).query(
            MultiMatch(query=u'{}'.format(query), fields=fields,
                       minimum_should_match=minimum_should_match, tie_breaker=tie_breaker)
        ).execute()

        return self._process_results(raw)


class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and execute a ``multi_match`` query."""

    # Default for queries that never call add_multi_match_query(); without it
    # build_params() would raise AttributeError on such queries.
    multi_match_query = None

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Run the multi_match query on the backend immediately and store the
        returned results and hit count on this query object."""
        results = self.backend.multi_match_run(query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Remember the multi_match parameters so build_params() can forward
        them to the backend."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Return the parent's search kwargs, adding a ``multi_match`` entry
        when multi_match parameters have been registered."""
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(spelling_query, **kwargs)
        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query

        return search_kwargs

    def _clone(self, *args, **kwargs):
        """Clone the query, carrying the multi_match parameters over.

        NOTE(review): assumes the parent ``_clone`` does not know about
        ``multi_match_query`` and would otherwise drop it -- confirm.
        """
        clone = super(ElasticsearchSearchQueryCustom, self)._clone(*args, **kwargs)
        clone.multi_match_query = self.multi_match_query
        return clone


class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """SearchQuerySet that exposes a ``multi_match`` entry point."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone of this queryset set up for a multi_match search.

        The parameters are first registered on the clone's query and the
        query's ``multi_match`` is then invoked with the same values.

        :param query: the text to search for.
        :param fields: field names to match against.
        :param minimum_should_match: defaults to ``"35%"``.
        :param tie_breaker: defaults to ``0.3``.
        """
        match_args = (query, fields, minimum_should_match, tie_breaker)

        new_queryset = self._clone()
        new_queryset.query.add_multi_match_query(*match_args)
        new_queryset.query.multi_match(*match_args)
        return new_queryset


class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Search engine that plugs in the custom backend and query classes."""

    # Backend class used by this engine.
    backend = ElasticsearchEngineBackendCustom
    # Query class used by this engine.
    query = ElasticsearchSearchQueryCustom

如你所见,我使用 elasticsearch-dsl 来执行查询(MultiMatch)。下面这句话概括了那篇博客文章:ElasticsearchSearchQuerySetCustom().multi_match(...) 的调用依赖于 ElasticsearchSearchQueryCustom,而后者又依赖于 ElasticsearchEngineBackendCustom。

然后在你的 settings 中加入 Elasticsearch 配置,例如:

# Analyzer name read by the custom backend's __init__; when set, it replaces
# the backend's DEFAULT_ANALYZER.
ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'
# Index settings read by the custom backend's __init__; when set, they replace
# the backend's DEFAULT_SETTINGS.
ELASTICSEARCH_INDEX_SETTINGS = {
    # "[...]" is a placeholder left by the author: fill in the real settings
    # body for the language(s) you need.
    "settings": {[...]}
}

你可以从 Elasticsearch 的 Language Analyzers 文档中获取所需语言对应的 ELASTICSEARCH_INDEX_SETTINGS。

你还需要重写 SearchForm:

# -*- coding: utf-8 -*- 
from __future__ import absolute_import 

from haystack.forms import SearchForm 

from .backend import ElasticsearchSearchQuerySetCustom 


class SearchFormCustom(SearchForm):
    """Search form that runs a multi_match query over ``title`` and ``text``."""

    def search(self):
        """Return the search results for the submitted ``q`` value.

        Falls back to ``self.no_query_found()`` when the form is invalid or
        the query is empty (before or after cleaning).
        """
        # Validate first: ``cleaned_data`` only exists once the form has been
        # validated, so reading it earlier raises AttributeError on an
        # unbound form.
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        # "^" boosts a field: title matches weigh 8x, text matches 0.5x.
        sqs = ElasticsearchSearchQuerySetCustom().multi_match(query, ['title^8', 'text^0.5'])

        return sqs

字段 title 和 text 必须存在于你的索引中;脱字符(^)用于对字段进行权重提升(boost)。

你还需要重写 Haystack 的 URL 模式,以便使用自定义表单:

# Route the root URL to a haystack search view that uses the custom form.
# A plain list is used instead of patterns(): the string prefix only applies
# to views given as strings, and patterns() is deprecated since Django 1.8.
urlpatterns = [
    url(r'^$', search_view_factory(form_class=SearchFormCustom), name='haystack-search'),
]

就是这样,HTH :-)

注意:不要使用 result.object.something,而应使用索引上的字段,例如 result.title,因为 result.object.title 会访问数据库!请参阅 Haystack Best Practices。

+0

非常感谢您的详细解答,但我转而使用solr – Ihar