Created
November 6, 2015 09:03
-
-
Save frague59/aab071f0bdce5b010ce4 to your computer and use it in GitHub Desktop.
search features with attachment supports
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Search features for an elasticsearch / haystack / elasticstack | |
:creationdate: 05/11/15 15:05 | |
:moduleauthor: François GUÉRIN <[email protected]> | |
:modulename: intrautils.search | |
""" | |
import base64 | |
import json | |
import logging | |
import mimetypes | |
from django import forms | |
from django.conf import settings | |
from django.db.models.fields.files import FieldFile as dj_File | |
from django.templatetags.static import static | |
from django.utils.translation import ugettext_lazy as _ | |
from elasticstack.backends import ConfigurableElasticBackend, ConfigurableElasticSearchEngine | |
from elasticstack.fields import FacetField | |
from elasticstack.forms import SearchForm | |
from filer.models import File as fi_File | |
from form_utils.forms import BetterForm | |
from haystack import DEFAULT_ALIAS | |
from haystack.constants import DJANGO_CT, DJANGO_ID | |
from haystack.fields import SearchField | |
from haystack.forms import model_choices | |
from utils import widgets as u_widgets | |
from utils.forms import CollapsibleFieldsetFormMixin | |
__author__ = 'fguerin'

logger = logging.getLogger('intrautils.search')

# Mapping used for every field type that has no dedicated entry in FIELD_MAPPINGS.
DEFAULT_FIELD_MAPPING = {
    'type': 'string',
    'analyzer': 'snowball',
}

# Elasticsearch mapping for each haystack ``field_type``.
FIELD_MAPPINGS = {
    # Analyzed text variants
    'edge_ngram': {
        'type': 'string',
        'analyzer': 'edgengram_analyzer',
    },
    'ngram': {
        'type': 'string',
        'analyzer': 'ngram_analyzer',
    },
    # Dates
    'date': {'type': 'date'},
    'datetime': {'type': 'date'},
    # Geo
    'location': {'type': 'geo_point'},
    # Scalars
    'boolean': {'type': 'boolean'},
    'float': {'type': 'float'},
    'long': {'type': 'long'},
    'integer': {'type': 'long'},
    # Binary documents, used by the ``attachment`` field type
    'attachment': {'type': 'attachment'},
}
class ExtendedElasticsearchBackend(ConfigurableElasticBackend):
    """
    Adds `attachment` support for elasticsearch backend settings
    """

    def build_schema(self, fields):
        """
        Merge of the haystack and elasticstack elasticsearch backend `build_schema` methods.

        It provides an additional feature: custom field mappings, read from the
        ``ELASTICSEARCH_FIELD_MAPPINGS`` / ``ELASTICSEARCH_DEFAULT_FIELD_MAPPINGS``
        settings, falling back to the module level ``FIELD_MAPPINGS`` /
        ``DEFAULT_FIELD_MAPPING`` dicts.

        :param fields: dict-like object whose values are the index field instances
        :return: ``(content_field_name, mapping)`` tuple, where ``content_field_name``
                 is the ``index_fieldname`` of the field flagged ``document=True``
                 (empty string when there is none) and ``mapping`` is the
                 elasticsearch mapping dict, keyed by ``index_fieldname``
        """
        content_field_name = ''
        mapping = {
            DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
            DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
        }
        field_mappings = getattr(settings, 'ELASTICSEARCH_FIELD_MAPPINGS', FIELD_MAPPINGS)
        default_field_mapping = getattr(settings, 'ELASTICSEARCH_DEFAULT_FIELD_MAPPINGS', DEFAULT_FIELD_MAPPING)

        for field_class in fields.values():
            # Copy so that per-field tweaks never leak into the shared mapping dicts.
            field_mapping = field_mappings.get(field_class.field_type, default_field_mapping).copy()

            if field_class.boost != 1.0:
                field_mapping['boost'] = field_class.boost

            if field_class.document is True:
                content_field_name = field_class.index_fieldname

            # Do this last to override `text` fields.
            if field_mapping['type'] == 'string':
                if field_class.indexed is False or hasattr(field_class, 'facet_for'):
                    # haystack behaviour that was lost in the merge: facet and
                    # non-indexed string fields must be stored verbatim, not analyzed.
                    field_mapping['index'] = 'not_analyzed'
                    field_mapping.pop('analyzer', None)
                elif field_class.indexed and field_class.field_type not in ('ngram', 'edge_ngram'):
                    # elasticstack behaviour: per-field analyzer, with the backend
                    # default as fallback. n-gram fields keep their own analyzer.
                    field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

            mapping[field_class.index_fieldname] = field_mapping

        return content_field_name, mapping
class ExtendedElasticSearchEngine(ConfigurableElasticSearchEngine):
    """
    Search engine that uses :class:`ExtendedElasticsearchBackend` as its backend.
    """

    backend = ExtendedElasticsearchBackend
class AttachmentField(SearchField):
    """
    Index field that sends a file to elasticsearch as an `attachment` document.

    Supported values are django ``FieldFile`` instances and django-filer ``File``
    instances; anything else makes :meth:`prepare` raise ``NotImplementedError``.
    """
    field_type = 'attachment'
    author_field = 'author'

    def __init__(self, **kwargs):
        """
        :param kwargs: ``content_type_field`` and ``author_field`` are consumed here
                       and stored on the instance, the remaining keyword arguments
                       are passed to the parent ``__init__``
        """
        if 'content_type_field' in kwargs:
            self.content_type_field = kwargs.pop('content_type_field')
        if 'author_field' in kwargs:
            self.author_field = kwargs.pop('author_field')
        super(AttachmentField, self).__init__(**kwargs)

    def convert(self, value):
        """
        Return ``value`` unchanged.
        """
        return value

    @staticmethod
    def _get_content_length(field_file):
        """
        Return ``len(field_file)``, or ``len(field_file.file)`` when the object
        itself does not support ``len()``.
        """
        try:
            return len(field_file)
        except TypeError:
            return len(field_file.file)

    @staticmethod
    def _get_encoded_content(field_file):
        """
        Return the base64 encoded content of ``field_file`` as ASCII text.

        The content is read through ``field_file.read()``; when that raises
        ``AttributeError`` the object itself is encoded.
        """
        try:
            raw = field_file.read()
        except AttributeError:
            raw = field_file
        # Decode so the payload is JSON-serializable text on Python 2 and 3.
        return base64.b64encode(raw).decode('ascii')

    @staticmethod
    def _get_file_data(field):
        """
        Build the attachment payload for ``field``.

        :param field: filer ``File`` instance or django ``FieldFile`` instance
        :return: dict with the ``_language``, ``_content``, ``_content_type``,
                 ``_name``, ``_title`` and ``_content_length`` keys
        """
        if isinstance(field, fi_File):
            field_file = field.file
            name = field.label
            # guess_type() returns a (type, encoding) tuple: only the type is wanted.
            content_type = mimetypes.guess_type(name)[0]
        else:  # isinstance(field, dj_File):
            field_file = field
            name = None
            content_type = None

        content_length = AttachmentField._get_content_length(field_file)
        content = AttachmentField._get_encoded_content(field_file)

        return {
            '_language': 'fr',
            '_content': content,
            '_content_type': content_type,
            '_name': name,
            '_title': name,
            '_content_length': content_length,
        }

    def prepare(self, obj):
        """
        Build the attachment payload for ``obj``.

        :param obj: object being indexed; the file is read from its ``model_attr``
                    attribute when ``model_attr`` is set, otherwise ``obj`` itself
                    is used as the file
        :return: attachment payload dict, see :meth:`_get_file_data`
        :raise NotImplementedError: when the value is neither a django ``FieldFile``
                                    nor a filer ``File``
        """
        if self.model_attr:
            field = getattr(obj, self.model_attr)
        else:
            field = obj

        if not isinstance(field, (dj_File, fi_File)):
            raise NotImplementedError('AttachmentField does not implement file reading for %s file'
                                      % field.__class__.__name__)

        output = self._get_file_data(field)

        if settings.DEBUG:
            # Shallow copy: only the (truncated) content differs, so there is no
            # need to duplicate the whole base64 payload just for logging.
            _output = dict(output, _content=output['_content'][:50] + '...')
            logger.debug(u'AttachmentField::prepare() output = %s', json.dumps(_output, indent=2))
        return output
class FacetedAttachmentField(FacetField, AttachmentField):
    """
    Attachment field combined with the elasticstack ``FacetField`` base class.
    """
def application_model_choices(app_name, using=DEFAULT_ALIAS):
    """
    Return the haystack model choices restricted to one application.

    A choice is kept when ``app_name`` is contained in its first item.

    :param app_name: text looked up in the first item of each choice
    :param using: haystack connection alias passed to ``model_choices``
    :return: list of the matching choices, in the order given by ``model_choices``
    """
    return [candidate for candidate in model_choices(using) if app_name in candidate[0]]
class HaystackSearchForm(CollapsibleFieldsetFormMixin, BetterForm, SearchForm):
    """
    haystack search form for main `searching` feature

    Subclasses must either set the :attr:`search_app` attribute or override
    :meth:`get_search_app`.
    """

    class Media:
        js = (static('bootstrap-collapsible-fieldset/bootstrap-collapsible-fieldset.js'),)
        css = {'all': (static('bootstrap-collapsible-fieldset/bootstrap-collapsible-fieldset.css'),)}

    class Meta:
        collapsed = True
        fieldsets = (('main', {'legend': _('search'), 'fields': ('q', 'models')}),)

    # Text (or tuple / list of texts) that the `django_ct` of the results must contain.
    search_app = None

    models = forms.MultipleChoiceField(choices=application_model_choices('intrapubs'),
                                       required=False,
                                       label=_('Search in'),
                                       widget=u_widgets.ColumnCheckboxSelectMultiple(columns=3))

    def get_search_app(self):
        """
        Return the application(s) the search is restricted to.

        :return: the :attr:`search_app` attribute
        :raise NotImplementedError: when :attr:`search_app` is not set
        """
        if self.search_app:
            return self.search_app
        raise NotImplementedError('%s must provide a search_app attribute or override get_search_app() method.'
                                  % self.__class__.__name__)

    def get_models(self):
        """
        Return the model classes selected through the `models` field.

        :return: list of model classes, in the order of ``cleaned_data['models']``;
                 empty list when the form is not valid
        """
        if not self.is_valid():
            return []

        # Imported here because the module never imported a model loader.
        try:
            from django.apps import apps
            get_model = apps.get_model
        except ImportError:  # Django < 1.7
            from django.db.models import get_model

        # Choices are '<app_label>.<model_name>' strings.
        return [get_model(*model.split('.')) for model in self.cleaned_data['models']]

    def search(self):
        """
        Run the search, restricted to the selected models and to the search application(s).

        :return: the filtered search query set, or an empty list when the base
                 search yields no result
        """
        from functools import reduce
        from operator import or_

        search_app = self.get_search_app()
        search_query_set = super(HaystackSearchForm, self).search()

        if settings.DEBUG:
            logger.debug(u'HaystackSearchForm::search() len(search_query_set) = %d '
                         u'(before models filtering)', len(search_query_set))

        if not search_query_set:
            return []

        search_query_set = search_query_set.models(*self.get_models())

        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str

        if isinstance(search_app, string_types):
            search_query_set = search_query_set.filter(django_ct__contains=search_app)
        elif isinstance(search_app, (tuple, list)) and search_app:
            from haystack.query import SQ
            # OR the applications together first, then AND the result with the
            # existing query. Chaining `filter_or()` would OR each application
            # with the user query itself, returning every document of these apps.
            app_filter = reduce(or_, (SQ(django_ct__contains=app) for app in search_app))
            search_query_set = search_query_set.filter(app_filter)

        if settings.DEBUG:
            logger.debug(u'HaystackSearchForm::search() len(search_query_set) = %d '
                         u'(after models filtering)', len(search_query_set))

        return search_query_set

    def no_query_found(self):
        """
        Return an empty list.
        """
        return []
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment