Last active
May 13, 2017 19:57
-
-
Save zv3/2c504ef978aab20ff12734aef3b96314 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package repositories | |
import javax.inject.{Singleton, Inject} | |
import com.os.shared.contentapi.api._ | |
import com.os.shared.contentapi._ | |
import com.sksamuel.elastic4s.ElasticDsl._ | |
import com.sksamuel.elastic4s.QueryDefinition | |
import common.Logging | |
import implicits.ESFormats | |
import implicits.Lists.RichLists | |
import lib.ElasticSearch.ESConfig | |
import models._ | |
import org.elasticsearch.search.sort.SortOrder | |
import play.api.libs.json.Json | |
import scala.concurrent.Future | |
import scala.concurrent.ExecutionContext.Implicits.global | |
@Singleton | |
class ContentRepository @Inject() (esConfig: ESConfig) extends Logging { | |
/** Elasticsearch client taken from the injected configuration. */
val client = esConfig.client

/** The configured index paired with the literal "item"; this pair is what every `search in` below receives. */
val index = (esConfig.index, "item")
import ESFormats._ | |
/**
 * Looks up a single item by `id` and builds the matching [[ItemResponse]].
 *
 * The search compares `id` against the `id`, `webPath` and `content.fields.shortPath` fields and
 * reads at most one hit. That hit is handled by the first of these that applies:
 *  - it carries `content`: the response holds that content;
 *  - it carries a `tag`: the response holds the tag plus one page of content returned by
 *    `SearchContent` for that tag ID;
 *  - it carries a `section`: the response holds the section plus one page of content returned by
 *    `SearchContent` for that section ID.
 *
 * @param id         value compared against the three identifier fields
 * @param showFields forwarded to `SearchContent` for tag and section items
 * @param showTags   forwarded to `parseESContent` and `SearchContent`
 * @param pageSize   page size of the listing attached to tag and section items
 * @param pageNumber page number of that listing
 * @return the response; the future fails with `NoSuchElementException` when the search returns no
 *         hit, and with `ApiException` when the hit has neither content, tag nor section
 */
def GetItem(id: String, showFields: Seq[String], showTags: Seq[String], pageSize: Int, pageNumber: Int): Future[ItemResponse] = {
  // Listing attached to tag and section items; always requested with the "newest" ordering.
  def listing(criteria: ContentQueryCriteria): Future[SearchResponse[Content]] =
    SearchContent(criteria, showFields, showTags, "newest", pageSize, pageNumber)

  val esQuery = client execute {
    val req = search in index limit 1 query {
      bool {
        must(matchAllQuery) filter {
          should(
            termQuery("id", id),
            termQuery("webPath", id),
            termQuery("content.fields.shortPath", id)
          )
        }
      }
    }
    log.debug("[GetItem] Searching: " + req.show)
    req
  }

  esQuery.flatMap { resp =>
    log.debug("[GetItem] Request response: " + resp.toString)
    resp.hits.headOption match {
      case None =>
        // Same exception type the former `.head` call produced, but with a message that says why.
        Future.failed[ItemResponse](new NoSuchElementException(s"No item found for id '$id'"))

      case Some(hit) =>
        log.debug("[GetItem] Hit response: " + hit.sourceAsString)
        val esContentItem = Json.parse(hit.sourceAsString).as[ESItem]

        // `orElse` is lazy, so the tag/section listing is only requested when the earlier
        // alternatives are absent.
        val maybeItemResponse: Option[Future[ItemResponse]] =
          esContentItem.content.map(d => parseESContent(d, showTags) { content =>
            ItemResponse(
              status = "ok",
              content = Some(content)
            )
          }) orElse
          esContentItem.tag.map { t =>
            listing(ContentQueryCriteria(q = "", orTagIDs = Some(Seq(t.id)))).flatMap { docResp =>
              parseESTag(t) { tag =>
                ItemResponse(
                  status = "ok",
                  total = Some(docResp.total),
                  tag = Some(tag),
                  pageSize = Some(docResp.pageSize),
                  currentPage = Some(docResp.currentPage),
                  pages = Some(docResp.pages),
                  orderBy = Some(docResp.orderBy),
                  results = Some(docResp.results)
                )
              }
            }
          } orElse
          esContentItem.section.map { s =>
            listing(ContentQueryCriteria(q = "", sectionIDs = Some(Seq(s.id)))).flatMap { docResp =>
              parseESSection(s) { section =>
                ItemResponse(
                  status = "ok",
                  total = Some(docResp.total),
                  section = Some(section),
                  pageSize = Some(docResp.pageSize),
                  currentPage = Some(docResp.currentPage),
                  pages = Some(docResp.pages),
                  orderBy = Some(docResp.orderBy),
                  results = Some(docResp.results)
                )
              }
            }
          }

        maybeItemResponse.getOrElse(
          Future.failed[ItemResponse](new ApiException("Couldn't parse the response"))
        )
    }
  }
}
/**
 * Converts an Elasticsearch section into its DTO, applies `f` to it and returns the outcome as an
 * already-completed future.
 *
 * @param esSection section as read from the index
 * @param f         transformation applied to the converted section
 */
private def parseESSection[A](esSection: ESSection)(f: Section => A): Future[A] = {
  log.debug("[parseESSection] Parsing response as Section")
  val section = ESSection.convertToDTO(esSection)
  Future.successful(f(section))
}
/**
 * Converts an Elasticsearch content document into a `Content` DTO and applies `f` to it.
 *
 * Tags are loaded through `getTagsByIDs`: every tag of the document when `showTags` contains
 * "all", otherwise only those whose ID also appears in `showTags`. The section is loaded through
 * `getSectionByIDs`. When a tag is flagged as lead and was loaded, it is moved to the front.
 *
 * @param esContent content as read from the index
 * @param showTags  tag IDs to resolve, or a list containing "all"
 * @param f         transformation applied to the converted content
 * @return the transformed content; the future fails when the section lookup returns nothing,
 *         because the guard in the for-comprehension rejects an empty result
 */
private def parseESContent[A](esContent: ESContent, showTags: Seq[String])(f: Content => A): Future[A] = {
  log.debug("[parseESContent] Parsing response as Content")

  // Both lookups are issued here, ahead of the for-comprehension that combines them.
  val eventualTags: Future[Seq[Tag]] = esContent.tags.map(_.map(_.id).toSeq) match {
    case Some(allIDs) =>
      val wantedIDs = if (showTags.contains("all")) allIDs else allIDs.intersect(showTags)
      getTagsByIDs(wantedIDs)
    case None =>
      Future.successful(Seq.empty[Tag])
  }
  val eventualSections = getSectionByIDs(Seq(esContent.section.id))

  val elementDTOs: Seq[Element] = esContent.elements match {
    case Some(esElements) => esElements map ESElement.convertToDTO
    case None             => Seq.empty[Element]
  }
  val referenceDTOs: Seq[ReferenceMetadata] = esContent.references match {
    case Some(esReferences) => esReferences map ESReferenceMetadata.convertToDTO
    case None               => Seq.empty[ReferenceMetadata]
  }

  val leadTagID = for {
    esTags  <- esContent.tags
    leadRel <- esTags.find(_.lead == true)
  } yield leadRel.id

  for {
    loadedTags <- eventualTags
    sections   <- eventualSections
    if sections.nonEmpty
  } yield {
    val orderedTags = leadTagID.flatMap(wanted => loadedTags.find(_.id == wanted)) match {
      case Some(leadTag) => loadedTags.toList.toHead(leadTag)
      case None          => loadedTags
    }
    f(ESContent.convertToDTO(esContent, sections.head, orderedTags, elementDTOs, referenceDTOs))
  }
}
/**
 * Converts an Elasticsearch tag into a `Tag` DTO and applies `f` to it.
 *
 * When the tag references a section, that section is loaded through `getSectionByIDs` and the
 * first result (if any) is passed to the conversion; otherwise the tag is converted without one.
 *
 * @param esTag tag as read from the index
 * @param f     transformation applied to the converted tag
 */
private def parseESTag[A](esTag: ESTag)(f: Tag => A): Future[A] = {
  log.debug("[parseESTag] Parsing response as Tag")

  val eventualSections: Future[Seq[Section]] = esTag.section match {
    case Some(esSection) => getSectionByIDs(Seq(esSection.id))
    case None            => Future.successful(Seq.empty)
  }

  eventualSections.map(found => f(ESTag.convertToDTO(esTag, found.headOption)))
}
/**
 * Searches tag documents and returns one page of results.
 *
 * An empty `criteria.q` matches everything; otherwise `q` is matched against `id` and
 * `tag.webTitle`. Hits are always restricted to documents that have a `tag` object and, when
 * `criteria.sectionIDs` is non-empty, to tags whose `tag.section.id` is one of those IDs.
 *
 * @param criteria   free-text query and optional section IDs
 * @param pageSize   number of hits per page
 * @param pageNumber 1-based page number (the offset is `pageSize * (pageNumber - 1)`)
 * @return the page of tags together with the total hit count and page count
 */
def SearchTags(criteria: TagQueryCriteria, pageSize: Int, pageNumber: Int): Future[SearchResponse[Tag]] = {
  val queries =
    if (criteria.q.isEmpty)
      matchAllQuery
    else
      bool {
        should(
          matchQuery("id", criteria.q),
          matchQuery("tag.webTitle", criteria.q)
        )
      }

  val sectionFilter: Option[QueryDefinition] = Option(criteria.sectionIDs).filter(_.nonEmpty)
    .map(ids => termsQuery("tag.section.id", ids: _*))

  // The exists filter targets `tag` (as getTagsByIDs does), not the top-level `section` object:
  // tag documents keep their section under `tag.section`, so filtering on `section` could never
  // match together with the `tag.section.id` filter.
  val filters = Seq[QueryDefinition](existsQuery("tag")) ++ sectionFilter

  // Applied unconditionally so that `total` and `pages` count tag documents only.
  val esQueryBody = bool { must(queries) filter must(filters) }

  val offset = pageSize * (pageNumber - 1)
  val esQuery = client execute {
    search in index limit pageSize from offset query {
      esQueryBody
    } sourceInclude "tag"
  }

  esQuery flatMap { resp =>
    val futures = resp.hits.toSeq flatMap { hit =>
      val esContentItem = Json.parse(hit.sourceAsString).as[ESItem]
      esContentItem.tag map (t => parseESTag(t)(tag => tag))
    }
    Future.sequence(futures) map { tags =>
      SearchResponse[Tag](
        status = "ok",
        total = resp.totalHits.toInt,
        pageSize = pageSize,
        pages = Math.ceil(resp.totalHits.toDouble / pageSize).toInt,
        currentPage = pageNumber,
        orderBy = "relevance",
        results = tags
      )
    }
  }
}
/**
 * Searches section documents and returns one page of results.
 *
 * An empty `criteria.q` matches everything; otherwise `q` is matched against `id` and
 * `section.webTitle`. Hits are restricted to documents that have a `section` object.
 *
 * @param criteria   free-text query
 * @param pageSize   number of hits per page
 * @param pageNumber 1-based page number (the offset is `pageSize * (pageNumber - 1)`)
 * @return the page of sections together with the total hit count and page count
 */
def SearchSections(criteria: SectionQueryCriteria, pageSize: Int, pageNumber: Int): Future[SearchResponse[Section]] = {
  val textQuery =
    if (criteria.q.isEmpty) matchAllQuery
    else bool {
      should(
        matchQuery("id", criteria.q),
        matchQuery("section.webTitle", criteria.q)
      )
    }

  val firstHit = pageSize * (pageNumber - 1)

  val eventualResponse = client execute {
    search in index limit pageSize from firstHit query {
      bool {
        must(textQuery) filter must(existsQuery("section"))
      }
    } sourceInclude "section"
  }

  eventualResponse flatMap { resp =>
    val eventualSections = resp.hits.toSeq flatMap { hit =>
      Json.parse(hit.sourceAsString).as[ESItem].section map { esSection =>
        parseESSection[Section](esSection)(identity)
      }
    }

    Future.sequence(eventualSections) map { sections =>
      val totalHits = resp.totalHits
      SearchResponse[Section](
        status = "ok",
        total = totalHits.toInt,
        pageSize = pageSize,
        pages = Math.ceil(totalHits.toDouble / pageSize).toInt,
        currentPage = pageNumber,
        orderBy = "relevance",
        results = sections
      )
    }
  }
}
/** Sort on the relevance score, descending. */
val docOrderByScoreDesc = scoreSort() order SortOrder.DESC

/** Sort on `content.firstPublishedDate`, descending. */
val docOrderByDateDesc = fieldSort("content.firstPublishedDate") order SortOrder.DESC

/** Sort on `content.firstPublishedDate`, ascending. */
val docOrderByDateAsc = fieldSort("content.firstPublishedDate") order SortOrder.ASC
/**
 * Searches content documents and returns one page of results.
 *
 * An empty `criteria.q` matches everything; otherwise `q` is matched against the title, headline,
 * stand-first, trail text and body fields. Hits are always restricted to documents that have a
 * `content` object, and further narrowed by whichever of `ids`, `sectionIDs`, `orTagIDs` and
 * `andTagIDs` are present and non-empty. Documents carrying any of `notTagIDs` are excluded.
 *
 * @param criteria     free-text query plus optional ID, section and tag restrictions
 * @param showFields   source fields requested from the index
 * @param showTags     forwarded to `parseESContent` to decide which tags are resolved
 * @param maybeOrderBy "relevance" (score, then newest), "oldest" (date ascending); any other value
 *                     sorts newest first
 * @param pageSize     number of hits per page
 * @param pageNumber   1-based page number (the offset is `pageSize * (pageNumber - 1)`)
 * @return the page of content together with the total hit count, page count and the ordering
 *         that was actually applied
 */
def SearchContent(criteria: ContentQueryCriteria, showFields: Seq[String], showTags: Seq[String], maybeOrderBy: String, pageSize: Int, pageNumber: Int): Future[SearchResponse[Content]] = {
  val queries =
    if (criteria.q.isEmpty)
      matchAllQuery
    else
      bool {
        must {
          should(
            commonQuery("content.webTitle") query criteria.q lowFreqMinimumShouldMatch 2,
            commonQuery("content.fields.headline") query criteria.q lowFreqMinimumShouldMatch 2,
            commonQuery("content.fields.standFirst") query criteria.q lowFreqMinimumShouldMatch 2,
            commonQuery("content.fields.trailText") query criteria.q lowFreqMinimumShouldMatch 2,
            commonQuery("content.fields.body") query criteria.q lowFreqMinimumShouldMatch 2
          ) should matchPhraseQuery("content.fields.body", criteria.q).setLenient(true)
        }
      }

  val idsFilter: Option[QueryDefinition] = criteria.ids filter (_.nonEmpty) map { ids =>
    bool {
      should(
        termsQuery("id", ids: _*),
        termsQuery("content.internalID", ids: _*),
        termsQuery("content.fields.shortPath", ids: _*)
      )
    }
  }

  val sectionFilter: Option[QueryDefinition] = criteria.sectionIDs.filter(_.nonEmpty)
    .map(termsQuery("content.section.id", _: _*))

  val orTagsFilter: Option[QueryDefinition] = criteria.orTagIDs filter (_.nonEmpty) map { ids =>
    nestedQuery("content.tags") query {
      termsQuery("content.tags.id", ids: _*)
    }
  }

  val andTagsFilter: Option[QueryDefinition] = criteria.andTagIDs.filter(_.nonEmpty).map(_.map { id =>
    nestedQuery("content.tags") query {
      termQuery("content.tags.id", id)
    }
  }).map(filter => bool(must(filter)))

  // Kept un-negated on purpose: the bool query below applies `not` to it. Wrapping it in `not`
  // here as well negated it twice, which turned the exclusion into a requirement.
  val excludedTagsFilter: Option[QueryDefinition] = criteria.notTagIDs filter (_.nonEmpty) map { ids =>
    nestedQuery("content.tags") query {
      termsQuery("content.tags.id", ids: _*)
    }
  }

  // Never empty: the `content` exists filter is always present.
  val mustFilters = existsQuery("content") +: (idsFilter ++ andTagsFilter ++ sectionFilter ++ orTagsFilter).toSeq

  val esQueryBody = bool { must(queries) filter must(mustFilters) not excludedTagsFilter }

  val offset = pageSize * (pageNumber - 1)

  val sortBody = maybeOrderBy match {
    case "relevance" => Seq(docOrderByScoreDesc, docOrderByDateDesc)
    case "oldest" => Seq(docOrderByDateAsc)
    case _ => Seq(docOrderByDateDesc)
  }

  // Name of the ordering chosen above, reported back instead of a fixed "relevance".
  val appliedOrderBy = maybeOrderBy match {
    case "relevance" => "relevance"
    case "oldest" => "oldest"
    case _ => "newest"
  }

  val esQuery = client execute {
    val req = search in index limit pageSize from offset query {
      esQueryBody
    } sort (sortBody: _*) sourceInclude (showFields: _*)
    log.debug("[SearchContent] Searching: " + req.show)
    req
  }

  esQuery flatMap { resp =>
    val futures = resp.hits.toSeq flatMap { hit =>
      val esContentItem = Json.parse(hit.sourceAsString).as[ESItem]
      esContentItem.content map (d => parseESContent(d, showTags)(content => content))
    }
    Future.sequence(futures) map { listOfContentItems =>
      SearchResponse[Content](
        status = "ok",
        total = resp.totalHits.toInt,
        pageSize = pageSize,
        pages = Math.ceil(resp.totalHits.toDouble / pageSize).toInt,
        currentPage = pageNumber,
        orderBy = appliedOrderBy,
        results = listOfContentItems
      )
    }
  }
}
/**
 * Loads the tag documents whose `id` is one of `ids` and converts them to `Tag` DTOs, resolving
 * each tag's section through `getSectionByIDs` where the tag references one.
 *
 * @param ids tag IDs to load; an empty list yields an empty result without querying the index
 * @return the tags that were found (IDs without a matching tag document are simply absent)
 */
private def getTagsByIDs(ids: Seq[String]): Future[Seq[Tag]] =
  if (ids.isEmpty)
    Future.successful(Seq.empty[Tag])
  else {
    val esQuery = client execute {
      // `limit ids.size` replaces the default page size, which would otherwise truncate the
      // result when more IDs are requested than that default allows.
      // `must` and `filter` are chained on one line: split across two lines, `must(...)` was a
      // separate statement whose value was discarded.
      val req = search in index limit ids.size query {
        bool {
          must(matchAllQuery) filter {
            bool {
              must(
                existsQuery("tag"),
                termsQuery("id", ids: _*)
              )
            }
          }
        }
      } sourceInclude "tag"
      log.debug("[getTagsByIDs] Searching: " + req.show)
      req
    }

    esQuery.flatMap { resp =>
      log.debug("[getTagsByIDs] Total hits: " + resp.totalHits)
      val items = resp.hits.map(esItem => Json.parse(esItem.sourceAsString).as[ESItem]).toSeq

      // The section reference lives on the tag itself; the item's top-level `section` is not part
      // of the requested source, so reading it never produced any ID.
      val sectionIDs = items.flatMap(_.tag.flatMap(_.section).map(_.id)).distinct

      val sectionF =
        if (sectionIDs.isEmpty) Future.successful(Seq.empty[Section])
        else getSectionByIDs(sectionIDs)

      sectionF.map { sections =>
        items flatMap { item =>
          item.tag map { t =>
            val section = t.section flatMap (s => sections.find(_.id == s.id))
            ESTag.convertToDTO(t, section)
          }
        }
      }
    }
  }
/**
 * Loads the section documents whose `id` is one of `ids` and converts them to `Section` DTOs.
 *
 * @param ids section IDs to load; an empty list yields an empty result without querying the index
 * @return the sections that were found (IDs without a matching section document are simply absent)
 */
private def getSectionByIDs(ids: Seq[String]): Future[Seq[Section]] =
  if (ids.isEmpty)
    Future.successful(Seq.empty[Section])
  else {
    val esQuery = client execute {
      // `limit ids.size` replaces the default page size, which would otherwise truncate the
      // result when more IDs are requested than that default allows.
      // `must` and `filter` are chained on one line: split across two lines, `must(...)` was a
      // separate statement whose value was discarded.
      val req = search in index limit ids.size query {
        bool {
          must(matchAllQuery) filter {
            bool {
              must(
                existsQuery("section"),
                termsQuery("id", ids: _*)
              )
            }
          }
        }
      } sourceInclude "section"
      log.debug("[getSectionByIDs] Searching: " + req.show)
      req
    }

    esQuery map { resp =>
      log.debug("[getSectionByIDs] Total hits: " + resp.totalHits)
      resp.hits.toSeq.flatMap { h =>
        log.debug("[getSectionByIDs] Converting hit to DTO")
        val item = Json.parse(h.sourceAsString).as[ESItem]
        item.section map ESSection.convertToDTO
      }
    }
  }
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package lib.ElasticSearch | |
import com.sksamuel.elastic4s.ElasticDsl._ | |
import com.sksamuel.elastic4s.analyzers.StandardAnalyzer | |
import com.sksamuel.elastic4s.mappings.{FieldType, TypedFieldDefinition} | |
/**
 * Field definitions for the documents kept in the index. Each document is described by
 * `contentItemMapping`, which nests the `content`, `tag` and `section` object mappings.
 */
object Mappings {

  // Builders for the field shapes that recur throughout the mappings below.

  /** String field declared with `index "no"`. */
  private def unindexedString(name: String) = field(name) typed FieldType.StringType index "no"

  /** String field declared with `index NotAnalyzed`. */
  private def notAnalyzedString(name: String) = field(name) typed FieldType.StringType index NotAnalyzed

  /** String field analysed with the "english_s_stemmer" analyzer. */
  private def stemmedString(name: String) = field(name) typed FieldType.StringType analyzer "english_s_stemmer"

  /** String field analysed with `StandardAnalyzer`. */
  private def standardString(name: String) = field(name) typed FieldType.StringType analyzer StandardAnalyzer

  /** String field declared with `index "no"` and `copyTo` set to its own name. */
  private def copiedString(name: String) = field(name) typed FieldType.StringType copyTo name index "no"

  /** Integer field without further options. */
  private def integerTyped(name: String) = field(name) typed FieldType.IntegerType

  /** Boolean field without further options. */
  private def booleanTyped(name: String) = field(name) typed FieldType.BooleanType

  /** Fields of the top-level `section` object. */
  lazy val sectionMapping: Seq[TypedFieldDefinition] = Seq(
    copiedString("id"),
    copiedString("webPath"),
    integerTyped("internalID"),
    stemmedString("webTitle")
  )

  /** Fields of the `section` object embedded in tags and content. */
  lazy val sectionRelMapping: Seq[TypedFieldDefinition] = Seq(
    notAnalyzedString("id"),
    integerTyped("internalID"),
    unindexedString("webTitle")
  )

  /** Fields of an asset's `typeData` object. */
  lazy val elementAssetTypeDataMapping: Seq[TypedFieldDefinition] = Seq(
    unindexedString("source"),
    unindexedString("photographer"),
    unindexedString("credit"),
    booleanTyped("displayCredit"),
    unindexedString("altText"),
    unindexedString("captionTitle"),
    unindexedString("caption"),
    unindexedString("description"),
    unindexedString("embedType"),
    unindexedString("thumbnailUrl"),
    integerTyped("durationSeconds"),
    booleanTyped("embeddable"),
    booleanTyped("blockAds"),
    booleanTyped("ageRestricted"),
    unindexedString("mediaID")
  )

  /** Fields of each entry in an asset's nested `files`. */
  lazy val elementAssetFilesMapping: Seq[TypedFieldDefinition] = Seq(
    unindexedString("url"),
    unindexedString("mimeType"),
    integerTyped("width"),
    integerTyped("height"),
    booleanTyped("isMaster")
  )

  /** Fields of each entry in an element's nested `assets`. */
  lazy val assetMapping: Seq[TypedFieldDefinition] = Seq(
    unindexedString("type"),
    field("files") typed FieldType.NestedType nested (elementAssetFilesMapping: _*),
    field("typeData") typed FieldType.ObjectType inner (elementAssetTypeDataMapping: _*)
  )

  /** Fields of each entry in the content's nested `elements`. */
  lazy val elementMapping: Seq[TypedFieldDefinition] = Seq(
    unindexedString("id"),
    unindexedString("type"),
    unindexedString("relation"),
    integerTyped("galleryIndex") index "no",
    field("assets") typed FieldType.NestedType nested (assetMapping: _*)
  )

  /** Fields of each entry in the nested `references` of tags and content. */
  lazy val referenceMetadataMapping: Seq[TypedFieldDefinition] = Seq(
    notAnalyzedString("type"),
    notAnalyzedString("id")
  )

  /** Fields of each entry in the content's nested `tags`. */
  lazy val tagRelMapping: Seq[TypedFieldDefinition] = Seq(
    notAnalyzedString("id"),
    integerTyped("internalID"),
    booleanTyped("lead")
  )

  /** Fields of the `review` object inside the content fields. */
  lazy val reviewMapping: Seq[TypedFieldDefinition] = Seq(
    field("score") typed FieldType.DoubleType,
    unindexedString("pros"),
    unindexedString("cons"),
    unindexedString("bottomLine")
  )

  /** Fields of the top-level `tag` object. */
  lazy val tagMapping: Seq[TypedFieldDefinition] = Seq(
    copiedString("id"),
    copiedString("webPath"),
    notAnalyzedString("type"),
    integerTyped("internalID"),
    field("section") typed FieldType.ObjectType as (sectionRelMapping: _*),
    stemmedString("webTitle"),
    stemmedString("description"),
    field("references") typed FieldType.NestedType nested (referenceMetadataMapping: _*),
    unindexedString("firstName"),
    unindexedString("lastName"),
    unindexedString("contributorImageUrl"),
    unindexedString("bio"),
    unindexedString("twitterHandle"),
    unindexedString("byLineImageUrl"),
    unindexedString("byLineLargeImageUrl")
  )

  /** Fields of the content's `fields` object. */
  lazy val contentFieldsMapping: Seq[TypedFieldDefinition] = Seq(
    notAnalyzedString("shortPath"),
    stemmedString("headline"),
    standardString("trailText"),
    standardString("standFirst"),
    standardString("byLine"),
    booleanTyped("commentable"),
    booleanTyped("showInRelatedContent"),
    field("commentCloseDate") typed FieldType.DateType,
    field("lastModifiedDate") typed FieldType.DateType,
    field("body") typed FieldType.StringType analyzer "html_analyzer",
    field("review") typed FieldType.ObjectType as (reviewMapping: _*)
  )

  /** Fields of the top-level `content` object. */
  lazy val contentMapping: Seq[TypedFieldDefinition] = Seq(
    copiedString("id"),
    copiedString("webPath"),
    notAnalyzedString("type"),
    integerTyped("internalID"),
    field("section") typed FieldType.ObjectType as (sectionRelMapping: _*),
    stemmedString("webTitle"),
    field("firstPublishedDate") typed FieldType.DateType format "epoch_millis",
    unindexedString("createdBy"),
    field("elements") typed FieldType.NestedType nested (elementMapping: _*),
    field("tags") typed FieldType.NestedType nested (tagRelMapping: _*),
    field("fields") typed FieldType.ObjectType as (contentFieldsMapping: _*),
    field("references") typed FieldType.NestedType nested (referenceMetadataMapping: _*)
  )

  /** Fields of an indexed item: its `id` and `webPath` plus the `content`, `tag` and `section` objects. */
  lazy val contentItemMapping: Seq[TypedFieldDefinition] = Seq(
    notAnalyzedString("id"),
    notAnalyzedString("webPath"),
    field("content") typed FieldType.ObjectType as (contentMapping: _*),
    field("tag") typed FieldType.ObjectType as (tagMapping: _*),
    field("section") typed FieldType.ObjectType as (sectionMapping: _*)
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment