Created
March 7, 2014 20:11
-
-
Save gibrown/9419022 to your computer and use it in GitHub Desktop.
WP.com related posts query building
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Build the Elasticsearch query used to find content related to a post.
 *
 * The post's title, content, excerpt, tags and categories are condensed into
 * a single text blob by ES_WP_Field_Builder::mlt_content(). Short blobs are
 * searched with a `multi_match` query; longer ones with `more_like_this`.
 *
 * @param int   $blog_id    Blog to switch to while the post is read.
 * @param int   $post_id    ID of the post to build the query for.
 * @param array $mlt_fields Index fields the query is run against.
 * @param mixed $analyzer   Analyzer passed through to the query; omitted when falsy.
 * @return array|false Query array, or false when the post cannot be loaded
 *                     or its condensed text contains no letters.
 */
public static function post( $blog_id, $post_id, $mlt_fields = array( 'mlt_content' ), $analyzer = false ) {
	switch_to_blog( $blog_id );

	$post = get_post( $post_id );
	if ( ! $post ) {
		// Unknown post: undo the blog switch before bailing out so the
		// caller is not left in the wrong blog context.
		restore_current_blog();
		return false;
	}

	$fld_bldr = new ES_WP_Field_Builder();
	$tax_data = $fld_bldr->taxonomy( $post );

	$mlt_content = $fld_bldr->mlt_content( array(
		'title'     => $fld_bldr->clean_string( $post->post_title ),
		'content'   => $fld_bldr->clean_string( $post->post_content ),
		'excerpt'   => $fld_bldr->clean_string( $post->post_excerpt ),
		'tags'      => isset( $tax_data['tag'] ) ? wp_list_pluck( $tax_data['tag'], 'name' ) : array(),
		'cats'      => isset( $tax_data['category'] ) ? wp_list_pluck( $tax_data['category'], 'name' ) : array(),
		'max_bytes' => 1000, //~200 English words
	) );

	restore_current_blog();

	// The `u` modifier makes PCRE read the subject as UTF-8; without it the
	// subject is inspected one byte at a time and fragments of multi-byte
	// characters can be mistaken for letters.
	$has_letter = preg_match( '/\p{L}/u', $mlt_content );
	if ( false === $has_letter ) {
		// preg_match() fails outright on invalid UTF-8 (possibly a multi-byte
		// character cut by the byte limit above - unverified). Fall back to
		// the byte-wise check rather than rejecting the post.
		$has_letter = preg_match( '/\p{L}/', $mlt_content );
	}

	if ( ! $has_letter ) {
		return false; //no utf-8 letters, we can't detect anything
	}

	//For short content (< 25 words), just use a match query.
	//NOTE: strlen() counts bytes, in line with the byte-based 'max_bytes' limit above.
	if ( strlen( $mlt_content ) < 125 ) {
		$query_type = 'multi_match';
		$query_body = array(
			'query'  => $mlt_content,
			'fields' => $mlt_fields,
		);
	} else {
		$query_type = 'more_like_this';
		// array_filter() drops the null entries so that only explicitly set
		// options are sent to Elasticsearch.
		$query_body = array_filter(
			array(
				'fields'                 => $mlt_fields,
				'like_text'              => $mlt_content,
				'percent_terms_to_match' => 0.08, // Default .3, lower to match 2 terms out of 25 max_query_terms
				'min_term_freq'          => null, // Default 2
				'max_query_terms'        => null, // Default 25
				'stop_words'             => null,
				'min_doc_freq'           => null, // Default 5
				'max_doc_freq'           => null, // Default inf
				'min_word_len'           => null, // Default 0
				'max_word_len'           => null, // Default inf
				'boost_terms'            => 5, // Default 1 (This is the max boost value per term, actual boost values for each term is proportional to how often it appears in docs.)
			)
		);
	}

	if ( $analyzer ) {
		$query_body['analyzer'] = $analyzer;
	}

	return array( $query_type => $query_body );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment