This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/bin/python | |
import sys | |
import org_search as org | |
import pprint | |
import csv | |
#Configuration | |
lang='en_US' | |
min_count=1000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import matplotlib.pyplot as plt | |
import matplotlib.dates as mdates | |
from pylab import * | |
import math | |
from scipy.stats import beta, norm, uniform | |
from scipy.special import betaln | |
from random import random, normalvariate | |
import numpy as np |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# To prep a file for this script: | |
# - take a list of docs orig.json with one json doc per line | |
# - run: split -l 1000 orig.json orig-split | |
export ESINDEX="$1" #ES index name | |
export ESTYPE="$2" #ES document type name | |
JSONFILE="$3" #JSON file path name. One doc per line. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// This uses the wpes-lib framework to build the index: https://github.com/automattic/wpes-lib | |
class WPOrg_Plugins_Index_Builder extends VIP_Index_Builder { | |
//override to add support for all analyzers | |
public function get_settings( $args ) { | |
$defaults = array( | |
); | |
$args = wp_parse_args( $args, $defaults ); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
class Theme_Support_Index_Builder extends WPES_Abstract_Index_Builder { | |
public function get_config( $args ) { | |
$defaults = array( | |
'lang' => 'en', | |
); | |
$args = wp_parse_args( $args, $defaults ); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//These are some params I've used elsewhere, may want to try adjusting | |
$date_scale = '5d'; | |
$date_decay = 0.99999; | |
$date_origin = date( 'Y-m-d' ); | |
$query = array( | |
'query' => array( | |
"function_score" => array( | |
'query' => array( 'filtered' => array( | |
'query' => array( 'multi_match' => array( | |
'query' => $query, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/common/class.wpes-analyzer-builder.php b/src/common/class.wpes-analyzer-builder.php | |
index 0357ea4..5787479 100644 | |
--- a/src/common/class.wpes-analyzer-builder.php | |
+++ b/src/common/class.wpes-analyzer-builder.php | |
@@ -340,6 +340,29 @@ class WPES_Analyzer_Builder { | |
continue; | |
} | |
+ if ( 'de' == $lang ) { | |
+ ////From: http://gibrown.wordpress.com/2013/05/01/three-principles-for-multilingal-indexing-in-elasticsearch/#comment-857 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static function post( $blog_id, $post_id, $mlt_fields = array( 'mlt_content' ), $analyzer = false ) { | |
switch_to_blog( $blog_id ); | |
$post = get_post( $post_id ); | |
$fld_bldr = new ES_WP_Field_Builder(); | |
$tax_data = $fld_bldr->taxonomy( $post ); | |
$mlt_content = $fld_bldr->mlt_content( array( | |
'title' => $fld_bldr->clean_string( $post->post_title ), | |
'content' => $fld_bldr->clean_string( $post->post_content ), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function es_api_detect_lang( $text ) { | |
$lang = false; | |
//if we can't tell the lang with 5000 characters we probably can't tell the language | |
$text = mb_substr( $text, 0, 5000 ); | |
//replace non-breaking spaces so they don't match the \p{L} char class | |
$text = preg_replace( '/[\x{00A0}\x{2007}\x{202F}]/u', ' ', $text ); | |
//replace unicode symbols: see: http://www.utf8-chartable.de/unicode-utf8-table.pl |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Մեր հÕ րևÕ Õ բÕ կում երկու եղբÕ յր Õ Õ Õ Õ պրում ՄÕ օր եղբÕ յրÕ երը որոÕ եցÕ Õ բÕ րձրÕ Õ Õ լ Õ րեÕ ց Õ Õ Õ Õ Õ Õ Õ քը ՔÕ Õ եցÕ Õ բերեցÕ Õ ծÕ Õ ր սÕ Õ դուղքը ու բÕ րձրÕ ցÕ Õ մութ Õ Õ Õ Õ քը Փոքր եղբÕ յրը Õ Õ Õ վÕ խկոÕ Õ ր Õ սկ մեծ եղբÕ յրը Õ Õ ըÕ դհÕ Õ Õ րÕ Õ հÕ մոզում Õ ր որ Õ մեÕ Õ Õ չ լÕ վ կլÕ Õ Õ որ չվÕ խեÕ Õ ՀÕ զÕ վ Õ Õ Õ բÕ րձրÕ ցել վերև երբ փոքր եղբÕ յրը չÕ կÕ Õ եց գեÕ Õ Õ Õ դրվÕ ծ Õ Õ խÕ Õ կÕ երը ոÕ քով դÕ պÕ վ դրÕ Õ ց ու վÕ յր ըÕ կÕ վ ՔÕ Õ Õ որ փոքր Õ ր Õ սկույÕ սկսեց լÕ ց լÕ Õ ել Õ սկ մեծ եղբÕ յրը Õ րÕ Õ հÕ Õ գսÕ Õ ցրեց Õ սելով որ դÕ կլÕ Õ Õ Õ րեÕ ց գÕ ղÕ Õ Õ քը և ոչ ոք Õ յդ մÕ սÕ Õ չÕ Õ մÕ Õ Õ | |
WordPress.com Debug | |
11:42 AM (32 minutes ago) | |
to greg | |
Text: |
NewerOlder