This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): the star import is kept so any other name this script relies on
# stays in scope; only `combined_evaluator_extractor` and `evaluator_extractor`
# are used in this fragment.
from fklearn.metrics.pd_extractors import *

# Names of the evaluator result entries to extract. They look like
# "<evaluator>__<target column>" keys -- presumably produced by the matching
# fklearn evaluators run against the "sentiment" target; confirm against the
# evaluation logs.
eval_auc_col = "auc_evaluator__sentiment"
eval_logloss_col = "logloss_evaluator__sentiment"
eval_precision_col = "precision_evaluator__sentiment"
eval_recall_col = "recall_evaluator__sentiment"

# One extractor per metric, merged into a single combined extractor.
# Order is AUC, log-loss, precision, recall.
base_extractor = combined_evaluator_extractor(
    base_extractors=[
        evaluator_extractor(evaluator_name=metric_col)
        for metric_col in (
            eval_auc_col,
            eval_logloss_col,
            eval_precision_col,
            eval_recall_col,
        )
    ]
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fklearn.preprocessing.splitting import space_time_split_dataset | |
train_set, intime_outspace_hdout, outime_inspace_hdout, outime_outspace_hdout = \ | |
space_time_split_dataset(df, | |
train_start_date="2016-12-31", | |
train_end_date="2017-01-10", | |
holdout_end_date="2017-02-13", | |
split_seed=42, | |
space_holdout_percentage=0.2, | |
space_column="username", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fklearn.validation.evaluators import auc_evaluator, logloss_evaluator, precision_evaluator, recall_evaluator, \ | |
combined_evaluators, temporal_split_evaluator | |
def tweet_eval(target_column, prediction_column, time_column): | |
eval_args = dict(target_column=target_column, prediction_column=prediction_column) | |
basic_evaluator = combined_evaluators(evaluators=[ | |
auc_evaluator(**eval_args), | |
logloss_evaluator(**eval_args), | |
precision_evaluator(**eval_args), | |
recall_evaluator(**eval_args) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keyword arguments for the text vectorizer.
# Check docs here https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
# NOTE(review): `nltk` must already be imported, and the NLTK "stopwords"
# corpus presumably has to be downloaded beforehand -- confirm in the setup code.
vectorizer_params = dict(decode_error="replace",
                         lowercase=True,
                         stop_words=nltk.corpus.stopwords.words("portuguese"),
                         ngram_range=(1, 3),
                         strip_accents=None)

# Keyword arguments for the logistic regression model.
# Check docs here https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
logistic_params = dict(random_state=42,
                       n_jobs=-1,
                       solver="lbfgs")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fklearn.training.classification import nlp_logistic_classification_learner | |
from fklearn.training.pipeline import build_pipeline | |
from fklearn.training.utils import log_learner_time | |
def training_pipeline(text_cols, target_column, vectorizer_params, logistic_params): | |
return log_learner_time( | |
build_pipeline( | |
nlp_logistic_classification_learner( | |
text_feature_cols=text_cols, | |
target=target_column, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fklearn.validation.evaluators import auc_evaluator, logloss_evaluator, precision_evaluator, recall_evaluator, \ | |
combined_evaluators, temporal_split_evaluator | |
def tweet_eval(target_column, prediction_column, time_column): | |
eval_args = dict(target_column=target_column, prediction_column=prediction_column) | |
basic_evaluator = combined_evaluators(evaluators=[ | |
auc_evaluator(**eval_args), | |
logloss_evaluator(**eval_args), | |
precision_evaluator(**eval_args), | |
recall_evaluator(**eval_args) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Seed NumPy's global RNG so the random draw used for "Neutro" below is reproducible.
np.random.seed(42)

# Rename the raw columns to snake_case names and keep only those five columns.
df = (dataset
      .rename(columns={"Created At": "publication_date",
                       "Text": "tweet",
                       "Retweet Count": "num_retweets",
                       "Username": "username",
                       "Classificacao": "sentiment"})
      .loc[:, ["publication_date", "tweet", "num_retweets", "username", "sentiment"]])

# Parse the publication timestamps into datetime values.
# NOTE(review): `infer_datetime_format` is deprecated in pandas >= 2.0; drop the
# argument when the pinned pandas version allows it.
df["publication_date"] = pd.to_datetime(df["publication_date"], infer_datetime_format=True)

# Map the sentiment labels to a binary target.
# NOTE(review): `np.random.choice([0, 1])` is evaluated once, while this dict
# literal is built, so every "Neutro" row receives the SAME label rather than an
# independent random draw per row. Behaviour is kept as-is; confirm whether a
# per-row draw was intended.
df["sentiment"] = df["sentiment"].replace({"Negativo": 0, "Neutro": np.random.choice([0, 1]), "Positivo": 1})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@app.route("/display-snack") | |
@login_required | |
def display_snack(): | |
my_database = mongo[DATABASE] | |
my_file = open("snacks/snacks.json", "rb") | |
parsed = json.loads(my_file.read().decode('unicode-escape')) | |
snacks = parsed | |
s = snacks | |
Snack.objects.delete() | |
for s in snacks: |