Skip to content

Instantly share code, notes, and snippets.

View Idan707's full-sized avatar

Idan Benaun Idan707

View GitHub Profile
@Idan707
Idan707 / browseruse_reddit.py
Last active January 14, 2025 12:51
This code performs automated scrolling through the r/sidehustle subreddit and analyzes each Reddit post for relevance to AI and prompt engineering. It uses a browser automation tool, a controller for managing tasks, and structured output for saving the results.
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from browser_use import ActionResult, Agent, Controller
from browser_use.browser.context import BrowserContext
from browser_use.browser.browser import Browser, BrowserConfig
import asyncio
import os
import json
import re
###########################################
# Suppress matplotlib user warnings
# Necessary for newer version of matplotlib
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
#
# Display inline matplotlib plots with IPython.
# get_ipython() returns None when the code is NOT running under an
# IPython/Jupyter session, so guard the magic call instead of crashing
# with AttributeError in a plain Python interpreter.
from IPython import get_ipython
_ipython_shell = get_ipython()
if _ipython_shell is not None:
    _ipython_shell.run_line_magic('matplotlib', 'inline')
###########################################
###########################################
# NOTE(review): this block is an exact duplicate of the setup block that
# precedes it in the file; it is harmless (idempotent) but redundant.
# Suppress matplotlib user warnings
# Necessary for newer version of matplotlib
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
#
# Display inline matplotlib plots with IPython.
# Guard against get_ipython() returning None outside IPython/Jupyter,
# which would otherwise raise AttributeError on the magic call.
from IPython import get_ipython
_shell = get_ipython()
if _shell is not None:
    _shell.run_line_magic('matplotlib', 'inline')
###########################################
import numpy as np
import itertools
from sklearn.metrics import confusion_matrix
# NOTE(review): the original indentation was lost in the page scrape and the
# gist preview cuts off after the normalization step -- the actual plotting
# code (imshow/ticks/labels, presumably) is not visible in this excerpt.
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.winter):
# Plot a confusion matrix `cm` labeled with `classes`; when `normalize` is
# true, convert raw counts to per-row (per-true-class) proportions.
# `plt` is assumed to be matplotlib.pyplot, imported elsewhere -- TODO confirm.
if normalize:
# Row-normalize: divide each row by its row total so each row sums to 1.
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
@Idan707
Idan707 / get_model_results.py
Created January 14, 2018 09:07
get_model_results
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import roc_auc_score, roc_curve
# NOTE(review): the gist preview is truncated here and the original
# indentation was lost in the scrape -- the function body (the distance
# computation and the return) continues past this excerpt.
def find_best_threshold(thresholds, fpr, tpr):
"""
Find the best classification threshold from a ROC curve by picking the
threshold of the point closest to the ideal corner (fpr=0, tpr=1).
"""
# Assemble per-threshold ROC coordinates into one frame; `pd` is assumed
# to be pandas, imported elsewhere in the original file -- TODO confirm.
fpr_tpr = pd.DataFrame({'thresholds': thresholds, 'fpr': fpr, 'tpr': tpr})
@Idan707
Idan707 / get_most_correlated_variables.py
Last active January 14, 2018 09:05
get_most_correlated_variables and plot_correlation_matrix
def get_most_correlated_variables(corr, num_pairs=10):
    """Return the `num_pairs` variable pairs with the largest absolute correlation.

    `corr` is a square correlation DataFrame (e.g. ``df.corr()``).  The result
    has columns ``var_1``, ``var_2`` and ``value``, sorted ascending by
    |value| (rounded to 3 decimals) so the strongest pair comes last, with
    each unordered pair appearing exactly once.
    """
    # Long format: one row per (var_1, var_2) cell of the correlation matrix.
    melted = pd.melt(
        corr.reset_index().rename(columns={"index": "var_1"}),
        id_vars=("var_1"),
        var_name='var_2',
    )
    # Drop the diagonal -- a variable's correlation with itself is always 1.
    melted = melted[melted.var_1 != melted.var_2]
    # Canonical (sorted) key so (a, b) and (b, a) collapse to a single row.
    melted['var_couple'] = melted[['var_1', 'var_2']].apply(
        lambda pair: tuple(sorted([pair[0], pair[1]])), axis=1
    )
    melted = melted.drop_duplicates(subset='var_couple').drop(['var_couple'], axis=1)
    # Rank by magnitude, rounded to 3 decimals exactly as the original did
    # (the rounding affects tie-breaking, so it is preserved).
    melted['abs_value'] = melted['value'].abs().round(3)
    strongest = melted.sort_values(by='abs_value').tail(num_pairs)
    return strongest.drop('abs_value', axis=1).reset_index(drop=True)
# NOTE(review): truncated gist preview -- only the first body line is visible;
# presumably the rest of the function plots `corr` (e.g. a heatmap). The
# original indentation was also lost in the page scrape.
def plot_correlation_matrix(X, features2):
# Correlation matrix restricted to the selected feature subset of X.
corr = X[features2].corr()
# NOTE(review): the gist preview is truncated (no return visible) and the
# original indentation was lost in the page scrape.
# NOTE(review): `non_interesting_columns=[]` is a mutable default argument --
# it is only read here, but this is a known Python pitfall; cannot be fixed
# safely without seeing the full function body.
def describe_categorical_values(df, non_interesting_columns=[], num_categories=5):
# Summarize each remaining column of `df` by its `num_categories` most
# frequent values, formatted as "value: NN%" strings.
values_df = pd.DataFrame()
for i, column in enumerate(df.columns):
if column in non_interesting_columns:
continue
# Top values as "value: percent%" strings; NaN counts are included
# because value_counts is called with dropna=False.
top_values0 = ["{}: {}%".format(x,int(round(100*y/len(df))))
for x, y in zip(df[column].value_counts(dropna=False).head(num_categories).index,
df[column].value_counts(dropna=False).head(num_categories).values)]
# Pad when the column has fewer distinct values than requested.
if len(top_values0) < num_categories:
top_values = [None]*num_categories
@Idan707
Idan707 / IMDB_scraper.R
Last active December 27, 2017 21:20
IMDB scraper - part of movie revenue prediction with ML/DL
####################
# Import libraries
####################
library(rvest)
library(XML)
library(xml2)
library(stringr)
library(data.table)
####################