- The Python Standard Library, especially the str methods and the string module, is powerful for text processing. Start there.
- regex - Extends Python's Standard Library re module while being backwards-compatible.
- chardet - Finds character encoding.
- ftfy - Takes in bad Unicode and outputs good Unicode. Seriously automagical.
- polyglot - Helpful for multilingual preprocessing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from collections import Counter | |
def calculate_mutual_information(words, bigrams): | |
# Count the frequency of individual words and bigrams | |
word_counts = Counter(words) | |
bigram_counts = Counter(bigrams) | |
# Calculate the total number of words and bigrams | |
total_words = sum(word_counts.values()) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
drop table if exists linear_test; | |
create table linear_test ( | |
x numeric(19,4), | |
y numeric(19,4), | |
workspace_id text | |
); | |
insert into linear_test (x,y, workspace_id) values (1, 10, 'a'), (2, 20, 'a'), (3, 30, 'a'), (4, 40, 'a'), (5, 50, 'a'); | |
insert into linear_test (x,y, workspace_id) values (5, 10, 'b'), (6, 20, 'b'), (7, 30, 'b'), (8, 40, 'b'), (9, 50, 'b'); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: '2' | |
networks: | |
kafka: | |
driver: bridge | |
services: | |
zookeeper-1: | |
image: confluentinc/cp-zookeeper:latest | |
hostname: zookeeper-1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: '2' | |
networks: | |
kafka: | |
driver: bridge | |
services: | |
zookeeper-1: | |
image: confluentinc/cp-zookeeper:latest | |
hostname: zookeeper-1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Purge all stopped containers, unused networks, unused images (not just dangling ones, because of -a), and build cache. Volumes are NOT removed unless --volumes is added. | |
docker system prune -a | |
# Remove all images | |
docker rmi $(docker images -a -q) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import copy | |
def verify_partial_valid_solution(A,w): | |
if len(w[0]) > 2: | |
return False | |
w1 = sum(w[1]) | |
w2 = sum(w[2]) | |
w3 = sum(w[3]) | |
a = sum(A) | |
if a + w1 < w2: return False |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# parse iso time | |
datetime.strptime('2019-04-17T09:02:16.00428Z', '%Y-%m-%dT%H:%M:%S.%fZ') | |
import itertools | |
def grouper(iterable, n): | |
it = iter(iterable) | |
while True: | |
chunk = tuple(itertools.islice(it, n)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlalchemy | |
import sys | |
def print_clear_dag(dag_id): | |
"""Clear all information for a DAG from airflow postgres database""" | |
list_tables = ['xcom', 'task_instance', 'sla_miss', 'log', 'job', 'dag_run', 'dag', | |
'dag_stats', 'task_fail'] | |
for table in list_tables: | |
queries = ["DELETE FROM {} WHERE dag_id='{}'".format(table, dag_id), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--------------------------------- Process | |
SELECT * FROM pg_stat_activity WHERE state = 'active'; | |
SELECT pg_cancel_backend(<pid of the process>) | |
SELECT pg_terminate_backend(<pid of the process>) | |
--------------------------------- LOCKS | |
-- Ref: https://wiki.postgresql.org/wiki/Lock_Monitoring | |
SELECT relation::regclass, * FROM pg_locks WHERE NOT GRANTED; |
NewerOlder