Skip to content

Instantly share code, notes, and snippets.

@ranchodeluxe
Last active November 18, 2024 23:11
Show Gist options
  • Save ranchodeluxe/5b62681954ac540d3b56bab9c22c8412 to your computer and use it in GitHub Desktop.
Save ranchodeluxe/5b62681954ac540d3b56bab9c22c8412 to your computer and use it in GitHub Desktop.
resolving addresses
from postal.expand import expand_address
import xxhash
from sqlalchemy import create_engine
import pandas as pd
import sys
DATABASE_URI = "postgresql://kaiizenadmin:[email protected]/kaiizen"
engine = create_engine(DATABASE_URI)
def match_address_expansion(address):
expansions = expand_address(address)
expansion_hashes = [xxhash.xxh64(ex).intdigest() for ex in expansions]
query = """
SELECT *
FROM properties_address_expansions
WHERE expansion_hash = ANY(%(expansion_hashes)s)
"""
matches = pd.read_sql(query, engine, params={"expansion_hashes": expansion_hashes})
return matches
def get_comps_raw(address_expansions):
"""
:param address_expansions:
:return:
"""
if address_expansions.loc[0].dwellsy_pk is None:
query = """
SELECT
DISTINCT(pae.cherre_pk),
ps.pk AS cherre_pk,
ps.addr AS properties_addr,
ps.addr_city AS properties_city,
ps.addr_state AS properties_state,
ps.addr_zip AS properties_zip,
ps.last_sale_price
FROM
properties_address_expansions pae
LEFT JOIN
properties_simple ps ON pae.cherre_pk = ps.pk
WHERE
pae.cherre_pk = %(cherre_pk)s
"""
comps = pd.read_sql(
query,
engine,
params={
"cherre_pk": int(address_expansions.loc[0].cherre_pk)
}
)
else:
query = """
SELECT
DISTINCT(pae.cherre_pk),
ps.pk AS cherre_pk,
ps.addr AS properties_addr,
ps.addr_city AS properties_city,
ps.addr_state AS properties_state,
ps.addr_zip AS properties_zip,
ps.last_sale_price,
ls.pk AS dwellsy_pk,
# ls.addr AS listings_addr,
# ls.addr_city AS listings_city,
# ls.addr_state AS listings_state,
# ls.addr_zip AS listings_zip,
ls.listing_amount,
ls.listing_deposit
FROM
properties_address_expansions pae
LEFT JOIN
properties_simple ps ON pae.cherre_pk = ps.pk
LEFT JOIN
listings_simple ls ON pae.dwellsy_pk = ls.pk
WHERE
pae.dwellsy_pk = %(dwellsy_pk)s AND
pae.cherre_pk = %(cherre_pk)s
"""
comps = pd.read_sql(
query,
engine,
params={
"dwellsy_pk": int(address_expansions.loc[0].dwellsy_pk),
"cherre_pk": int(address_expansions.loc[0].cherre_pk)
}
)
return comps
if __name__ == "__main__":
all_address = """1609 Carter Rd. Decatur GA 30032
2485 Hillside ave. Decatur GA 30032
1427 Beaverton Ave Cincinnati OH 45237
513 Jackson NW Piedmont OK 73078
10313 N McKinley Oklahoma City OK 73114
14875 Arrowhead Dr Yukon OK 73099
7650 Micawber Court, Indianapolis, IN 46256 Indianapolis IN 46256
403 Rochester Avenue Middlesboro KY 40965
3430 Rocky Springs Ct Marietta GA 30062
950 Sugar Meadow Dr Sugar Hill GA 30518
5114 Crestway Dr, La Porte TX 77571
1726 Woodsong Lane Knoxville TN 37914
221 Botany Dr, Concord NC Concord NC 28027
10415 Gloria St Gibsonton FL 33534
2859 Alexandria Dr SW Atlanta GA 30331
""".split("\n")
for addr in all_address:
print(addr)
expansions = match_address_expansion(addr)
print(expansions)
if expansions:
comps = get_comps_raw(expansions)
print(comps)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment