Use the paperless-ngx JSON-API to save data locally

This script runs on a client computer. It was developed under Windows but should run cross-platform. The local JSON files can be further analysed with tools like 'jq', and all kinds of meta-information become accessible.
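For example, once the files exist, a saved endpoint can be inspected with a few lines of Python instead of 'jq'. This is a minimal sketch; it assumes the script below has already been run with GET_ALL_DATA enabled, that data/documents.json contains a 'results' list, and that the documents carry the usual paperless-ngx 'title' field:

import json

with open("data/documents.json", encoding="utf-8") as f:
    documents = json.load(f)

# list the titles of all downloaded documents
for doc in documents["results"]:
    print(doc["title"])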

The script downloads data for every API endpoint.

If the variable GET_ALL_DATA in the source code is set to True, the script recursively follows the 'next' URL of the data and collects the results of all pages into one combined result.
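For reference, a paginated endpoint answers with roughly this shape (the values shown are purely illustrative; the script only relies on the 'next' and 'results' keys):

{
  "count": 103,
  "next": "<BASE_URL>/api/documents/?page=2&format=json",
  "previous": null,
  "results": [ ... ]
}

When 'next' is null, the last page has been reached.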

Speed and resource utilization depend on many factors; subjectively, I would call it fast.

The screen output of a test run looks like this:

correspondents
--> .\data\correspondents.json (23 results)
document_types
--> .\data\document_types.json (10 results)
documents
--> .\data\documents.json data not complete without recursion
--> .\data\documents.json (25 results)
logs
--> .\data\logs.json (JSON has no element for results)
tags
--> .\data\tags.json (14 results)
saved_views
--> .\data\saved_views.json (2 results)
storage_paths
--> .\data\storage_paths.json (9 results)
tasks
--> .\data\tasks.json (JSON has no element for results)
users
--> .\data\users.json (2 results)
groups
--> .\data\groups.json (4 results)
mail_accounts
--> .\data\mail_accounts.json (2 results)
mail_rules
--> .\data\mail_rules.json (4 results)
share_links
--> .\data\share_links.json (0 results)
workflow_triggers
--> .\data\workflow_triggers.json (8 results)
workflow_actions
--> .\data\workflow_actions.json (8 results)
workflows
--> .\data\workflows.json (7 results)
custom_fields
--> .\data\custom_fields.json (2 results)
config
--> .\data\config.json (JSON has no element for results)
"""Write all JSON data from a paperless instance to local JSON files"""
import os
import json
import requests # pylint: disable=import-error
from pl_secrets import TK, URL
## pl_secrets.py example:
# TK="superuser-token"
# URL="<BASE_URL>/api/?format=json"
HEADERS = {"Authorization": "Token " + TK}
FILEROOT_DATA = os.path.join(".", "data")
GET_ALL_DATA = False


def get_api_dict() -> dict:
    """Get a dictionary of all API endpoints

    Returns:
        api_d: the result of the <BASE_URL>/api/?format=json call
    """
    r = requests.get(URL, headers=HEADERS)
    api_d = r.json()
    return api_d


def concat_results(next_url: str, level: int = 1) -> list:
    """Recursively extend the list of 'results' with the 'concat_results' of the following page

    Args:
        next_url (str): the URL found in 'next'
        level (int): the level of recursion, for diagnostic purposes

    Returns:
        list: the 'results' of this page plus the 'concat_results' of all following pages
    """
    next_level = level + 1
    r = requests.get(next_url, headers=HEADERS)
    d = r.json()
    concat_result = d["results"]
    if d["next"]:  # more pages follow, descend one level
        concat_result = concat_result + concat_results(
            next_url=d["next"], level=next_level
        )
    # print(f"{next_level * '-'} {next_level} {len(concat_result)}")
    return concat_result


def write_data(request_data: dict, api_name: str, get_all_data: bool = True) -> None:
    """Write the data from the API endpoint to a local JSON file

    Warn if the data is not complete (the 'next' URL is not None)

    Args:
        request_data (dict): the result of the request to the API endpoint
        api_name (str): the API name
        get_all_data (bool): get all data recursively
    """
    try:
        os.mkdir(path=FILEROOT_DATA)
    except FileExistsError:
        pass
    rqd = copy.copy(request_data)  # shallow copy, since we might amend 'results'
    fname_data = os.path.join(FILEROOT_DATA, f"{api_name}.json")
    try:
        if rqd["next"] and get_all_data:  # follow the next URL
            rqd["results"] = rqd["results"] + concat_results(next_url=rqd["next"])
        elif rqd["next"] and (not get_all_data):  # results are NOT complete
            print(f"--> {fname_data} data not complete without recursion")
        elif not rqd["next"]:  # results are complete
            pass
    except (KeyError, TypeError):  # endpoint is not paginated (e.g. logs, tasks, config)
        print(f"--> {fname_data} (JSON has no element for results)")
    else:
        print(f"--> {fname_data} ({len(rqd['results'])} results)")
    with open(fname_data, mode="w", encoding="utf-8") as f:
        json.dump(obj=rqd, fp=f, indent=2)


def process_api(api_name: str, api_url: str, get_all_data: bool = True):
    """Save the data for the API as JSON

    Args:
        api_name (str): name of the API according to the top-level API call
        api_url (str): URL of the API
        get_all_data (bool): get all data recursively
    """
    r = requests.get(api_url, headers=HEADERS)
    data_d = r.json()
    print(api_name)
    write_data(request_data=data_d, api_name=api_name, get_all_data=get_all_data)


if __name__ == "__main__":
    apis = get_api_dict()
    for name, url in apis.items():
        process_api(api_name=name, api_url=url, get_all_data=GET_ALL_DATA)
TK="superuser-token"
URL="<BASE_URL>/api/?format=json"