Last active
August 7, 2021 19:33
-
-
Save jlinoff/847089bab1ed749533f1d9e2e17e09a1 to your computer and use it in GitHub Desktop.
jenkins tools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# pylint: disable=line-too-long | |
r''' | |
Extract the Jenkins build data into a JSON file for insertion into a | |
database where it can be used for graphing in grafana. | |
The following environment variables control the behavior of this program. | |
JFN Name of the text file to write all of the job names to. | |
This is mainly used for debugging. | |
The default is not to write the job names. | |
Example: JFN=/tmp/alljobs.txt | |
EFN Name of the error file. | |
This JSON file contains the errors that were caught. | |
Default: 'errs.json' | |
DAYS The number of days (last N days) to process. The default is 30. | |
BEG Begin date in ISO 8601 format. Default is DAYS days ago. | |
END End date in ISO 8601 format. Default is now. | |
MAX Maximum number of builds per job. Useful for cases | |
where there are jobs thousands of builds. The default is 0 (all). | |
MAX=1 retrieves the latest build for each branch. | |
choose MAX=1 DAYS=1825 to get the latest build searching back 5 years. | |
choose MAX=1 DAYS=3650 to get the latest build searching back 10 years. | |
NOWARN NOWARN=1 turns off warnings. | |
TIMEOUT Sets the timeout in seconds. The default is 300. | |
This will vary considerably for each site. | |
I typically use 7200 for my site. | |
PREFIX The top level prefix regex for job names. The default is '^eng'. | |
This is also site specific. | |
VERBOSE VERBOSE=1 turns on verbose mode, status messages are written | |
to stderr. VERBOSE=2 turns on very verbose mode. | |
Args: | |
credentials_file The JSON file containing the credentials. A dictionary with three fields: | |
username, password, url. | |
output_file THe output JSON file containing the data. | |
Usage: | |
$ time VERBOSE=1 TIMEOUT=7200 ./extract.py creds.json eng-all.json # very long time | |
$ time VERBOSE=1 TIMEOUT=7200 MAX=1000 ./extract.py creds.json eng-all-1000.json | |
$ time VERBOSE=2 DAYS=60 TIMEOUT=7200 MAX=1000 ./extract.py creds.json extract-60.json | |
$ time VERBOSE=2 DAYS=7 TIMEOUT=7200 ./extract.py creds.json eng-7.json | |
$ time VERBOSE=2 PREFIX='^eng/platform' DAYS=7 TIMEOUT=7200 ./extract.py creds.json eng.json | |
$ time VERBOSE=1 PREFIX='^eng/platform' DAYS=7 TIMEOUT=7200 ./extract.py jcreds.json eng.json | |
$ time VERBOSE=1 PREFIX='^eng/platform-pipe' DAYS=30 TIMEOUT=7200 ./extract.py jcreds.json eng.json | |
$ time VERBOSE=1 PREFIX='^eng' DAYS=30 TIMEOUT=7200 ./extract.py jcreds.json eng-30.json | |
$ time VERBOSE=1 PREFIX='^eng' DAYS=60 TIMEOUT=7200 ./extract.py jcreds.json eng-60.json | |
$ time VERBOSE=1 PREFIX='^eng' DAYS=90 TIMEOUT=7200 ./extract.py jcreds.json eng-90.json | |
$ # Get the latest builds for all branches and all jobs. | |
$ time VERBOSE=1 PREFIX='^eng' NOWARN=1 DAYS=3650 MAX=1 TIMEOUT=7200 ./extract.py jcreds.json eng-latest.json | |
The output file is a JSON file with an array of build entries, | |
each with the following fields: | |
1. 'status' Status string: SUCCESS, FAILURE, ABORTED, etc. | |
3. 'timestamp' Timestamp in ISO 8601 format. | |
4. 'duration' Duration seconds. | |
5. 'running' Is the build runing flag. | |
6. 'url' The build URL. | |
7. 'project' The build project. | |
8. 'branch' The build branch. | |
9. 'number' The build number. | |
It can be further processed to insert the data into a database table. | |
''' | |
# pylint: enable=line-too-long | |
import datetime | |
import inspect | |
import json | |
import os | |
import re | |
import sys | |
from getpass import getpass | |
from typing import Tuple, TextIO, List | |
from jenkinsapi.jenkins import Jenkins # pylint: disable=import-error | |
import requests | |
import dateutil.parser # pylint: disable=import-error | |
# Module variables. | |
VERBOSE = int(os.getenv('VERBOSE', '0')) | |
JFN = os.getenv('JFN', '') | |
EFN = os.getenv('EFN', 'errs.json') | |
MAX = int(os.getenv('MAX', '0')) | |
NOW = datetime.datetime.utcnow() | |
DAYS = int(os.getenv('DAYS', '30')) | |
DAYS_AGO = datetime.timedelta(days=DAYS) | |
BEG = os.getenv('BEG', (NOW - DAYS_AGO).isoformat(timespec='seconds') + '+00:00') | |
END = os.getenv('END', NOW.isoformat(timespec='seconds') + '+00:00') | |
BEG_DATE = dateutil.parser.parse(BEG) # pylint: disable=no-member | |
END_DATE = dateutil.parser.parse(END) # pylint: disable=no-member | |
PREFIX = os.getenv('PREFIX', '^eng') | |
TIMEOUT = float(os.getenv('TIMEOUT', '300')) | |
NOWARN = bool(int(os.getenv('NOWARN', '0'))) | |
def info(msg: str, ofp: TextIO = sys.stderr, end='\n', level=1): | |
'''Print a verbose info message. | |
Args: | |
msg: The message | |
ofp: The output file. | |
end: The message terminator. | |
level: The level of the caller in the stack. | |
''' | |
lnum = inspect.stack()[level].lineno | |
print(f'INFO:{lnum}: {msg}', file=ofp, end=end) | |
def warn(msg: str, ofp: TextIO = sys.stderr, end='\n', level=1): | |
'''Print a verbose warning message. | |
Args: | |
msg: The message | |
ofp: The output file. | |
end: The message terminator. | |
level: The level of the caller in the stack. | |
''' | |
if not NOWARN: | |
print('\x1b[35m', end='', file=ofp) | |
lnum = inspect.stack()[level].lineno | |
print(f'WARNING:{lnum}: {msg}', file=ofp, end=end) | |
print('\x1b[0m', end='', file=ofp) | |
def err(msg: str, ofp: TextIO = sys.stderr, end='\n', level=1): | |
'''Error message. | |
Args: | |
msg: The message | |
ofp: The output file. | |
end: The message terminator. | |
level: The level of the caller in the stack. | |
''' | |
lnum = inspect.stack()[level].lineno | |
print('\x1b[31m', end='', file=ofp) | |
print(f'ERROR:{lnum}: {msg}', file=ofp, end=end) | |
print('\x1b[0m', end='', file=ofp) | |
def get_jenkins_creds(fname: str) -> Tuple[str,str,str]: | |
''' | |
Load the jenkins credentials into a dict object | |
for use with get_jenkins_server function. | |
There are three keys: username, password and url. | |
Behind a firewall the credentials password is normally a token. | |
That is obtained by creating a global credentials user in jenkins. | |
If any of the fields are missing, prompt for them. | |
Returns: | |
creds: dictionary of the jenkins credentials | |
''' | |
creds = { | |
'username': '', | |
'password': '', | |
'url': '', | |
} | |
with open(fname) as ifp: | |
string = ifp.read().strip() | |
try: | |
creds = json.loads(string) | |
except json.decoder.JSONDecodeError as exc: | |
err(f'failed to decode credentials file: {fname}: {exc}') | |
# Prompt for missing fields. | |
for key in ['username', 'password', 'url']: | |
if key in creds and not creds[key]: | |
if key == 'password': | |
creds[key] = getpass(f'{key.capitalize()}? ') | |
else: | |
creds[key] = input(f'{key.capitalize()}? ') | |
# Verify no bad input. | |
for key in ['username', 'password', 'url']: | |
if key in creds and not creds[key]: | |
err(f'missing value for "{key}" in {fname}') | |
sys.exit(1) | |
return creds | |
def get_jenkins_server(creds: dict) -> Jenkins: | |
''' | |
Get the jenkins server object. | |
It is used for all subsequent interface calls. | |
The object construction loads ALL of the jobs when not in lazy mode | |
which can take a few minutes. | |
Args: | |
creds: The login credentials in a dict with username, password and url. | |
Returns: | |
server: The jenkins server object. | |
''' | |
url = creds['url'] | |
if VERBOSE: | |
info(f'loading jenkins server data from {url}') | |
start = datetime.datetime.now() | |
server = Jenkins(username=creds['username'], | |
password=creds['password'], | |
baseurl=url, | |
timeout=TIMEOUT, | |
lazy=True) | |
if VERBOSE: | |
elapsed = datetime.datetime.now() - start | |
info(f'loaded jenkins server in {elapsed.total_seconds()} seconds') | |
return server | |
def get_server_jobs_names(server: Jenkins) -> List[str]: | |
'''Get the jenkins job names. | |
Args: | |
server: The jenkins server object. | |
Returns: | |
list: The list of job names. | |
''' | |
if VERBOSE: | |
info('loading server job names') | |
start = datetime.datetime.now() | |
job_names = server.keys() | |
if JFN: | |
if VERBOSE: | |
info(f'writing {len(job_names)} job names to "{JFN}"') | |
with open(JFN, 'w') as ofp: | |
for job_name in job_names: | |
ofp.write(job_name + '\n') | |
if VERBOSE: | |
elapsed = datetime.datetime.now() - start | |
info(f'loaded {len(job_names)} jobs names in {elapsed.total_seconds()} seconds') | |
return job_names | |
def write_failed_errors(failed: list): | |
'''Save the failures for later debugging. | |
Args: | |
failed: The list of failures. | |
''' | |
if failed and EFN: | |
err(f'{len(failed)} processing failures written to "{EFN}"\n') | |
with open(EFN, 'w') as efp: | |
efp.write(json.dumps(failed, indent=4) + '\n') | |
def process_jobs(server: Jenkins, ofp: TextIO): # pylint: disable=too-many-locals,too-many-branches, too-many-statements | |
'''Process jobs. | |
Args: | |
server: The jenkins server object. | |
ofp: The output file pointer. | |
''' | |
start = datetime.datetime.now() | |
prefix = re.compile(PREFIX) | |
job_names = get_server_jobs_names(server) | |
# don't user server.get_jobs() in the for-loop because it would | |
# retrieve job info objects for all jobs even the filtered ones | |
# which would affect performance in cases where a subset of jobs | |
# is desired. | |
failed = [] | |
recs = { | |
'meta': { | |
'date': datetime.datetime.now().isoformat(timespec='seconds'), | |
'days': DAYS, | |
'beg': BEG, | |
'end': END, | |
'timeout': TIMEOUT, | |
'prefix': PREFIX, | |
'max': MAX, | |
'efn': EFN, | |
'jfn': JFN, | |
'time': 0, | |
'errors': [], | |
}, | |
'builds': [] | |
} | |
njobs = len(job_names) | |
for i, job_name in enumerate(job_names, start=1): | |
if not prefix.search(job_name): | |
if VERBOSE > 1: | |
warn(f"skipping job name that doesn't match '{PREFIX}': '{job_name}' {i} of {njobs}") | |
continue | |
if VERBOSE: | |
jrem = njobs - i | |
info(f'\x1b[1mJOB: {job_name} {i} of {njobs} ({jrem})\x1b[0m') | |
_top, project, branch = job_name.split('/') | |
# get the job information. | |
try: | |
job_instance = server.get_job(job_name) | |
except requests.exceptions.HTTPError as exc: | |
err(f'request timed out for server.get_job(job_name): {exc}') | |
failed.append({'type': 'job', 'name': job_name, 'exc': str(exc)}) | |
continue | |
# get the build_ids | |
try: | |
build_ids = list(job_instance.get_build_ids()) | |
if MAX: | |
ntrunc = len(build_ids) - MAX if len(build_ids) > MAX else 0 | |
if ntrunc > 0: | |
warn(f"MAX={MAX} skipping {ntrunc} jobs out of {len(build_ids)}") | |
build_ids = build_ids[:MAX] # truncate | |
except requests.exceptions.HTTPError as exc: | |
err(f'request timed out for job_instance.get_build_ids(): {exc}') | |
failed.append({'type': 'job', 'name': job_name, 'exc': str(exc)}) | |
continue | |
nbuilds = len(build_ids) | |
for j, build_id in enumerate(build_ids, start=1): | |
# get the build information | |
if VERBOSE: | |
rem = nbuilds - j | |
info(f'BUILD: {len(recs)+1} {job_name}/{build_id} {j} of {nbuilds} ({rem})') | |
try: | |
build = job_instance.get_build(build_id) | |
except requests.exceptions.HTTPError as exc: | |
err(f'request timed out for job_instance.get_build(build_id): {exc}') | |
failed.append({'type': 'job', 'name': job_name, 'build_id': build_id, 'exc': exc}) | |
continue | |
if VERBOSE: | |
info(f'STATUS: {build.get_status()} {build.get_build_url()}') | |
dts = build.get_timestamp().isoformat(timespec='seconds') | |
status = build.get_status() if build.get_status() else 'RUNNING' | |
rec = { | |
'project': project, | |
'branch': branch, | |
'number': build_id, | |
'running': build.is_running(), | |
'status': status, | |
'duration': build.get_duration().total_seconds(), | |
'timestamp': dts, | |
'url': build.get_build_url() | |
} | |
if VERBOSE > 1: | |
info(f'DTS: {dts} {BEG} {END}') | |
info(json.dumps(rec, indent=4)) | |
if dts < BEG: | |
if VERBOSE: | |
warn(f'too old - skipping {rem} builds earlier than BEG for this job: ' | |
f'{dts} < {BEG}') | |
break # all subsequent builds are earlier | |
if dts > END: | |
if VERBOSE: | |
warn(f'too new - skipping builds later than END for this job: {dts} > {END}') | |
continue # do not break!! need to get the earlier dates | |
recs['builds'].append(rec) | |
ofp.write(json.dumps(recs)) | |
elapsed = datetime.datetime.now() - start | |
recs['meta']['time'] = elapsed.total_seconds() | |
recs['meta']['errors'] = failed | |
if VERBOSE: | |
info(f'extracted {len(recs["builds"])} build records in {elapsed.total_seconds()} seconds') | |
write_failed_errors(failed) | |
def main(): | |
'''main entry point | |
''' | |
cfile = '' | |
ofp = sys.stdout | |
if len(sys.argv) > 1: | |
cfile = sys.argv[1] | |
if len(sys.argv) > 2: | |
ofp = open(sys.argv[2], 'w') # pylint: disable=consider-using-with | |
if VERBOSE: | |
ofn = sys.argv[2] if len(sys.argv) > 2 else 'sys.stdout' | |
info('Parameters') | |
info(f' DAYS : {DAYS}') | |
info(f' BEG : {BEG}') | |
info(f' END : {END}') | |
info(f' MAX : {MAX}') | |
info(f' PREFIX : "{PREFIX}"') | |
info(f' TIMEOUT : {TIMEOUT}') | |
info(f' VERBOSE : {VERBOSE}') | |
info(f' EFN : {EFN}') | |
info(f' JFN : {JFN}') | |
info(f' ofn : "{ofn}"') | |
creds = get_jenkins_creds(cfile) | |
if VERBOSE: | |
info(f' username : {creds["username"]}') | |
info(f' url : {creds["url"]}') | |
server = get_jenkins_server(creds) | |
process_jobs(server, ofp) | |
if ofp != sys.stdout: | |
ofp.close() | |
if __name__ == '__main__': | |
main() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' | |
Convert extract format files into SQL. | |
Typical usage: | |
$ time /convert.py extract.json builds builds.sql | |
$ # ^ ^ ^ | |
$ # | | +---- output file name | |
$ # | +----------- SQL table name | |
$ # +------------------------ input file name | |
''' | |
import json | |
import sys | |
from typing import TextIO | |
def tdef(tname: str, meta: str, ofp: TextIO=sys.stdout): | |
'''Output the table definition | |
Args: | |
tname: The table name | |
meta: The meta table name | |
ofp: The output file pointer | |
''' | |
ofp.write(f''' | |
DROP TABLE IF EXISTS {tname} CASCADE; | |
CREATE TABLE IF NOT EXISTS {tname} ( | |
id SERIAL PRIMARY KEY, | |
status TEXT NOT NULL, | |
time TIMESTAMPTZ NOT NULL, | |
duration DECIMAL NOT NULL, | |
project TEXT NOT NULL, | |
branch TEXT NOT NULL, | |
number DECIMAL NOT NULL, | |
url TEXT NOT NULL | |
); | |
COMMENT ON TABLE {tname} IS 'Jenkins build data'; | |
COMMENT ON COLUMN {tname}.status IS 'Result like SUCCESS or FAILURE'; | |
COMMENT ON COLUMN {tname}.time IS 'Build completed timestamp'; | |
COMMENT ON COLUMN {tname}.duration IS 'Build duration in seconds'; | |
COMMENT ON COLUMN {tname}.project IS 'Build project name'; | |
COMMENT ON COLUMN {tname}.branch IS 'Build branch name: "PR-1"'; | |
COMMENT ON COLUMN {tname}.number IS 'Build number'; | |
COMMENT ON COLUMN {tname}.url IS 'Build URL'; | |
DROP TABLE IF EXISTS {meta} CASCADE; | |
CREATE TABLE IF NOT EXISTS {meta} ( | |
id SERIAL PRIMARY KEY, | |
key TEXT NOT NULL, | |
value TEXT NOT NULL | |
); | |
COMMENT ON TABLE {meta} IS 'Jenkins build meta data'; | |
COMMENT ON COLUMN {meta}.key IS 'Key name'; | |
COMMENT ON COLUMN {meta}.value IS 'Key value'; | |
''') | |
def populate_meta(meta: str, recs: dict, ofp: TextIO=sys.stdout): | |
'''Populate the meta table. | |
Args: | |
meta: The meta table name | |
recs: The extracr JSON data | |
ofp: The output file pointer | |
''' | |
ofp.write(f''' | |
INSERT INTO {meta} (key, value) | |
VALUES | |
''') | |
for i, key in enumerate(recs['meta']): | |
if i: | |
ofp.write(',\n') | |
value = recs['meta'][key] | |
ofp.write(f'''\ | |
('{key}', '{value}')\ | |
''') | |
ofp.write(';\n') | |
def populate_builds(tname: str, recs: dict, ofp: TextIO=sys.stdout): | |
'''Populate the builds data | |
Args: | |
tname: The table name | |
recs: The extracr JSON data | |
ofp: The output file pointer | |
''' | |
ofp.write(f''' | |
INSERT INTO {tname} (status, time, duration, project, branch, number, url) | |
VALUES | |
''') | |
for i, rec in enumerate(recs['builds']): | |
status = rec['status'] | |
timestamp = rec['timestamp'] | |
duration = int(rec['duration']) | |
project = rec['project'] | |
branch = rec['branch'] | |
number = rec['number'] | |
url = rec['url'] | |
if not timestamp.endswith('+00:00'): | |
timestamp += '+00:00' | |
if i: | |
ofp.write(',\n') | |
ofp.write(f'''\ | |
('{status}', '{timestamp}', {duration}, '{project}', '{branch}', '{number}', '{url}')\ | |
''') | |
ofp.write(';\n') | |
def main(): | |
'''main | |
''' | |
ifn = sys.argv[1] # input file name | |
tname = sys.argv[2] # table name | |
ofn = sys.argv[3] # output file name | |
meta = f'{tname}_meta' | |
with open(ifn) as ifp: | |
recs = json.loads(ifp.read()) | |
with open(ofn, 'w') as ofp: | |
tdef(tname, meta, ofp) | |
populate_meta(meta, recs, ofp) | |
populate_builds(tname, recs, ofp) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment