Created
March 3, 2023 13:49
-
-
Save Ishmam156/44d6f69ee37036d3ea6b936ff0a164ef to your computer and use it in GitHub Desktop.
Python script to format an excel file according to your needs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import openpyxl | |
import xlwings as xw | |
# Main variables for the transformation | |
starting_file_location = 'your_original_file.xlsx' | |
sheet_to_use = 'name_of_sheet_of_original_file' | |
formatted_file_location = 'new_file_name.xlsx' | |
column_to_use = 'G' # Column that will be used for creating new sheets | |
column_to_sort = 'E' | |
min_column = 'I' | |
avg_column = 'J' | |
max_column = 'K' | |
# Load the original Excel file | |
original_workbook = openpyxl.load_workbook(starting_file_location) | |
def create_graded_excel(original_file, sheet_to_use, column_to_use, column_to_sort, min_column, avg_column, max_column): | |
print('Creating new workbook!') | |
ws = original_file[sheet_to_use] | |
# Get unique values from column G (Corporate Grade) and sort in ascending order | |
unique_values = sorted(set([cell.value for cell in ws[column_to_use][1:] if cell.value != None])) | |
# Create new Excel file | |
new_wb = openpyxl.Workbook() | |
# Remove the default "Sheet" from the new Excel file | |
new_wb.remove(new_wb['Sheet']) | |
# Iterate over unique values and copy data to corresponding sheet | |
for value in unique_values: | |
# Create new sheet in the new Excel file | |
new_ws = new_wb.create_sheet(value) | |
# Add header row | |
new_ws.append([cell.value for cell in ws[1]]) | |
# Track of values to add to this sheet, e.g employees that are grade 1A | |
values_to_add = [] | |
# Copy rows with matching values in column to the new sheet except the header | |
column_to_index = ord(column_to_use.lower()) - 97 | |
for row in ws.iter_rows(min_row=2, values_only=True): | |
if row[column_to_index] == value: | |
values_to_add.append(row) | |
# Sort the values to add by their salary | |
column_to_sort_index = ord(column_to_sort.lower()) - 97 | |
values_to_add.sort(key=lambda x:x[column_to_sort_index]) | |
# Add the values to the sheet | |
for sorted_value in values_to_add: | |
new_ws.append(sorted_value) | |
# Define the range to average (excluding the header) | |
range_to_average = new_ws[f'{column_to_sort}2':column_to_sort+str(new_ws.max_row)] | |
all_salary_values = [cell.value for row in range_to_average for cell in row] | |
# Calculate the average | |
average = sum(all_salary_values) / len(range_to_average) | |
new_ws[f'{min_column}1'] = 'Minimum' | |
new_ws[f'{min_column}2'] = min(all_salary_values) | |
# Write the average to cell J1 | |
new_ws[f'{avg_column}1'] = 'Average' | |
new_ws[f'{avg_column}2'] = average | |
new_ws[f'{max_column}1'] = 'Maximum' | |
new_ws[f'{max_column}2'] = max(all_salary_values) | |
return new_wb | |
def create_alphabet_list(start_alpha, end_alpha): | |
alphabet_list = [chr(i) for i in range(ord(start_alpha), ord(end_alpha) + 1)] | |
return alphabet_list | |
def format_graded_excel(new_file, num_format, column_to_sort, date_format, main_table_columns, side_table_columns, bg_color, rows_to_insert, cols_to_insert, freeze_row, zoom_level): | |
print('Formatting new workbook!') | |
for sheet in new_file.worksheets: | |
# Remove all the gridlines | |
sheet.sheet_view.showGridLines = False | |
# Define the range of employees to help with count | |
range_to_average = sheet[f'{column_to_sort}2':column_to_sort+str(sheet.max_row)] | |
all_salary_values = [cell.value for row in range_to_average for cell in row] | |
# Add the number format to the 3 new calculated numbers | |
for cell in [f'{min_column}2', f'{avg_column}2', f'{max_column}2']: | |
sheet[cell].number_format = num_format | |
# Iterate over all cells in row 1 and set the font style to bold | |
for cell in sheet[1]: | |
cell.font = openpyxl.styles.Font(bold=True) | |
# Iterate over all rows in salary column (except the header) and format the cells as numbers with thousand separators | |
column_to_sort_index = ord(column_to_sort.lower()) - 97 | |
for row in sheet.iter_rows(min_row=2, min_col=column_to_sort_index + 1, max_col=column_to_sort_index + 1): | |
for cell in row: | |
cell.number_format = num_format | |
# Iterate over all rows in joining date column (except the header) and format the cells as short date | |
for row in sheet.iter_rows(min_row=2, min_col=3, max_col=3): | |
for cell in row: | |
cell.number_format = date_format | |
# Center alignment for all cells | |
for row in sheet.iter_rows(): | |
for cell in row: | |
cell.alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center') | |
# Find the max row in column B to find out the rows of the table | |
max_row = sheet.max_row | |
for row in range(1, max_row + 1): | |
if sheet[f'B{row}'].value is None: | |
max_row = row - 1 | |
break | |
# Add border to all cells in the table | |
border = openpyxl.styles.Side(border_style='thin', color='000000') | |
col_headers = create_alphabet_list(main_table_columns[0], main_table_columns[1]) | |
for col in col_headers: | |
for row in range(1, max_row + 1): | |
cell = sheet[f'{col}{row}'] | |
cell.border = openpyxl.styles.Border(left=border, right=border, top=border, bottom=border) | |
col_headers = create_alphabet_list(side_table_columns[0], side_table_columns[1]) | |
for col in col_headers: | |
for row in range(1, 3): | |
cell = sheet[f'{col}{row}'] | |
cell.border = openpyxl.styles.Border(left=border, right=border, top=border, bottom=border) | |
# Add the hex color to cells which are the table headers | |
col_headers = create_alphabet_list(main_table_columns[0], main_table_columns[1]) + create_alphabet_list(side_table_columns[0], side_table_columns[1]) | |
for col in col_headers: | |
cell = sheet[f'{col}1'] | |
cell.fill = openpyxl.styles.PatternFill(start_color=bg_color, end_color=bg_color, fill_type='solid') | |
# Insert rows and columns for making the tables look cleaner | |
for i in range(0, rows_to_insert): | |
sheet.insert_rows(1) | |
sheet.insert_cols(cols_to_insert) | |
# Add header and subheader | |
sheet['B2'] = f'Corporate Grade: {sheet.title}' | |
sheet['B2'].font = openpyxl.styles.Font(bold=True) | |
sheet['B3'] = f'Total Count: {len(all_salary_values)}' | |
# Merging the header and subheader as their text length is large | |
sheet.merge_cells('B2:C2') | |
sheet.merge_cells('B3:C3') | |
# Freeze the first chosen rows | |
sheet.freeze_panes = f'A{freeze_row + 1}' | |
# Set the zoom level | |
sheet.sheet_view.zoomScale = zoom_level | |
return None | |
def adjust_width_excel(file_path): | |
# Use xlwings library to autoadjust the width of all the columns depending on the text in the column | |
with xw.App(visible=False) as app: | |
wb = xw.Book(file_path) | |
for ws in wb.sheets: | |
ws.autofit(axis="columns") | |
wb.save(file_path) | |
wb.close() | |
# Create new excel file | |
new_excel_file = create_graded_excel(original_workbook, sheet_to_use, column_to_use, column_to_sort, min_column, avg_column, max_column) | |
# Close original excel file | |
original_workbook.close() | |
# Format and save the new Excel file | |
format_graded_excel(new_excel_file, '#,##0', column_to_sort, 'DD-MMM-YY', ['A', 'G'], ['I', 'K'], 'B4C6E7', 5, 1, 4,150) | |
new_excel_file.save(formatted_file_location) | |
# Get all the column width to auto adjust | |
adjust_width_excel(formatted_file_location) | |
print(f'All formatting done and file saved as: {formatted_file_location}!') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment