Skip to content

Instantly share code, notes, and snippets.

@Ishmam156
Created March 3, 2023 13:49
Show Gist options
  • Save Ishmam156/44d6f69ee37036d3ea6b936ff0a164ef to your computer and use it in GitHub Desktop.
Save Ishmam156/44d6f69ee37036d3ea6b936ff0a164ef to your computer and use it in GitHub Desktop.
Python script to format an excel file according to your needs
import openpyxl
import xlwings as xw
# Main variables for the transformation
starting_file_location = 'your_original_file.xlsx'
sheet_to_use = 'name_of_sheet_of_original_file'
formatted_file_location = 'new_file_name.xlsx'
column_to_use = 'G' # Column that will be used for creating new sheets
column_to_sort = 'E'
min_column = 'I'
avg_column = 'J'
max_column = 'K'
# Load the original Excel file
original_workbook = openpyxl.load_workbook(starting_file_location)
def create_graded_excel(original_file, sheet_to_use, column_to_use, column_to_sort, min_column, avg_column, max_column):
print('Creating new workbook!')
ws = original_file[sheet_to_use]
# Get unique values from column G (Corporate Grade) and sort in ascending order
unique_values = sorted(set([cell.value for cell in ws[column_to_use][1:] if cell.value != None]))
# Create new Excel file
new_wb = openpyxl.Workbook()
# Remove the default "Sheet" from the new Excel file
new_wb.remove(new_wb['Sheet'])
# Iterate over unique values and copy data to corresponding sheet
for value in unique_values:
# Create new sheet in the new Excel file
new_ws = new_wb.create_sheet(value)
# Add header row
new_ws.append([cell.value for cell in ws[1]])
# Track of values to add to this sheet, e.g employees that are grade 1A
values_to_add = []
# Copy rows with matching values in column to the new sheet except the header
column_to_index = ord(column_to_use.lower()) - 97
for row in ws.iter_rows(min_row=2, values_only=True):
if row[column_to_index] == value:
values_to_add.append(row)
# Sort the values to add by their salary
column_to_sort_index = ord(column_to_sort.lower()) - 97
values_to_add.sort(key=lambda x:x[column_to_sort_index])
# Add the values to the sheet
for sorted_value in values_to_add:
new_ws.append(sorted_value)
# Define the range to average (excluding the header)
range_to_average = new_ws[f'{column_to_sort}2':column_to_sort+str(new_ws.max_row)]
all_salary_values = [cell.value for row in range_to_average for cell in row]
# Calculate the average
average = sum(all_salary_values) / len(range_to_average)
new_ws[f'{min_column}1'] = 'Minimum'
new_ws[f'{min_column}2'] = min(all_salary_values)
# Write the average to cell J1
new_ws[f'{avg_column}1'] = 'Average'
new_ws[f'{avg_column}2'] = average
new_ws[f'{max_column}1'] = 'Maximum'
new_ws[f'{max_column}2'] = max(all_salary_values)
return new_wb
def create_alphabet_list(start_alpha, end_alpha):
alphabet_list = [chr(i) for i in range(ord(start_alpha), ord(end_alpha) + 1)]
return alphabet_list
def format_graded_excel(new_file, num_format, column_to_sort, date_format, main_table_columns, side_table_columns, bg_color, rows_to_insert, cols_to_insert, freeze_row, zoom_level):
print('Formatting new workbook!')
for sheet in new_file.worksheets:
# Remove all the gridlines
sheet.sheet_view.showGridLines = False
# Define the range of employees to help with count
range_to_average = sheet[f'{column_to_sort}2':column_to_sort+str(sheet.max_row)]
all_salary_values = [cell.value for row in range_to_average for cell in row]
# Add the number format to the 3 new calculated numbers
for cell in [f'{min_column}2', f'{avg_column}2', f'{max_column}2']:
sheet[cell].number_format = num_format
# Iterate over all cells in row 1 and set the font style to bold
for cell in sheet[1]:
cell.font = openpyxl.styles.Font(bold=True)
# Iterate over all rows in salary column (except the header) and format the cells as numbers with thousand separators
column_to_sort_index = ord(column_to_sort.lower()) - 97
for row in sheet.iter_rows(min_row=2, min_col=column_to_sort_index + 1, max_col=column_to_sort_index + 1):
for cell in row:
cell.number_format = num_format
# Iterate over all rows in joining date column (except the header) and format the cells as short date
for row in sheet.iter_rows(min_row=2, min_col=3, max_col=3):
for cell in row:
cell.number_format = date_format
# Center alignment for all cells
for row in sheet.iter_rows():
for cell in row:
cell.alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center')
# Find the max row in column B to find out the rows of the table
max_row = sheet.max_row
for row in range(1, max_row + 1):
if sheet[f'B{row}'].value is None:
max_row = row - 1
break
# Add border to all cells in the table
border = openpyxl.styles.Side(border_style='thin', color='000000')
col_headers = create_alphabet_list(main_table_columns[0], main_table_columns[1])
for col in col_headers:
for row in range(1, max_row + 1):
cell = sheet[f'{col}{row}']
cell.border = openpyxl.styles.Border(left=border, right=border, top=border, bottom=border)
col_headers = create_alphabet_list(side_table_columns[0], side_table_columns[1])
for col in col_headers:
for row in range(1, 3):
cell = sheet[f'{col}{row}']
cell.border = openpyxl.styles.Border(left=border, right=border, top=border, bottom=border)
# Add the hex color to cells which are the table headers
col_headers = create_alphabet_list(main_table_columns[0], main_table_columns[1]) + create_alphabet_list(side_table_columns[0], side_table_columns[1])
for col in col_headers:
cell = sheet[f'{col}1']
cell.fill = openpyxl.styles.PatternFill(start_color=bg_color, end_color=bg_color, fill_type='solid')
# Insert rows and columns for making the tables look cleaner
for i in range(0, rows_to_insert):
sheet.insert_rows(1)
sheet.insert_cols(cols_to_insert)
# Add header and subheader
sheet['B2'] = f'Corporate Grade: {sheet.title}'
sheet['B2'].font = openpyxl.styles.Font(bold=True)
sheet['B3'] = f'Total Count: {len(all_salary_values)}'
# Merging the header and subheader as their text length is large
sheet.merge_cells('B2:C2')
sheet.merge_cells('B3:C3')
# Freeze the first chosen rows
sheet.freeze_panes = f'A{freeze_row + 1}'
# Set the zoom level
sheet.sheet_view.zoomScale = zoom_level
return None
def adjust_width_excel(file_path):
# Use xlwings library to autoadjust the width of all the columns depending on the text in the column
with xw.App(visible=False) as app:
wb = xw.Book(file_path)
for ws in wb.sheets:
ws.autofit(axis="columns")
wb.save(file_path)
wb.close()
# Create new excel file
new_excel_file = create_graded_excel(original_workbook, sheet_to_use, column_to_use, column_to_sort, min_column, avg_column, max_column)
# Close original excel file
original_workbook.close()
# Format and save the new Excel file
format_graded_excel(new_excel_file, '#,##0', column_to_sort, 'DD-MMM-YY', ['A', 'G'], ['I', 'K'], 'B4C6E7', 5, 1, 4,150)
new_excel_file.save(formatted_file_location)
# Get all the column width to auto adjust
adjust_width_excel(formatted_file_location)
print(f'All formatting done and file saved as: {formatted_file_location}!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment