Skip to content

Instantly share code, notes, and snippets.

@Nempickaxe
Last active February 11, 2021 15:52
Show Gist options
  • Save Nempickaxe/1dd4a01b692dba82bacac3c897e5ceb4 to your computer and use it in GitHub Desktop.
Save Nempickaxe/1dd4a01b692dba82bacac3c897e5ceb4 to your computer and use it in GitHub Desktop.
Split a sentence into pieces based on a maximum character width per piece
def get_interval(space_list, width):
    """Return the last position that is not followed by one beyond ``width``.

    Walks ``space_list`` pairwise and returns the element that sits just
    before the first element greater than ``width``.  When no later element
    exceeds ``width`` the final element is returned.

    The first element is never itself compared against ``width``: it is only
    ever a candidate return value, so it is returned even when it is larger
    than ``width`` (nothing earlier exists to fall back to).

    Args:
        space_list: Sequence of positions, expected in ascending order.
        width: Threshold that the *following* position must not exceed.

    Returns:
        The selected position, or ``0`` for an empty ``space_list`` (``0`` is
        the value ``get_all_breaks`` treats as "nothing left to split").
    """
    if not space_list:
        # Previously an IndexError on ``space_list[-1]``.
        return 0
    for current, following in zip(space_list, space_list[1:]):
        if following > width:
            return current
    return space_list[-1]
def get_subtracted_list(space_list, width):
    """Shift every position left by ``width``, clamping negatives to zero.

    Args:
        space_list: Iterable of numeric positions.
        width: Offset subtracted from each position.

    Returns:
        A new list of ints, one per input position, each equal to
        ``position - width`` truncated to an int, or ``0`` when that
        difference is negative.
    """
    # Integer clamp instead of ((d) + abs(d)) / 2: the float division in the
    # old formula lost precision for integers above 2**53.
    return [max(int(position - width), 0) for position in space_list]
def get_all_breaks(space_list, width):
    """Compute successive chunk lengths that break text at given positions.

    Starting from offset 0, each chunk is extended to the furthest position
    in ``space_list`` that lies within ``width`` of the chunk's start.  When
    the very next position is already further away than ``width`` (a single
    word longer than ``width``), the chunk ends at that next position, so
    processing continues instead of stopping early.

    Args:
        space_list: Ascending absolute break positions (e.g. indices of the
            spaces in a string).  The list is not modified.
        width: Maximum distance from a chunk's start to its end.

    Returns:
        A list of non-cumulative chunk lengths: each entry is the distance
        from the previous break (or from 0) to the next break.  Positions at
        or before the current chunk start contribute nothing, so ``[0]`` and
        ``[]`` both yield ``[]``.
    """
    breaks = []
    chunk_start = 0
    total = len(space_list)
    index = 0
    while index < total:
        if space_list[index] <= chunk_start:
            # Zero-length chunk (e.g. a leading space); nothing to cut here.
            index += 1
            continue
        # Extend the chunk while the following position is still in range.
        # The position at ``index`` is accepted unconditionally so that an
        # over-long word cannot stall the scan.
        last = index
        while last + 1 < total and space_list[last + 1] - chunk_start <= width:
            last += 1
        breaks.append(space_list[last] - chunk_start)
        chunk_start = space_list[last]
        index = last + 1
    return breaks
def splitting(x, width=400):
    """Split ``x`` at spaces into stripped pieces sized by ``width``.

    Candidate break points are the index of every space in ``x`` plus the end
    of the string.  The chunk lengths between breaks are obtained from
    ``get_all_breaks``; ``x`` is then sliced chunk by chunk and each piece is
    stripped of surrounding whitespace.

    Args:
        x: The text to split.
        width: Width limit forwarded to ``get_all_breaks`` (default 400).

    Returns:
        A list of non-empty, whitespace-stripped pieces in original order.
        An empty or whitespace-only ``x`` yields ``[]``.
    """
    # Plain scan for spaces; avoids relying on ``re``, which this file never
    # imports.
    boundaries = [index for index, char in enumerate(x) if char == ' ']
    # Always treat end-of-string as a boundary.  Without it the last space is
    # always chosen as a break, so the final word was split off even when the
    # whole text fit inside ``width``.
    boundaries.append(len(x))

    pieces = []
    remainder = x
    for length in get_all_breaks(boundaries, width=width):
        pieces.append(remainder[:length].strip())
        remainder = remainder[length:]
    pieces.append(remainder.strip())

    # Drop every empty piece (the old ``remove('')`` dropped only the first).
    return [piece for piece in pieces if piece]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment