Last active
April 15, 2021 22:38
-
-
Save CreamyCookie/7192358a9f3dbb32cc1ef01784509b3d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dataclasses import dataclass
def hyperbolic_smoother(x: float, n: float = 10.0) -> float:
    """Map ``x`` through the hyperbola ``(n + 1) * x / (n * x + 1)``.

    The curve keeps both endpoints fixed (0 maps to 0 and 1 maps to 1) and,
    for positive ``n``, lifts every value strictly between them. ``n``
    determines how strongly smaller values will be magnified; ``n = 0``
    leaves ``x`` unchanged.

    Args:
        x: The value to smooth.
        n: Strength of the magnification.

    Returns:
        The smoothed value.

    Raises:
        ZeroDivisionError: If ``n * x + 1`` is zero, which requires
            ``n * x`` to be negative.
    """
    return (n + 1) * x / (n * x + 1)
@dataclass
class Track:
    """One album track: how long it plays and the star rating it was given."""

    # Playing time in seconds.
    duration_seconds: int
    # Star rating as a whole number of stars.
    star_rating: int
# Highest possible star rating.
# if this is 4 valid ratings are: 0, 1, 2, 3, 4
MAX_RATING = 4

# for experiments: blend factors between the flat weight (1 / len(tracks))
# and the plain duration ratio. The two ratios always sum to 1.
WEIGHTED_INTERPOLATION_RATIO = 0.3
UNWEIGHTED_INTERPOLATION_RATIO = 1 - WEIGHTED_INTERPOLATION_RATIO

# Limits a weight to this value * (1 / len(tracks))
MAX_WEIGHT_FACTOR = 1.2
# Reciprocal of the upper limit factor.
MIN_WEIGHT_FACTOR = 1 / MAX_WEIGHT_FACTOR
# Sample albums used to compare the weighting schemes. Each inner list is one
# album; each Track is (duration_seconds, star_rating).
examples = [
    [Track(10, 4), Track(100, 2), Track(1000, 1), Track(120, 1), Track(160, 3),
     Track(9000, 4), Track(200, 0)],
    [Track(120, 2), Track(240, 4), Track(600, 0)],
    [Track(120, 4), Track(140, 2), Track(210, 3), Track(200, 4), Track(180, 4),
     Track(35 * 60, 0)],
    [Track(120, 4), Track(240, 4), Track(1200, 0), Track(1600, 0)],
    [Track(299, 4), Track(265, 3), Track(391, 4), Track(250, 3), Track(325, 4),
     Track(195, 3), Track(346, 4), Track(347, 3), Track(317, 2), Track(407, 3),
     Track(346, 3), Track(239, 4)],
]
# Report script: for every example album, print a per-track table comparing
# the smoothed duration weights with the plain duration ratios, followed by
# the resulting album averages and the old unweighted average.
print("The following (duration-based) weights represent the ratio / percentage"
      " a tracks'\nrating makes up of the album's average rating (which is in "
      "the TOTAL row).")
print()

for tracks in examples:
    total_duration = float(sum(i.duration_seconds for i in tracks))

    mean_weight = 1 / len(tracks)
    # mean_duration / max_duration are only needed by the disabled
    # "limit the weight" experiment further down.
    mean_duration = total_duration / len(tracks)
    max_duration = mean_duration * MAX_WEIGHT_FACTOR

    # First pass: smooth each track's share of the total duration and keep
    # the sum of the smoothed values for normalisation in the second pass.
    smoothed_duration_weights = []
    total_weight = 0
    for t in tracks:
        dur = t.duration_seconds

        # experiment: directly limit the weight a single track can have
        # fail: often results in very flat weights (and feels wrong)
        #if dur > max_duration:
        #    dur = max_duration

        duration_ratio = dur / total_duration
        weight = hyperbolic_smoother(duration_ratio)

        # experiment: interpolate between 1 / len(tracks) and duration / total
        # fail: when long track exists, the rest look very similar
        #weight = (UNWEIGHTED_INTERPOLATION_RATIO * mean_weight) + (
        #    WEIGHTED_INTERPOLATION_RATIO * duration_ratio)

        total_weight += weight
        smoothed_duration_weights.append(weight)

    # NOTE(review): the column padding inside these header strings looks
    # collapsed to single spaces — confirm against the original script.
    print(" simple")
    print(" smoothed ratio")
    print(" partial simple partial")
    print(" smoothed weighted ratio weighted")
    print("rating minutes ➜ weight rating VERSUS weight rating")
    print('-' * 80)

    # Second pass: print one row per track and accumulate the averages.
    average_smoothed_rating = 0
    average_simple_rating = 0
    sum_of_ratings = 0
    for track, weight in zip(tracks, smoothed_duration_weights):
        star_rating = track.star_rating
        dur = track.duration_seconds

        # Dividing by the sum of the smoothed weights makes them add up to 1.
        smoothed_ratio = weight / total_weight
        simple_ratio = dur / total_duration

        # Star rating rescaled by the maximum rating.
        float_rating = star_rating / MAX_RATING
        sum_of_ratings += float_rating

        # Each track's contribution to the album average under both schemes.
        wr = smoothed_ratio * float_rating
        sr = simple_ratio * float_rating

        average_simple_rating += sr
        average_smoothed_rating += wr

        print(f"{star_rating}★ = {float_rating:1.2f} {dur / 60:>6.1f} "
              f"{smoothed_ratio:>6.1%} {wr:.4f} "
              f"{simple_ratio:>6.1%} {sr:.4f}")

    print('-' * 80)
    print(f"TOTAL: {total_duration / 60:>6.1f} "
          f"{average_smoothed_rating:.4f} "
          f"{average_simple_rating:.4f}")
    print()

    # Plain arithmetic mean of the ratings, ignoring duration entirely.
    old_rating = sum_of_ratings / len(tracks)
    print(f"old (unweighted) average rating: {old_rating:.4f}")
    print(f"old (unweighted) weight (1/len): {mean_weight:6.1%}")
    print()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
The following (duration-based) weights represent the ratio / percentage a tracks' | |
rating makes up of the album's average rating (which is in the TOTAL row). | |
simple | |
smoothed ratio | |
partial simple partial | |
smoothed weighted ratio weighted | |
rating minutes ➜ weight rating VERSUS weight rating | |
-------------------------------------------------------------------------------- | |
4★ = 1.00 0.2 0.5% 0.0050 0.1% 0.0009 | |
2★ = 0.50 1.7 4.6% 0.0231 0.9% 0.0047 | |
1★ = 0.25 16.7 26.0% 0.0650 9.4% 0.0236 | |
1★ = 0.25 2.0 5.4% 0.0136 1.1% 0.0028 | |
3★ = 0.75 2.7 7.0% 0.0527 1.5% 0.0113 | |
4★ = 1.00 150.0 47.9% 0.4790 85.0% 0.8499 | |
0★ = 0.00 3.3 8.5% 0.0000 1.9% 0.0000 | |
-------------------------------------------------------------------------------- | |
TOTAL: 176.5 0.6384 0.8933 | |
old (unweighted) average rating: 0.5357 | |
old (unweighted) weight (1/len): 14.3% | |
simple | |
smoothed ratio | |
partial simple partial | |
smoothed weighted ratio weighted | |
rating minutes ➜ weight rating VERSUS weight rating | |
-------------------------------------------------------------------------------- | |
2★ = 0.50 2.0 26.1% 0.1303 12.5% 0.0625 | |
4★ = 1.00 4.0 33.5% 0.3350 25.0% 0.2500 | |
0★ = 0.00 10.0 40.4% 0.0000 62.5% 0.0000 | |
-------------------------------------------------------------------------------- | |
TOTAL: 16.0 0.4653 0.3125 | |
old (unweighted) average rating: 0.5000 | |
old (unweighted) weight (1/len): 33.3% | |
simple | |
smoothed ratio | |
partial simple partial | |
smoothed weighted ratio weighted | |
rating minutes ➜ weight rating VERSUS weight rating | |
-------------------------------------------------------------------------------- | |
4★ = 1.00 2.0 10.8% 0.1076 4.1% 0.0407 | |
2★ = 0.50 2.3 12.0% 0.0599 4.7% 0.0237 | |
3★ = 0.75 3.5 15.5% 0.1161 7.1% 0.0534 | |
4★ = 1.00 3.3 15.0% 0.1504 6.8% 0.0678 | |
4★ = 1.00 3.0 14.1% 0.1410 6.1% 0.0610 | |
0★ = 0.00 35.0 32.6% 0.0000 71.2% 0.0000 | |
-------------------------------------------------------------------------------- | |
TOTAL: 49.2 0.5750 0.2466 | |
old (unweighted) average rating: 0.7083 | |
old (unweighted) weight (1/len): 16.7% | |
simple | |
smoothed ratio | |
partial simple partial | |
smoothed weighted ratio weighted | |
rating minutes ➜ weight rating VERSUS weight rating | |
-------------------------------------------------------------------------------- | |
4★ = 1.00 2.0 11.8% 0.1179 3.8% 0.0380 | |
4★ = 1.00 4.0 18.5% 0.1850 7.6% 0.0759 | |
0★ = 0.00 20.0 33.9% 0.0000 38.0% 0.0000 | |
0★ = 0.00 26.7 35.8% 0.0000 50.6% 0.0000 | |
-------------------------------------------------------------------------------- | |
TOTAL: 52.7 0.3029 0.1139 | |
old (unweighted) average rating: 0.5000 | |
old (unweighted) weight (1/len): 25.0% | |
simple | |
smoothed ratio | |
partial simple partial | |
smoothed weighted ratio weighted | |
rating minutes ➜ weight rating VERSUS weight rating | |
-------------------------------------------------------------------------------- | |
4★ = 1.00 5.0 8.2% 0.0824 8.0% 0.0802 | |
3★ = 0.75 4.4 7.7% 0.0577 7.1% 0.0533 | |
4★ = 1.00 6.5 9.5% 0.0948 10.5% 0.1049 | |
3★ = 0.75 4.2 7.4% 0.0558 6.7% 0.0503 | |
4★ = 1.00 5.4 8.6% 0.0862 8.7% 0.0872 | |
3★ = 0.75 3.2 6.4% 0.0477 5.2% 0.0392 | |
4★ = 1.00 5.8 8.9% 0.0891 9.3% 0.0928 | |
3★ = 0.75 5.8 8.9% 0.0670 9.3% 0.0698 | |
2★ = 0.50 5.3 8.5% 0.0426 8.5% 0.0425 | |
3★ = 0.75 6.8 9.7% 0.0725 10.9% 0.0819 | |
3★ = 0.75 5.8 8.9% 0.0669 9.3% 0.0696 | |
4★ = 1.00 4.0 7.2% 0.0723 6.4% 0.0641 | |
-------------------------------------------------------------------------------- | |
TOTAL: 62.1 0.8350 0.8361 | |
old (unweighted) average rating: 0.8333 | |
old (unweighted) weight (1/len): 8.3% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment