Skip to content
Merged
Prev Previous commit
Next Next commit
maintain collision avoidance while arranging points in c-curves
  • Loading branch information
rl-utility-man authored May 4, 2025
commit 5469864eafbaa73eff467ba732e3a949ae589807
140 changes: 78 additions & 62 deletions doc/python/line-and-scatter.md
Original file line number Diff line number Diff line change
Expand Up @@ -293,30 +293,32 @@ import pandas as pd
import plotly.express as px
import collections

def negative_1_if_count_is_odd(count):
# if this is an odd numbered entry in its bin, make its y coordinate negative
# the y coordinate of the first entry is 0, so entries 3, 5, and 7 get negative y coordinates
if count%2 == 1:
return -1
else:
return 1


def negative_1_if_count_is_odd(count):
# if this is an odd numbered entry in its bin, make its y coordinate negative
# the y coordinate of the first entry is 0, so entries 3, 5, and 7 get
# negative y coordinates
if count % 2 == 1:
return -1
else:
return 1


def swarm(
X_series,
point_size=16,
fig_width = 800,
fig_width=800,
gap_multiplier=1.2,
bin_fraction=0.95, #bin fraction slightly undersizes the bins to avoid collisions
):
#sorting will align columns in attractive arcs rather than having columns the vary unpredicatbly in the x-dimension
X_series=X_series.copy().sort_values()

bin_fraction=0.95, # slightly undersizes the bins to avoid collisions
):
# sorting will align columns in attractive c-shaped arcs rather than having
# columns that vary unpredictably in the x-dimension.
# We also exploit the fact that sorting means we see bins sequentially when
# we add collision prevention offsets.
X_series = X_series.copy().sort_values()

# we need to reason in terms of the marker size that is measured in px
# so we need to think about each x-coordinate as being a fraction of the way from the
# so we need to think about each x-coordinate as being a fraction of the way from the
# minimum X value to the maximum X value
min_x = min(X_series)
max_x = max(X_series)
Expand All @@ -329,79 +331,93 @@ def swarm(
for x_val in X_series:
# assign this x_value to bin number
# each bin is a vertical strip slightly narrower than one marker

bin=(((fig_width*bin_fraction*(x_val-min_x))/(max_x-min_x)) // point_size)
bin = (((fig_width*bin_fraction*(x_val-min_x))/(max_x-min_x)) // point_size)

#update the count of dots in that strip
# update the count of dots in that strip
bin_counter.update([bin])

# remember the "y-slot" which tells us the number of points in this bin and is sufficient to compute the y coordinate unless there's a collision with the point to its left
list_of_rows.append(
{"x": x_val, "y_slot": bin_counter[bin], "bin": bin})

# the collision free y coordinate gives the items in a vertical bin
# coordinates: 0, 1, -1, 2, -2, 3, -3 ... and so on to evenly spread
# their locations above and below the y-axis (we'll make a correction below to deal with even numbers of entries)
# we then scale this by the point_size*gap_multiplier to get a y coordinate in px

collision_free_y_coordinate=(bin_counter[bin]//2)*negative_1_if_count_is_odd(bin_counter[bin])*point_size*gap_multiplier
list_of_rows.append({"x":x_val,"y":collision_free_y_coordinate,"bin":bin})



# iterate through the points and "offset" any that are colliding with a
# point to their left apply the offsets to all subsequent points in the same bin.
# this arranges points in an attractive swarm c-curve where the points
# toward the edges are (weakly) further right.
bin = 0
offset = 0
for row in list_of_rows:
bin = row["bin"]
#see if we need to "look left" to avoid a possible collision
if bin != row["bin"]:
# we have moved to a new bin, so we need to reset the offset
bin = row["bin"]
offset = 0
# see if we need to "look left" to avoid a possible collision
for other_row in list_of_rows:
if (other_row["bin"]==bin-1 ):
#"bubble" the entry up until we find a slot that avoids a collision
while ((other_row["y"]==row["y"])
and (((fig_width*(row["x"]-other_row["x"]))/(max_x-min_x) // point_size) < 1)):
print(row)
print(other_row)
print(((fig_width*(row["x"]-other_row["x"] ))/(max_x-min_x) // point_size))

print("updating to fix collision")
if (other_row["bin"] == bin-1):
# "bubble" the entry up until we find a slot that avoids a collision
while ((other_row["y_slot"] == row["y_slot"]+offset)
and (((fig_width*(row["x"]-other_row["x"]))/(max_x-min_x)
// point_size) < 1)):
offset += 1
# update the bin count so we know whether the number of
# *used* slots is even or odd
bin_counter.update([bin])
print(bin_counter[bin])
row["y"]=(bin_counter[bin]//2)*negative_1_if_count_is_odd(bin_counter[bin])*point_size*gap_multiplier
print(row["y"])

# if the number of points is even,
# move y-coordinates down to put an equal number of entries above and below the axis
row["y_slot"] += offset
# The collision free y coordinate gives the items in a vertical bin
# y-coordinates to evenly spread their locations above and below the
# y-axis (we'll make a correction below to deal with even numbers of
# entries). For now, we'll assign 0, 1, -1, 2, -2, 3, -3 ... and so on.
# We scale this by the point_size*gap_multiplier to get a y coordinate
# in px.
row["y"] = (row["y_slot"]//2) * \
negative_1_if_count_is_odd(row["y_slot"])*point_size*gap_multiplier
print(row["y"])

# if the number of points is even, move y-coordinates down to put an equal
# number of entries above and below the axis
for row in list_of_rows:
if bin_counter[row["bin"]]%2==0:
row["y"]-=point_size*gap_multiplier/2

if bin_counter[row["bin"]] % 2 == 0:
row["y"] -= point_size*gap_multiplier/2

df = pd.DataFrame(list_of_rows)
# one way to make this code more flexible to e.g. handle multiple categories would be to return a list of "swarmified" y coordinates here
# you could then generate "swarmified" y coordinates for each category and add category specific offsets before scatterplotting them
# One way to make this code more flexible to e.g. handle multiple categories
# would be to return a list of "swarmified" y coordinates here and then plot
# outside the function.
# That generalization would let you "swarmify" y coordinates for each
# category and add category specific offsets to put the each category in its
# own row

fig = px.scatter(
df,
x="x",
y="y",
)
#we want to suppress the y coordinate in the hover value because the y-coordinate is irrelevant/misleading
# we want to suppress the y coordinate in the hover value because the
# y-coordinate is irrelevant/misleading
fig.update_traces(
marker_size=point_size,
#suppress the y coordinate because the y-coordinate is irrelevant
# suppress the y coordinate because the y-coordinate is irrelevant
hovertemplate="<b>value</b>: %{x}",
)
# we have to set the width and height because we aim to avoid icon collisions and we specify the icon size
# in the same units as the width and height
fig.update_layout(width=fig_width, height=(point_size*max(bin_counter.values())+200))
# we have to set the width and height because we aim to avoid icon collisions
# and we specify the icon size in the same units as the width and height
fig.update_layout(width=fig_width, height=(
point_size*max(bin_counter.values())+200))
fig.update_yaxes(
showticklabels=False, # Turn off y-axis labels
ticks='', # Remove the ticks
title=""
showticklabels=False, # Turn off y-axis labels
ticks='', # Remove the ticks
title=""
)
return fig



df_iris = px.data.iris() # iris is a pandas DataFrame
x = df_iris["sepal_length"]
fig = swarm(x)
fig.show()
df = px.data.iris() # iris is a pandas DataFrame
fig = swarm(df["sepal_length"])
# here's a more interesting test case for collision avoidance:
#fig = swarm(pd.Series([1, 1.5, 1.78, 1.79, 1.85, 2,
# 2, 2, 2, 3, 3, 2.05, 2.1, 2.2, 2.5, 12]))
fig.show()
```

## Scatter and line plots with go.Scatter
Expand Down