From a575079c06a2e9eb228d96dd00f0203e5fe2a0cd Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 27 Aug 2024 15:16:11 +1200 Subject: [PATCH 1/9] [docs] add tutorial: Performance problems with sum-if formulations --- docs/make.jl | 1 + docs/src/tutorials/getting_started/sum_if.jl | 245 +++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 docs/src/tutorials/getting_started/sum_if.jl diff --git a/docs/make.jl b/docs/make.jl index 91fe8e315e7..c52062d7bd3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -317,6 +317,7 @@ const _PAGES = [ "tutorials/getting_started/debugging.md", "tutorials/getting_started/design_patterns_for_larger_models.md", "tutorials/getting_started/performance_tips.md", + "tutorials/getting_started/sum_if.md", ], "Transitioning" => ["tutorials/transitioning/transitioning_from_matlab.md"], diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl new file mode 100644 index 00000000000..c513fd0a19f --- /dev/null +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -0,0 +1,245 @@ +# Copyright (c) 2024 Oscar Dowson and contributors #src +# #src +# Permission is hereby granted, free of charge, to any person obtaining a copy #src +# of this software and associated documentation files (the "Software"), to deal #src +# in the Software without restriction, including without limitation the rights #src +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #src +# copies of the Software, and to permit persons to whom the Software is #src +# furnished to do so, subject to the following conditions: #src +# #src +# The above copyright notice and this permission notice shall be included in all #src +# copies or substantial portions of the Software. #src +# #src +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #src +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #src +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #src +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #src +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #src +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #src +# SOFTWARE. #src + +# # Performance problems with sum-if formulations + +# The purpose of this tutorial is to explain a common performance issue that can +# arise with summations like `sum(x[a] for a in list if condition(a))`. This +# issue is particularly common in models with graph or network structures. + +# !!! tip +# This tutorial is more advanced than the other "Getting started" tutorials. +# It's in the "Getting started" section because it is one of the most common +# causes of performance problems that users experience when they first start +# using JuMP to write large scale programs. If you are new to JuMP, you may +# want to briefly skim the tutorial, and come back to it once you have +# written a few JuMP models. + +# ## Required packages + +# This tutorial uses the following packages + +using JuMP +import Plots +import Statistics + +# ## Data + +# As a motivating example, we consider a network flow problem, like the examples +# in [Network flow problems](@ref) or [The network multi-commodity flow problem](@ref). + +# Here is a function that builds a random graph. The specifics do not matter. + +function build_random_graph(num_nodes::Int, num_edges::Int) + nodes = 1:num_nodes + edges = Pair{Int,Int}[i - 1 => i for i in 2:num_nodes] + while length(edges) < num_edges + edge = rand(nodes) => rand(nodes) + if !(edge in edges) + push!(edges, edge) + end + end + function demand(n) + if n == 1 + return -1 + elseif n == num_nodes + return 1 + else + return 0 + end + end + return nodes, edges, demand +end + +nodes, edges, demand = build_random_graph(4, 8) + +# The goal is to decide the flow of a commodity along each edge in `edges` to +# satisfy the `demand(n)` of each node `n` in `nodes`. + +# The mathematical formulation is: +# +# ```math +# \begin{aligned} +# s.t. && \sum_{(i,n)\in E} x_{i,n} - \sum_{(n,j)\in E} x_{n,j} = d_n && \forall n \in N\\ +# && x_{e} \\ge 0 && \forall e \in E +# \end{aligned} +# ``` + +# ## Naïve model + +# The first model you might write down is: + +model = Model() +@variable(model, flows[e in edges] >= 0) +@constraint( + model, + [n in nodes], + sum(flows[(i, j)] for (i, j) in edges if j == n) - + sum(flows[(i, j)] for (i, j) in edges if i == n) == demand(n) +); + +# The benefit of this formulation is that it looks very similar to the +# mathematical formulation of a network flow problem. + +# The downside to this formulation is subtle. Behind the scenes, the JuMP macros +# expand to: + +model = Model() +@variable(model, flows[e in edges] >= 0) +for n in nodes + flow_out = AffExpr(0.0) + for (i, j) in edges + if i == n + add_to_expression!(flow_out, flows[(i, j)]) + end + end + flow_in = AffExpr(0.0) + for (i, j) in edges + if j == n + add_to_expression!(flow_in, flows[(i, j)]) + end + end + @constraint(model, flow_out - flow_in == demand(n)) +end + +# This formulation includes two for-loops, that loop over every edge (twice) for +# every node. The big-O notation of this is ``O(|nodes| \times |edges|)``. If +# you have a large number of nodes and a large number of edges, the runtime of +# this loop can be large. + +# Let's build a function to benchmark our formulation: + +function build_naive_model(nodes, edges, demand) + model = Model() + @variable(model, flows[e in edges] >= 0) + @constraint( + model, + [n in nodes], + sum(flows[(i, j)] for (i, j) in edges if i == n) - + sum(flows[(i, j)] for (i, j) in edges if j == n) == demand(n) + ) + return model +end + +nodes, edges, demand = build_random_graph(1_000, 2_000) +@elapsed build_naive_model(nodes, edges, demand) + +# A good way to benchmark is to measure the runtime across a wide range of input +# sizes. From our big-O analysis, we should expect that doubling the number of +# nodes and edges results in a 4x increase in the runtime. + +run_times = Float64[] +factors = 1:10 +for factor in factors + graph = build_random_graph(1_000 * factor, 5_000 * factor) + push!(run_times, @elapsed build_naive_model(graph...)) +end +Plots.plot(; xlabel = "Factor", ylabel = "Runtime [s]") +Plots.plot!(factors, run_times; label = "Actual") +a, b = hcat(ones(10), factors .^ 2) \ run_times +Plots.plot!(factors, a .+ b * factors .^ 2; label = "Quadratic fit") + +# As expected, the runtimes demonstrate quadratic scaling: if we double the +# number of nodes and edges, the runtime increases by a factor of four. + +# ## Caching + +# We can improve our formulation by caching the list of incoming and outgoing +# nodes for each node `n`: + +out_nodes = Dict(n => Int[] for n in nodes) +in_nodes = Dict(n => Int[] for n in nodes) +for (i, j) in edges + push!(out_nodes[i], j) + push!(in_nodes[j], i) +end + +# with the corresponding change to our model: + +model = Model() +@variable(model, flows[e in edges] >= 0) +@constraint( + model, + [n in nodes], + sum(flows[(n, j)] for j in out_nodes[n]) - + sum(flows[(i, n)] for i in in_nodes[n]) == demand(n) +); + +# The benefit of this formulation is that we now loop over |`out_nodes[n]`| +# rather that `edges` for each node `n`. If the graph is sparse, so that the +# number of edges attached to a node is much less than the total number of +# edges, then this can have a large performance benefit. + +# Let's build a new function to benchmark our formulation: + +function build_cached_model(nodes, edges, demand) + out_nodes = Dict(n => Int[] for n in nodes) + in_nodes = Dict(n => Int[] for n in nodes) + for (i, j) in edges + push!(out_nodes[i], j) + push!(in_nodes[j], i) + end + model = Model() + @variable(model, flows[e in edges] >= 0) + @constraint( + model, + [n in nodes], + sum(flows[(n, j)] for j in out_nodes[n]) - + sum(flows[(i, n)] for i in in_nodes[n]) == demand(n) + ) + return model +end + +nodes, edges, demand = build_random_graph(1_000, 2_000) +@elapsed build_cached_model(nodes, edges, demand) + +# ## Analysis + +# Now we can analyse the difference in runtime of the two formulations: + +run_times_naive = Float64[] +run_times_cached = Float64[] +factors = 1:10 +for factor in factors + graph = build_random_graph(1_000 * factor, 5_000 * factor) + push!(run_times_naive, @elapsed build_naive_model(graph...)) + push!(run_times_cached, @elapsed build_cached_model(graph...)) +end +Plots.plot(; xlabel = "Factor", ylabel = "Runtime [s]") +Plots.plot!(factors, run_times_naive; label = "Actual") +Plots.plot!(factors, run_times_cached; label = "Cached") + +# Even though the cached model needs to build `in_nodes`` and `out_nodes`, it is +# asymptotically faster. + +# ## Lesson + +# If you write code with `sum-if` type conditions, for example, +#`@constraint(model, [a in set], sum(x[b] for b in list if condition(a, b))`, +# you can improve the performance by caching of the elements for which `condition(a, b)` +# is true. + +# Finally, you should understand that this behavior is not specific to JuMP, and +# that it applies more generally to all computer programs you might write. +# (Python programs that use Pyomo or gurobipy would similarly benefit from this +# caching approach.) +# +# Understanding big-O notation and algorithmic complexity is a useful debugging +# skill to have, regardless of the type of program that you are writing. From f9ff3fa7e3cdaff4c5a787ab65c7ef8a43643de1 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Tue, 27 Aug 2024 15:40:44 +1200 Subject: [PATCH 2/9] Update docs/src/tutorials/getting_started/sum_if.jl --- docs/src/tutorials/getting_started/sum_if.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl index c513fd0a19f..3f708b41675 100644 --- a/docs/src/tutorials/getting_started/sum_if.jl +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -232,7 +232,7 @@ Plots.plot!(factors, run_times_cached; label = "Cached") # ## Lesson # If you write code with `sum-if` type conditions, for example, -#`@constraint(model, [a in set], sum(x[b] for b in list if condition(a, b))`, +# `@constraint(model, [a in set], sum(x[b] for b in list if condition(a, b))`, # you can improve the performance by caching of the elements for which `condition(a, b)` # is true. From 5654ac82d0a90b931bb10435fd7c9875c034d06d Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Tue, 27 Aug 2024 15:49:57 +1200 Subject: [PATCH 3/9] Apply suggestions from code review --- docs/src/tutorials/getting_started/sum_if.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl index 3f708b41675..ab46f33f63b 100644 --- a/docs/src/tutorials/getting_started/sum_if.jl +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -29,7 +29,7 @@ # It's in the "Getting started" section because it is one of the most common # causes of performance problems that users experience when they first start # using JuMP to write large scale programs. If you are new to JuMP, you may -# want to briefly skim the tutorial, and come back to it once you have +# want to briefly skim the tutorial and come back to it once you have # written a few JuMP models. # ## Required packages @@ -98,8 +98,8 @@ model = Model() # The benefit of this formulation is that it looks very similar to the # mathematical formulation of a network flow problem. -# The downside to this formulation is subtle. Behind the scenes, the JuMP macros -# expand to: +# The downside to this formulation is subtle. Behind the scenes, the JuMP +# `@constraint` macro expands to something like: model = Model() @variable(model, flows[e in edges] >= 0) @@ -119,8 +119,8 @@ for n in nodes @constraint(model, flow_out - flow_in == demand(n)) end -# This formulation includes two for-loops, that loop over every edge (twice) for -# every node. The big-O notation of this is ``O(|nodes| \times |edges|)``. If +# This formulation includes two for-loops, with a loop over every edge (twice) for +# every node. The [big-O notation](https://en.wikipedia.org/wiki/Big_O_notation) of the runtime is ``O(|nodes| \times |edges|)``. If # you have a large number of nodes and a large number of edges, the runtime of # this loop can be large. @@ -233,7 +233,7 @@ Plots.plot!(factors, run_times_cached; label = "Cached") # If you write code with `sum-if` type conditions, for example, # `@constraint(model, [a in set], sum(x[b] for b in list if condition(a, b))`, -# you can improve the performance by caching of the elements for which `condition(a, b)` +# you can improve the performance by caching the elements for which `condition(a, b)` # is true. # Finally, you should understand that this behavior is not specific to JuMP, and From 256793af6f436de2c259c92575acf3648eae588d Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Tue, 27 Aug 2024 16:31:35 +1200 Subject: [PATCH 4/9] Apply suggestions from code review --- docs/src/tutorials/getting_started/sum_if.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl index ab46f33f63b..51326c70268 100644 --- a/docs/src/tutorials/getting_started/sum_if.jl +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -78,7 +78,7 @@ nodes, edges, demand = build_random_graph(4, 8) # ```math # \begin{aligned} # s.t. && \sum_{(i,n)\in E} x_{i,n} - \sum_{(n,j)\in E} x_{n,j} = d_n && \forall n \in N\\ -# && x_{e} \\ge 0 && \forall e \in E +# && x_{e} \ge 0 && \forall e \in E # \end{aligned} # ``` @@ -120,7 +120,8 @@ for n in nodes end # This formulation includes two for-loops, with a loop over every edge (twice) for -# every node. The [big-O notation](https://en.wikipedia.org/wiki/Big_O_notation) of the runtime is ``O(|nodes| \times |edges|)``. If +# every node. The [big-O notation](https://en.wikipedia.org/wiki/Big_O_notation) +# of the runtime is ``O(|nodes| \times |edges|)``. If # you have a large number of nodes and a large number of edges, the runtime of # this loop can be large. @@ -182,8 +183,8 @@ model = Model() sum(flows[(i, n)] for i in in_nodes[n]) == demand(n) ); -# The benefit of this formulation is that we now loop over |`out_nodes[n]`| -# rather that `edges` for each node `n`. If the graph is sparse, so that the +# The benefit of this formulation is that we now loop over `out_nodes[n]` +# rather than `edges` for each node `n`. If the graph is sparse, so that the # number of edges attached to a node is much less than the total number of # edges, then this can have a large performance benefit. @@ -226,7 +227,7 @@ Plots.plot(; xlabel = "Factor", ylabel = "Runtime [s]") Plots.plot!(factors, run_times_naive; label = "Actual") Plots.plot!(factors, run_times_cached; label = "Cached") -# Even though the cached model needs to build `in_nodes`` and `out_nodes`, it is +# Even though the cached model needs to build `in_nodes` and `out_nodes`, it is # asymptotically faster. # ## Lesson From 96997abd82350938120a6f53f89f17d9fc3ea57d Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Tue, 27 Aug 2024 16:32:03 +1200 Subject: [PATCH 5/9] Update accept.txt --- docs/styles/config/vocabularies/JuMP/accept.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/styles/config/vocabularies/JuMP/accept.txt b/docs/styles/config/vocabularies/JuMP/accept.txt index 7c8ed8c6541..3a38e502e31 100644 --- a/docs/styles/config/vocabularies/JuMP/accept.txt +++ b/docs/styles/config/vocabularies/JuMP/accept.txt @@ -42,6 +42,7 @@ preprint README recurse reimplemented +runtime(?s) [Ss]tacktrace subexpression(?s) subgraph(?s) From 89957758c8cf74d72257511e09449441847e1cb7 Mon Sep 17 00:00:00 2001 From: odow Date: Tue, 27 Aug 2024 19:57:10 +1200 Subject: [PATCH 6/9] Update --- docs/src/tutorials/getting_started/sum_if.jl | 26 +++++++++---------- .../config/vocabularies/JuMP/accept.txt | 2 ++ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl index 51326c70268..5576d7d9448 100644 --- a/docs/src/tutorials/getting_started/sum_if.jl +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -104,19 +104,19 @@ model = Model() model = Model() @variable(model, flows[e in edges] >= 0) for n in nodes - flow_out = AffExpr(0.0) - for (i, j) in edges - if i == n - add_to_expression!(flow_out, flows[(i, j)]) - end - end flow_in = AffExpr(0.0) for (i, j) in edges if j == n add_to_expression!(flow_in, flows[(i, j)]) end end - @constraint(model, flow_out - flow_in == demand(n)) + flow_out = AffExpr(0.0) + for (i, j) in edges + if i == n + add_to_expression!(flow_out, flows[(i, j)]) + end + end + @constraint(model, flow_in - flow_out == demand(n)) end # This formulation includes two for-loops, with a loop over every edge (twice) for @@ -133,8 +133,8 @@ function build_naive_model(nodes, edges, demand) @constraint( model, [n in nodes], - sum(flows[(i, j)] for (i, j) in edges if i == n) - - sum(flows[(i, j)] for (i, j) in edges if j == n) == demand(n) + sum(flows[(i, j)] for (i, j) in edges if j == n) - + sum(flows[(i, j)] for (i, j) in edges if i == n) == demand(n) ) return model end @@ -179,8 +179,8 @@ model = Model() @constraint( model, [n in nodes], - sum(flows[(n, j)] for j in out_nodes[n]) - - sum(flows[(i, n)] for i in in_nodes[n]) == demand(n) + sum(flows[(i, n)] for i in in_nodes[n]) - + sum(flows[(n, j)] for j in out_nodes[n]) == demand(n) ); # The benefit of this formulation is that we now loop over `out_nodes[n]` @@ -202,8 +202,8 @@ function build_cached_model(nodes, edges, demand) @constraint( model, [n in nodes], - sum(flows[(n, j)] for j in out_nodes[n]) - - sum(flows[(i, n)] for i in in_nodes[n]) == demand(n) + sum(flows[(i, n)] for i in in_nodes[n]) - + sum(flows[(n, j)] for j in out_nodes[n]) == demand(n) ) return model end diff --git a/docs/styles/config/vocabularies/JuMP/accept.txt b/docs/styles/config/vocabularies/JuMP/accept.txt index 3a38e502e31..5c73dbd1489 100644 --- a/docs/styles/config/vocabularies/JuMP/accept.txt +++ b/docs/styles/config/vocabularies/JuMP/accept.txt @@ -137,6 +137,7 @@ Geomstats Geoopt GLPK (Gurobi|GUROBI) +gurobipy GZip Hypatia (Ipopt|IPOPT) @@ -158,6 +159,7 @@ Pavito Penbmi Penopt Plasmo +Pyomo puzzlor RAPOSa Riemopt From 6b1b1cf299ac9d214cf7a3125fe3bcbe92e4c347 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Tue, 27 Aug 2024 20:51:35 +1200 Subject: [PATCH 7/9] Update docs/src/tutorials/getting_started/sum_if.jl --- docs/src/tutorials/getting_started/sum_if.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl index 5576d7d9448..668c999ec4e 100644 --- a/docs/src/tutorials/getting_started/sum_if.jl +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -38,7 +38,6 @@ using JuMP import Plots -import Statistics # ## Data From c340a9860b0c9cbc562af2a5267e09e1bc171b86 Mon Sep 17 00:00:00 2001 From: odow Date: Wed, 28 Aug 2024 09:36:58 +1200 Subject: [PATCH 8/9] Update --- docs/src/tutorials/getting_started/sum_if.jl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl index 668c999ec4e..e01faa9c603 100644 --- a/docs/src/tutorials/getting_started/sum_if.jl +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -148,11 +148,12 @@ nodes, edges, demand = build_random_graph(1_000, 2_000) run_times = Float64[] factors = 1:10 for factor in factors + GC.gc() #src graph = build_random_graph(1_000 * factor, 5_000 * factor) push!(run_times, @elapsed build_naive_model(graph...)) end Plots.plot(; xlabel = "Factor", ylabel = "Runtime [s]") -Plots.plot!(factors, run_times; label = "Actual") +Plots.scatter!(factors, run_times; label = "Actual") a, b = hcat(ones(10), factors .^ 2) \ run_times Plots.plot!(factors, a .+ b * factors .^ 2; label = "Quadratic fit") @@ -183,9 +184,7 @@ model = Model() ); # The benefit of this formulation is that we now loop over `out_nodes[n]` -# rather than `edges` for each node `n`. If the graph is sparse, so that the -# number of edges attached to a node is much less than the total number of -# edges, then this can have a large performance benefit. +# rather than `edges` for each node `n`, and so the runtime is ``O(|edges|)``. # Let's build a new function to benchmark our formulation: @@ -218,16 +217,22 @@ run_times_naive = Float64[] run_times_cached = Float64[] factors = 1:10 for factor in factors + GC.gc() #src graph = build_random_graph(1_000 * factor, 5_000 * factor) push!(run_times_naive, @elapsed build_naive_model(graph...)) push!(run_times_cached, @elapsed build_cached_model(graph...)) end Plots.plot(; xlabel = "Factor", ylabel = "Runtime [s]") -Plots.plot!(factors, run_times_naive; label = "Actual") -Plots.plot!(factors, run_times_cached; label = "Cached") +Plots.scatter!(factors, run_times_naive; label = "Actual") +a, b = hcat(ones(10), factors .^ 2) \ run_times_naive +Plots.plot!(factors, a .+ b * factors .^ 2; label = "Quadratic fit") +Plots.scatter!(factors, run_times_cached; label = "Cached") +a, b = hcat(ones(10), factors) \ run_times_cached +Plots.plot!(factors, a .+ b * factors; label = "Linear fit") # Even though the cached model needs to build `in_nodes` and `out_nodes`, it is -# asymptotically faster. +# asymptotically faster than the naïve model, scaling linearly with `factor` +# rather than quadraticaly. # ## Lesson From 1a959fbe525698bd81ad85368997855c3e3229c6 Mon Sep 17 00:00:00 2001 From: Oscar Dowson Date: Wed, 28 Aug 2024 10:41:08 +1200 Subject: [PATCH 9/9] Update docs/src/tutorials/getting_started/sum_if.jl --- docs/src/tutorials/getting_started/sum_if.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/tutorials/getting_started/sum_if.jl b/docs/src/tutorials/getting_started/sum_if.jl index e01faa9c603..bc7917975b2 100644 --- a/docs/src/tutorials/getting_started/sum_if.jl +++ b/docs/src/tutorials/getting_started/sum_if.jl @@ -232,7 +232,7 @@ Plots.plot!(factors, a .+ b * factors; label = "Linear fit") # Even though the cached model needs to build `in_nodes` and `out_nodes`, it is # asymptotically faster than the naïve model, scaling linearly with `factor` -# rather than quadraticaly. +# rather than quadratically. # ## Lesson