diff --git a/src/tracker/tracker.jl b/src/tracker/tracker.jl index 40fe83a9..0f71225d 100644 --- a/src/tracker/tracker.jl +++ b/src/tracker/tracker.jl @@ -2,16 +2,17 @@ pairfloes( segmented_imgs::Vector{BitMatrix}, props::Vector{DataFrame}, - dt::Vector{Int64}, + passtimes::Vector{DateTime}, + dt::Vector{Float64}, condition_thresholds, mc_thresholds, ) -Pair floes in `props[k]` to floes in `props[k+1]` for `k=1:length(props)-1` into an array of `MatchedPairs`. +Pair floes in `props[k]` to floes in `props[k+1]` for `k=1:length(props)-1`. The main steps of the algorithm are as follows: -1. Crop floes from `segmented_imgs` using bounding box data in `props`. +1. Crop floes from `segmented_imgs` using bounding box data in `props`. Floes in the edges are removed. 2. For each floe_k_r in `props[k]`, compare to floe_k+1_s in `props[k+1]` by computing similarity ratios, set of `conditions`, and drift distance `dist`. If the conditions are met, compute the area mismatch `mm` and psi-s correlation `c` for this pair of floes. Pair these two floes if `mm` and `c` satisfy the thresholds in `mc_thresholds`. 3. If there are collisions (i.e. floe `s` in `props[k+1]` is paired to more than one floe in `props[k]`), then the floe in `props[k]` with the best match is paired to floe `s` in `props[k+1]`. 4. Drop paired floes from `props[k]` and `props[k+1]` and repeat steps 2 and 3 until there are no more floes to match in `props[k]`. @@ -20,19 +21,29 @@ The main steps of the algorithm are as follows: # Arguments - `segmented_imgs`: array of images with segmented floes. - `props`: array of dataframes containing floe properties. +- `passtimes`: array of `DateTime` objects containing the time of the image in which the floes were captured. - `dt`: array of time elapsed between images in `segmented_imgs`. - `condition_thresholds`: 3-tuple of thresholds (each a named tuple) for deciding whether to match floe `i` from day `k` to floe j from day `k+1`. - `mc_thresholds`: thresholds for area mismatch and psi-s shape correlation. -Returns an array of `MatchedPairs` containing the properties of matched floe pairs, their similarity ratios, and their distances between their centroids. +Returns a tuple `(props, trackdata)` where `props` is a long dataframe containing floe ID's, passtimes, the original set of physical properties, and their masks and `trackdata` is a dataframe containing the floe tracking data. """ function pairfloes( segmented_imgs::Vector{BitMatrix}, props::Vector{DataFrame}, + passtimes::Vector{DateTime}, dt::Vector{Float64}, condition_thresholds, mc_thresholds, ) + sort_floes_by_area!(props) + + # Assign a unique ID to each floe in each image + for (i, prop) in enumerate(props) + props[i].uuid = [randstring(12) for _ in 1:nrow(prop)] + end + + add_passtimes!(props, passtimes) # Initialize container for props of matched pairs of floes, their similarity ratios, and their distances between their centroids tracked = Tracked() @@ -105,49 +116,74 @@ function pairfloes( update!(tracked, match_total) end sort!(tracked) - return tracked.data -end + _pairs = tracked.data -# #= Floe Tagging =# -""" - lookup_value(df, key, key_column::Symbol=:ID_1, value_column::Symbol=:ID) + # Make a dict with keys in _pairs[i].props2.uuid and values in _pairs[i-1].props1.uuid + mappings = [Dict(pair.props2.uuid .=> pair.props1.uuid) for pair in _pairs] -Look up a value in a DataFrame by a `key` in column `key_column` and return the value in column `value_column`. If the key is not found, return `missing`. -""" -function lookup_value(df, key, key_column::Symbol=:ID_1, value_column::Symbol=:ID) - row = findfirst(df[:, key_column] .== key) - if row !== nothing - return df[row, value_column] - else - return missing + # Convert mappings to functions + funcsfrommappings = [x -> get(mapping, x, x) for mapping in mappings] + + # Compose functions in reverse order to push uuids forward + mapuuid = foldr((f, g) -> x -> f(g(x)), funcsfrommappings) + + for prop in props[2:end] + prop.uuid = mapuuid.(prop.uuid) + end + + # Collect all unique uuids in props[i] to label as simple ints starting from 1 + uuids = unique([uuid for prop in props for uuid in prop.uuid]) + + # create mapping from uuids to index + uuid2index = Dict(uuid => i for (i, uuid) in enumerate(uuids)) + + # apply the uuid2index mapping to props + for prop in props + prop.uuid .= [uuid2index[uuid] for uuid in prop.uuid] end + + # Merge all props into one long DataFrame + propsvert = vcat(props...) + + # rename uuid to ID + rename!(propsvert, :uuid => :ID) + + # 2. Sort propsvert by uuid and then by passtime + DataFrames.sort!(propsvert, [:ID, :passtime]) + + # 3. Move ID, passtime columns to the front + propsvert = propsvert[:, unique(["ID", "passtime", names(propsvert)...])] + + return (props = propsvert[:, names(propsvert)[1:15]], trackdata = _pairs) end + """ - tagfloes!(floedata, pairs) + add_passtimes!(props, passtimes) -Tag floes in the `data.props` by assigning a unique `Floe_ID`` to each floe in each image using pair data in `pairs`. The ID is stored in the `Floe_ID` column of the `props` DataFrame in `data`. +Add a column `passtime` to each DataFrame in `props` containing the time of the image in which the floes were captured. # Arguments -- `data::Data`: the data structure containing the images and properties of the floes (output of `IFTPipeline.extractfeatures`). The `props` field of `data` is modified in-place. -- `pairs`: the output of `pairfloes`. +- `props`: array of DataFrames containing floe properties. +- `passtimes`: array of `DateTime` objects containing the time of the image in which the floes were captured. + """ -function tagfloes!(props, pairs) - - for (i, pair) in enumerate(pairs) - getmatchingfloe(key) = lookup_value(pair, key) - insertcols!(props[i+1], 1, :Floe_ID => getmatchingfloe.(props[i+1].ID)) - DataFrames.sort!(props[i+1], :Floe_ID) - - n0 = last(props[i].ID) - _missing = ismissing.(props[i+1].Floe_ID) - _start = n0 + 1 - _end = sum(_missing) - props[i+1][_missing, :Floe_ID] = _start:_end+_start-1 +function add_passtimes!(props, passtimes) + for (i, passtime) in enumerate(passtimes) + props[i].passtime .= passtime end + nothing +end + +""" + sort_floes_by_area!(props) - # make a new column "Floe_ID" in props[1] with old ID column, delete old ID column - insertcols!(props[1], 1, :Floe_ID => props[1].ID) - select!(props[1], Not([:ID])) - return nothing +Sort floes in `props` by area in descending order. +""" +function sort_floes_by_area!(props) + for prop in props + # sort by area in descending order + DataFrames.sort!(prop, :area; rev=true) + nothing + end end diff --git a/test/test-matchcorr.jl b/test/test-matchcorr.jl index ff417b6b..abf0ec0c 100644 --- a/test/test-matchcorr.jl +++ b/test/test-matchcorr.jl @@ -36,24 +36,33 @@ # Load data data = deserialize(joinpath(path, "tracker_test_data.dat")) + passtimes = deserialize(joinpath(path, "passtimes.jls")) - # sort floe data by area - for i in 1:3 - sort!(data.props[i], :area; rev=true) + # Filtering out small floes. Algorithm performs poorly on small, amorphous floes as they seem to look similar (too `blobby`) to each other + for (i, prop) in enumerate(data.props) + data.props[i] = prop[prop[:, :area].>=350, :] end - pairs = IceFloeTracker.pairfloes( - data.imgs, data.props, dt, condition_thresholds, mc_thresholds + _pairs, _ = IceFloeTracker.pairfloes( + data.imgs, data.props, passtimes, dt, condition_thresholds, mc_thresholds ) - @test length(pairs) == 2 - r = rand(1:(length(pairs[1].props1[:, :area]))) - f1 = pairs[1].props1[r, :] - f2 = pairs[1].props2[r, :] - ratios = pairs[1].ratios[r, :] - @test sqrt( - (f1.row_centroid - f2.row_centroid)^2 + (f1.col_centroid - f2.col_centroid)^2 - ) == pairs[1].dist[r] - @test all(collect(ratios)[1:4] .< collect(t3)[2:end]) + @test maximum(_pairs.ID) == 10 + @test names(_pairs) == ["ID", + "passtime", + "area", + "min_row", + "min_col", + "max_row", + "max_col", + "row_centroid", + "col_centroid", + "convex_area", + "major_axis_length", + "minor_axis_length", + "orientation", + "perimeter", + "mask"] + @test issorted(_pairs, :ID) end end diff --git a/test/test_inputs/tracker/tracker_test_data.dat b/test/test_inputs/tracker/tracker_test_data.dat index 8ad797ae..042112b0 100644 Binary files a/test/test_inputs/tracker/tracker_test_data.dat and b/test/test_inputs/tracker/tracker_test_data.dat differ