Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds jv_unshare(), jv_is_unshared() #3109

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/jq_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,4 +490,26 @@ static void jv_test() {
//jv_dump(jv_copy(o2), 0); printf("\n");
jv_free(o2);
}

{
// Exercises jv_paths() on an object that nests an array, an object and scalars.
// NOTE(review): nothing is asserted here; input and output are only handed to
// jv_dump(), so this scope cannot fail on a wrong result.
jv input = jv_parse("{\"key\":[{\"this\":{\"some\":\"thing\"}}, 1, [true]]}");
// jv_paths() is given its own reference so that input can still be dumped below.
jv output = jv_paths(jv_copy(input));
jv_dump(input, JV_PRINT_TAB);
jv_dump(output, JV_PRINT_TAB);
}

{
// Exercises jv_addpath(): `add` is merged into the value stored under "key".
// NOTE(review): nothing is asserted here; the three values are only handed to
// jv_dump(), so this scope cannot fail on a wrong result.
jv input = jv_parse("{\"key\":{\"some\":{\"test\":\"value\"}, \"other\":\"thing\"}}");
jv add = jv_parse("{\"some\":{\"test\":\"other\"}, \"added\":\"thing\"}");
// input and add are passed as copies so both can still be dumped afterwards.
jv output = jv_addpath(jv_copy(input), JV_ARRAY(jv_string("key")), jv_copy(add));
jv_dump(input, JV_PRINT_TAB);
jv_dump(add, JV_PRINT_TAB);
jv_dump(output, JV_PRINT_TAB);
}

{
// Runs jv_unshare() on a document containing an object, an array and every
// scalar kind, then releases the result.
// NOTE(review): the copy is never compared with the input, so this only shows
// that the call returns; it would not notice a copy with wrong content.
jv output = jv_unshare(jv_parse("{\"test\":[{\"some\":\"value\"}, 1, true, false, null]}"));

jv_free(output);
}
}
62 changes: 62 additions & 0 deletions src/jv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1862,6 +1862,68 @@ jv jv_object_iter_value(jv object, int iter) {
/*
* Memory management
*/
/*
 * jv_unshare() returns a deep copy of `input`: objects, arrays, strings and
 * error messages are rebuilt recursively, so the result has the same content
 * as `input` but shares no storage with it.
 *
 * The reference to `input` is released on every path. Callers that still need
 * the original afterwards must pass jv_copy(x).
 */
jv jv_unshare(jv input){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how ugly but i guess one could save on jv_free calls by having different ownership rule for unshare? too big foot gun?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

even within the implementation of jv_unshare() we benefit from it dereferencing its input. We could have jv_unshare() not consume its memory, but at the cost of having to write a lot of stuff to buffers

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is jv_unshare() supposed to consume a reference to its argument? I think it should.

  switch(jv_get_kind(input)){
    case JV_KIND_INVALID:
    {
      if(!jv_invalid_has_msg(jv_copy(input))){
        jv_free(input);
        return jv_invalid();
      }
      /* Take the message through a copy, then drop our own reference so the
       * error value is not leaked. */
      jv msg = jv_invalid_get_msg(jv_copy(input));
      jv_free(input);
      return jv_invalid_with_msg(jv_unshare(msg));
    }
    case JV_KIND_OBJECT:
    case JV_KIND_ARRAY:
    {
      jv keys = jv_keys(jv_copy(input));
      size_t keys_length = jv_array_length(jv_copy(keys));

      jv output_object;
      if(jv_get_kind(input) == JV_KIND_OBJECT){
        output_object = jv_object();
      }else{
        output_object = jv_array();
Copy link
Member

@wader wader May 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use jv_array_sized as we know size?

      }

      for(size_t i = 0; i < keys_length; i++){
        jv key = JV_ARRAY(jv_unshare(jv_array_get(jv_copy(keys), i)));

        /* The child must be read from input: output_object has no entry for
         * this key yet, so reading from it would copy null for every key. */
        output_object = jv_setpath(
          output_object,key,
          jv_unshare(
            jv_getpath(jv_copy(input), jv_copy(key))
          )
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how much faster using jv_array_set/jv_object_set would be, could skip building a path array at least?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the main reason I did it the way I did it was to save on code, but yes doing it specific for arrays and objects would probably be faster

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, i would probably try benchmark some (hopefully) real world like use cases if performance is an issue

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then it would probably be better to separate between JV_KIND_OBJECT AND JV_KIND_ARRAY

        );
      }

      jv_free(keys);
      jv_free(input);
      return output_object;
    }
    case JV_KIND_STRING:
    {
      /* Copy by explicit byte length: going through a NUL-terminated C string
       * would cut the copy short at the first embedded "\u0000". */
      jv output_string = jv_string_sized(jv_string_value(input), jv_string_length_bytes(jv_copy(input)));
      jv_free(input);
      return output_string;
    }
    case JV_KIND_NUMBER:
    {
      double val = jv_number_value(input);
      jv_free(input);
      return jv_number(val);
    }
    case JV_KIND_TRUE:
      jv_free(input);
      return jv_true();
    case JV_KIND_FALSE:
      jv_free(input);
      return jv_false();
    case JV_KIND_NULL:
      jv_free(input);
      return jv_null();
    default:
      /* Unknown kind: still release the reference we were given. */
      jv_free(input);
      return jv_invalid();
  }
}

jv jv_copy(jv j) {
if (JVP_IS_ALLOCATED(j)) {
jvp_refcnt_inc(j.u.ptr);
Expand Down
4 changes: 4 additions & 0 deletions src/jv.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ jv_kind jv_get_kind(jv);
const char* jv_kind_name(jv_kind);
// True for every kind except JV_KIND_INVALID.
static int jv_is_valid(jv x) { return jv_get_kind(x) != JV_KIND_INVALID; }

// jv_unshare() returns a deep copy of its argument: the result has the same
// content as the input, but no memory is shared between the two.
// The argument is meant to be consumed by the call; pass jv_copy(x) if x is
// still needed afterwards.
jv jv_unshare(jv);
jv jv_copy(jv);
void jv_free(jv);

Expand Down Expand Up @@ -256,8 +258,10 @@ jv jv_get(jv, jv);
jv jv_set(jv, jv, jv);
jv jv_has(jv, jv);
jv jv_setpath(jv, jv, jv);
// jv_addpath(root, path, add): merges `add` into the value found at `path`
// inside `root`. Each scalar leaf of `add` is written to the matching location
// and values that `add` does not mention are kept, whereas jv_setpath()
// replaces the value at `path` as a whole. Returns an invalid value when
// `path` is not an array, `add` is invalid, or `add` contains a null leaf.
jv jv_addpath(jv, jv, jv);
jv jv_getpath(jv, jv);
jv jv_delpaths(jv, jv);
// jv_paths(v): for an object or array, returns an array with the path (an
// array of keys/indices) of every value nested inside it, containers included;
// a value's own path comes before the paths of its descendants. Returns an
// invalid value for any other kind.
jv jv_paths(jv);
jv jv_keys(jv /*object or array*/);
jv jv_keys_unsorted(jv /*object or array*/);
int jv_cmp(jv, jv);
Expand Down
105 changes: 105 additions & 0 deletions src/jv_aux.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,70 @@ jv jv_setpath(jv root, jv path, jv value) {
return jv_set(root, pathcurr, jv_setpath(subroot, pathrest, value));
}

/*
 * jv_addpath() merges `add` into the value located at `path` inside `root`
 * and returns the updated root.
 *
 * Unlike jv_setpath(), which replaces the value at `path` as a whole, only
 * the scalar leaves of `add` are written: each one is stored at the matching
 * location below `path` and overwrites whatever was there, while values that
 * `add` does not mention are left alone. Empty containers inside `add`
 * contribute nothing.
 *
 * - `path` must be an array and `add` must be valid, else the result is
 *   jv_invalid().
 * - If the merge target is not an object or array, `add` takes its place
 *   (only possible with an empty `path`; otherwise jv_invalid()).
 * - If `add` itself is not an object or array, it replaces the merge target.
 * - A null leaf inside `add` makes the whole call return jv_invalid().
 *
 * All three arguments are consumed.
 */
jv jv_addpath(jv root, jv path, jv add){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have a comment on what this is supposed to do? Is it different from jv_setpath()?

  if(jv_get_kind(path) != JV_KIND_ARRAY || !jv_is_valid(add)){
    jv_free(root);
    jv_free(path);
    jv_free(add);
    return jv_invalid();
  }

  if(jv_get_kind(root) != JV_KIND_OBJECT && jv_get_kind(root) != JV_KIND_ARRAY){
    jv_free(root);

    if(!jv_equal(jv_copy(path), jv_array())){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's much faster to check if path has length 0.

      jv_free(path);
      jv_free(add);
      return jv_invalid();
    }

    jv_free(path);

    /* Nothing to merge into: add becomes the value. */
    return add;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get this case.

Also, what if root is a jv_null()? Should we treat it as being a container-like value, like jq does?

  }

  if(!jv_equal(jv_copy(path), jv_array())){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

    /* Merge into the sub-value at path (with an empty path), then store the
     * merged sub-value back at path. */
    return jv_setpath(root, path, jv_addpath(jv_getpath(jv_copy(root), jv_copy(path)), jv_array(), add));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the difference between jv_setpath() and jv_addpath() now, but I'd still like a comment, and also if we don't need jv_addpath() then I'd rather not have it.

  }

  /* From here on root is a container and path is empty. A scalar add has no
   * paths to walk (jv_paths() returns an invalid value for it, which must not
   * reach jv_array_length()), so it replaces root, the same way scalar leaves
   * overwrite whatever they land on in the loop below. */
  if(jv_get_kind(add) != JV_KIND_OBJECT && jv_get_kind(add) != JV_KIND_ARRAY){
    jv_free(root);
    jv_free(path);
    return add;
  }

  jv add_paths = jv_paths(jv_copy(add));

  size_t add_paths_length = jv_array_length(jv_copy(add_paths));

  for(size_t i = 0; i < add_paths_length; i++){
    jv add_path = jv_array_get(jv_copy(add_paths), i);
    jv add_path_value = jv_getpath(jv_copy(add), jv_copy(add_path));

    /* A null (or unreadable) leaf aborts the merge. */
    if(!jv_is_valid(add_path_value) || jv_get_kind(add_path_value) == JV_KIND_NULL){
      jv_free(root);
      jv_free(path);
      jv_free(add);
      jv_free(add_paths);
      jv_free(add_path);
      jv_free(add_path_value);
      return jv_invalid();
    }

    /* Containers are not copied themselves; their leaves have their own
     * entries in add_paths. */
    if(jv_get_kind(add_path_value) == JV_KIND_OBJECT || jv_get_kind(add_path_value) == JV_KIND_ARRAY){
      jv_free(add_path);
      jv_free(add_path_value);
      continue;
    }

    root = jv_setpath(root, add_path, add_path_value);
  }

  jv_free(path);
  jv_free(add);

  jv_free(add_paths);

  return root;
}

jv jv_getpath(jv root, jv path) {
if (jv_get_kind(path) != JV_KIND_ARRAY) {
jv_free(root);
Expand Down Expand Up @@ -538,6 +602,47 @@ static int string_cmp(const void* pa, const void* pb){
return r;
}

/*
 * jv_paths() lists the path of every value nested inside `input`.
 *
 * The result is an array of paths, each path being an array of keys/indices.
 * Containers get an entry as well as scalars. Keys are visited in the order
 * jv_keys() reports them, and a key's own path is emitted before the paths of
 * anything below it.
 *
 * Returns jv_invalid() when `input` is neither an object nor an array.
 * Consumes `input`.
 */
jv jv_paths(jv input){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have a comment on what this is supposed to do? Is it output an array of paths to scalar values?

  jv_kind kind = jv_get_kind(input);
  if(kind != JV_KIND_OBJECT && kind != JV_KIND_ARRAY){
    jv_free(input);
    return jv_invalid();
  }

  jv names = jv_keys(jv_copy(input));
  size_t name_count = jv_array_length(jv_copy(names));
  jv result = jv_array();

  for(size_t k = 0; k < name_count; k++){
    jv name = jv_array_get(jv_copy(names), k);

    /* Paths below this key, relative to the child; invalid for a scalar child. */
    jv child = jv_getpath(jv_copy(input), JV_ARRAY(jv_copy(name)));
    jv below = jv_paths(child);

    /* The key's own path always comes first. */
    result = jv_array_append(result, JV_ARRAY(jv_copy(name)));

    if(jv_get_kind(below) != JV_KIND_INVALID){
      size_t below_count = jv_array_length(jv_copy(below));

      /* Prefix every relative path with this key. */
      for(size_t d = 0; d < below_count; d++){
        result = jv_array_append(result, jv_array_concat(JV_ARRAY(jv_copy(name)), jv_array_get(jv_copy(below), d)));
      }
    }

    jv_free(below);
    jv_free(name);
  }

  jv_free(names);
  jv_free(input);

  return result;
}

jv jv_keys_unsorted(jv x) {
if (jv_get_kind(x) != JV_KIND_OBJECT)
return jv_keys(x);
Expand Down