diff --git a/include/graph.c b/include/graph.c
deleted file mode 100644
index 60cc261..0000000
--- a/include/graph.c
+++ /dev/null
@@ -1,159 +0,0 @@
-typedef struct{
-    PyObject_HEAD
-    Index node_count;
-    Index* pre_neighbor_offsets;
-    PyArrayObject* edge_list;
-    //PyArrayObject* edge_types;
-} Graph;
-
-
-static void Graph_dealloc(Graph* graph){
-    free(graph->pre_neighbor_offsets);
-    Py_DECREF(graph->edge_list);
-    Py_TYPE(graph)->tp_free((PyObject*)graph);
-}
-
-
-static PyObject* Graph_new(PyTypeObject* type, PyObject* args, PyObject* kwds){
-    Index node_count;
-    PyArrayObject* edges;
-    //PyArrayObject* edge_types;
-
-    // ASSUMPTION: edge list should be sorted in the second argument or destination
-
-    if(!PyArg_ParseTuple(args, "OI", (PyObject**)&edges, /*(PyObject**)&edge_types,*/ &node_count)){
-        puts("no new graph without correct args");
-        return NULL;
-    }
-
-    Graph* graph = (Graph*) type->tp_alloc(type, 0);
-
-    if(graph!=NULL){
-        // NOTE: if ASSUMPTION in count_neighbors is wrong, memory needs to be zeroed out before starting counting
-        graph->pre_neighbor_offsets = (Index*)malloc((node_count+1)*sizeof(Index));
-
-        ASSERT(graph->pre_neighbor_offsets);
-    }
-
-    return (PyObject*)graph;
-}
-
-
-static int Graph_init(Graph* graph, PyObject* args, PyObject* kwds){
-    PyArrayObject* edges;
-    //PyArrayObject* edge_types;
-    Index node_count;
-
-    // ASSUMPTION: edge list should be sorted in the second argument
-
-    if(!PyArg_ParseTuple(args, "OI", (PyObject**)&edges, /*(PyObject**)&edge_types,*/ &node_count)){
-        puts("couldn't parse edge list");
-        return -1;
-    }
-
-    graph->node_count = node_count;
-
-    graph->edge_list = edges;
-
-    Py_INCREF(edges);
-
-    // TODO: MT or GPU
-    for(Index i=0; i<graph->node_count+1; i++){
-        graph->pre_neighbor_offsets[i]=0;
-    }
-
-    Index edge_count = (Index)PyArray_DIM(edges, 1);
-
-    for(Index e=0; e<edge_count; e++){
-        Node dst_node = *((Node*)PyArray_GETPTR2(edges, 1, e));
-        graph->pre_neighbor_offsets[Node_To_Index(dst_node)+1]++;
-    }
-
-    neighbor_counts_to_offsets(graph->node_count, graph->pre_neighbor_offsets);
-
-    return 0;
-}
-
-static Node src_node_at(Graph* g, Index i){
-    ASSERT(i < g->pre_neighbor_offsets[g->node_count]);
-
-    return *((Node*)PyArray_GETPTR2(g->edge_list, 0, i));
-}
-
-static Node dst_node_at(Graph* g, Index i){
-    ASSERT(i < g->pre_neighbor_offsets[g->node_count]);
-    return *((Node*)PyArray_GETPTR2(g->edge_list, 1, i));
-}
-
-static EdgeType edge_type_at(Graph* g, Index i){
-    ASSERT(i < g->pre_neighbor_offsets[g->node_count]);
-    return *((EdgeType*)PyArray_GETPTR2(g->edge_list, 2, i));
-}
-
-static PyObject* Graph_print(Graph* graph, PyObject *Py_UNUSED(ignored)){
-    for(Index i=0; i<graph->node_count; i++){
-        Index c = graph->pre_neighbor_offsets[i+1]-graph->pre_neighbor_offsets[i];
-        Index o = graph->pre_neighbor_offsets[i];
-
-        for(Index ii=0; ii<c; ii++)
-            printf("%u ", src_node_at(graph, o+ii));
-
-        puts("");
-    }
-
-    Py_RETURN_NONE;
-}
-
-static PyObject* Graph_preneighborhood_count(Graph* graph, PyObject* args){
-    Node n;
-
-    if(!PyArg_ParseTuple(args, "I", &n))
-        return NULL;
-
-    Index pre_neighbor_count = graph->pre_neighbor_offsets[Node_To_Index(n)+1]-graph->pre_neighbor_offsets[Node_To_Index(n)];
-
-    return PyLong_FromIndex(pre_neighbor_count);
-}
-
-// static PyObject* Graph_edge_list(Graph* graph, void* closure){
-//     return (PyObject*)graph->edge_list;
-// }
-
-static PyMethodDef Graph_methods[] = {
-    {"print", (PyCFunction)Graph_print, METH_NOARGS, "print the graph"},
-    {"preneighborhood_count", (PyCFunction)Graph_preneighborhood_count, METH_VARARGS, "get the size of the pre-neighborhood of a node within the graph"},
-    {NULL}
-};
-
-// static PyGetSetDef Graph_properties[] = {
-//     {"edge_list", Graph_edge_list, NULL, "getter for underlying edge list of Graph", NULL},
-//     NULL
-// };
-
-static PyTypeObject GraphType = {
-    PyVarObject_HEAD_INIT(NULL, 0)
-    .tp_name = "GMSamplers.Graph",
-    .tp_doc = PyDoc_STR("GMSamplers graph"),
-    .tp_basicsize = sizeof(Graph),
-    .tp_itemsize = 0,
-    .tp_flags = Py_TPFLAGS_DEFAULT,
-    .tp_new = Graph_new,
-    .tp_init = (initproc) Graph_init,
-    .tp_dealloc = (destructor) Graph_dealloc,
-    .tp_methods = Graph_methods,
-    //.tp_getset = Graph_properties
-};
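/*
Context for Graph_init above: pre_neighbor_offsets is a CSR-style lookup
table. The counting pass stores the in-degree of node i at slot i+1, and
neighbor_counts_to_offsets (defined elsewhere in this repo) turns those
counts into running offsets, so node i's pre-neighbors occupy edge-list
columns [offsets[i], offsets[i+1]). A minimal standalone sketch of that
counts-to-offsets pass -- the helper below is a stand-in written for
illustration, not the project's actual implementation:
*/
#include <stdio.h>

typedef unsigned int Index;

// Stand-in for the project's neighbor_counts_to_offsets: turns per-node
// counts stored at offsets[1..node_count] into inclusive running sums.
static void neighbor_counts_to_offsets(Index node_count, Index* offsets){
    for(Index i = 0; i < node_count; i++)
        offsets[i+1] += offsets[i];
}

int main(void){
    // In-degrees of 4 nodes, stored shifted by one as in Graph_init.
    Index offsets[5] = {0, 2, 0, 1, 3};
    neighbor_counts_to_offsets(4, offsets);

    // Prints 0 2 2 3 6: node 3's pre-neighbors sit at edge indices [3, 6).
    for(int i = 0; i < 5; i++)
        printf("%u ", offsets[i]);
    puts("");
    return 0;
}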
"GMSamplers.Graph", - .tp_doc = PyDoc_STR("GMSamplers graph"), - .tp_basicsize = sizeof(Graph), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT, - .tp_new = Graph_new, - .tp_init = (initproc) Graph_init, - .tp_dealloc = (destructor) Graph_dealloc, - .tp_methods = Graph_methods, - //.tp_getset = Graph_properties -}; diff --git a/include/musical_sampling.c b/include/musical_sampling.c deleted file mode 100644 index 0b6465c..0000000 --- a/include/musical_sampling.c +++ /dev/null @@ -1,667 +0,0 @@ -static PyObject* random_score_region(PyObject* csamplers, PyObject* args){ - PyArrayObject* np_onsets; - PyArrayObject* np_unique_onset_indices; - Index budget; - #define uint unsigned int - - if(!PyArg_ParseTuple(args, "OOI", (PyObject**)&np_onsets, (PyObject**)&np_unique_onset_indices, (uint*)&budget)){ - printf("If you don't provide proper arguments, you can't get a random score region.\nHow can you get a random score region if you don't provide proper arguments?\n"); - return NULL; - } - - int* onsets = (int*)PyArray_DATA(np_onsets); - Index* unique_onset_indices = (Index*)PyArray_DATA(np_unique_onset_indices); - - Index perm_size = (Index)PyArray_SIZE(np_unique_onset_indices); - - Index* perm = (Index*)malloc(sizeof(Index)*perm_size); - for(Index i=0; i= PyArray_SIZE(np_onsets)){ - region_end = (Index)PyArray_SIZE(np_onsets); - break; - } - - region_end = region_start+budget; - - while(region_end-1>=region_start && onsets[region_end]==onsets[region_end-1]){ - ASSERT(region_end>=1); - - region_end--; - } - - if(region_start < region_end) - break; - - perm[rand_i] = perm[i]; - } - - free(perm); - - return PyTuple_Pack(2, PyLong_FromIndex(region_start), PyLong_FromIndex(region_end)); -} - - - - -static PyObject* extend_score_region_via_neighbor_sampling(PyObject* csamplers, PyObject* args){ - Graph* graph; - PyArrayObject* np_onsets; - PyArrayObject* np_durations; - PyArrayObject* np_endtimes_cummax; - Index region_start; - Index region_end; - Index samples_per_node; - int sample_rightmost; - - if(!PyArg_ParseTuple(args, "OOOOIIIp", (PyObject**)&graph, (PyObject**)&np_onsets, (PyObject**)&np_durations, (PyObject**)&np_endtimes_cummax, (uint*)®ion_start, (uint*)®ion_end, (uint*)&samples_per_node, &sample_rightmost)){ - printf("If you don't provide proper arguments, you can't extend a score region via neighbor sampling.\nHow can you extend a score region via neighbor sampling if you don't provide proper arguments?\n"); - return NULL; - } - - HashSet samples, node_tracker; - HashSet_new(&samples, region_end-region_start); - HashSet_new(&node_tracker, samples_per_node); - - - PyArrayObject* left_extension; - PyArrayObject* left_edges; - PyArrayObject* right_extension; - PyArrayObject* right_edges; - - Index edge_list_size = region_end-region_start; - Node* edge_list = (Node*)malloc(3*sizeof(Node)*edge_list_size); - ASSERT(edge_list); - Index edge_list_cursor; - - - int* onsets = (int*)PyArray_DATA(np_onsets); - int* durations = (int*)PyArray_DATA(np_durations); - int* endtimes_cummax = (int*)PyArray_DATA(np_endtimes_cummax); - - if(region_start > 0){ - int onset_ref = -1; - HashSet_init(&samples); - edge_list_cursor = 0; - - for(Index j=region_start; j endtimes_cummax[region_start-1]){ - if(onset_ref>=0){ - if(onset_ref != onsets[j]) - break; - } - else{ - onset_ref = onsets[j]; - } - } - - Index offset = graph->pre_neighbor_offsets[j]; - Index pre_neighbor_count = graph->pre_neighbor_offsets[j+1]-graph->pre_neighbor_offsets[j]; - - Index marker = 0; - - while(marker < pre_neighbor_count && 
-static PyObject* extend_score_region_via_neighbor_sampling(PyObject* csamplers, PyObject* args){
-    Graph* graph;
-    PyArrayObject* np_onsets;
-    PyArrayObject* np_durations;
-    PyArrayObject* np_endtimes_cummax;
-    Index region_start;
-    Index region_end;
-    Index samples_per_node;
-    int sample_rightmost;
-
-    if(!PyArg_ParseTuple(args, "OOOOIIIp", (PyObject**)&graph, (PyObject**)&np_onsets, (PyObject**)&np_durations, (PyObject**)&np_endtimes_cummax, (uint*)&region_start, (uint*)&region_end, (uint*)&samples_per_node, &sample_rightmost)){
-        printf("If you don't provide proper arguments, you can't extend a score region via neighbor sampling.\nHow can you extend a score region via neighbor sampling if you don't provide proper arguments?\n");
-        return NULL;
-    }
-
-    HashSet samples, node_tracker;
-    HashSet_new(&samples, region_end-region_start);
-    HashSet_new(&node_tracker, samples_per_node);
-
-    PyArrayObject* left_extension;
-    PyArrayObject* left_edges;
-    PyArrayObject* right_extension;
-    PyArrayObject* right_edges;
-
-    Index edge_list_size = region_end-region_start;
-    Node* edge_list = (Node*)malloc(3*sizeof(Node)*edge_list_size);
-    ASSERT(edge_list);
-    Index edge_list_cursor;
-
-    int* onsets = (int*)PyArray_DATA(np_onsets);
-    int* durations = (int*)PyArray_DATA(np_durations);
-    int* endtimes_cummax = (int*)PyArray_DATA(np_endtimes_cummax);
-
-    if(region_start > 0){
-        int onset_ref = -1;
-        HashSet_init(&samples);
-        edge_list_cursor = 0;
-
-        for(Index j=region_start; j<region_end; j++){
-            if(onsets[j] > endtimes_cummax[region_start-1]){
-                if(onset_ref>=0){
-                    if(onset_ref != onsets[j])
-                        break;
-                }
-                else{
-                    onset_ref = onsets[j];
-                }
-            }
-
-            Index offset = graph->pre_neighbor_offsets[j];
-            Index pre_neighbor_count = graph->pre_neighbor_offsets[j+1]-graph->pre_neighbor_offsets[j];
-
-            Index marker = 0;
-
-            while(marker < pre_neighbor_count && Node_To_Index(src_node_at(graph, offset+marker)) < region_start)
-                marker++;
-
-            Index predict_cursor = edge_list_cursor + MACRO_MIN(marker, samples_per_node);
-
-            if(predict_cursor >= edge_list_size){
-                Index new_size = (Index)(edge_list_size*1.5f);
-
-                while(predict_cursor>=new_size)
-                    new_size = (Index)(new_size*1.5f);
-
-                Node* tmp = edge_list;
-                edge_list = (Node*)malloc(3*sizeof(Node)*new_size);
-
-                ASSERT(edge_list);
-
-                memcpy(edge_list, tmp, 3*sizeof(Node)*edge_list_size);
-
-                free(tmp);
-                edge_list_size = new_size;
-            }
-
-            if(marker <= samples_per_node){
-                for(Index i=0; i<marker; i++){
-                    Node pre_neighbor = src_node_at(graph, offset+i);
-
-                    HashSet_add_node(&samples, pre_neighbor);
-
-                    edge_list[3*edge_list_cursor] = pre_neighbor;
-                    edge_list[3*edge_list_cursor+1] = Index_To_Node(j);
-                    edge_list[3*edge_list_cursor+2] = edge_type_at(graph, offset+i);
-
-                    edge_list_cursor++;
-                }
-            }
-            else if(samples_per_node > (Index)(0.75*marker)){
-                Index* perm = (Index*)malloc(sizeof(Index)*marker);
-
-                ASSERT(perm);
-
-                for(Index i=0; i<marker; i++)
-                    perm[i]=i;
-
-                for(Index i=0; i<samples_per_node; i++){
-                    Index rand_i = i + rand()%(marker-i);
-
-                    Node node_sample = src_node_at(graph, offset+perm[rand_i]);
-
-                    HashSet_add_node(&samples, node_sample);
-
-                    edge_list[3*edge_list_cursor] = node_sample;
-                    edge_list[3*edge_list_cursor+1] = Index_To_Node(j);
-                    edge_list[3*edge_list_cursor+2] = edge_type_at(graph, offset+perm[rand_i]);
-
-                    edge_list_cursor++;
-
-                    perm[rand_i]=perm[i];
-                }
-
-                free(perm);
-            }
-            else{ //rejection sampling
-                HashSet_init(&node_tracker);
-
-                for(Index sample=0; sample<samples_per_node; sample++){
-                    Index edge_index;
-                    Node node_sample;
-
-                    for(;;){
-                        edge_index = rand()%marker;
-                        node_sample = src_node_at(graph, offset+edge_index);
-                        if(HashSet_add_node(&node_tracker, node_sample))
-                            break;
-                    }
-
-                    HashSet_add_node(&samples, node_sample);
-
-                    edge_list[3*edge_list_cursor] = node_sample;
-                    edge_list[3*edge_list_cursor+1] = Index_To_Node(j);
-                    edge_list[3*edge_list_cursor+2] = edge_type_at(graph, offset+edge_index);
-
-                    edge_list_cursor++;
-                }
-            }
-        }
-
-        left_extension = HashSet_to_numpy(&samples);
-        left_edges = numpy_edge_list(edge_list, edge_list_cursor);
-    }
-
-    if(sample_rightmost && region_end < (Index)PyArray_SIZE(np_onsets)){
-        HashSet_init(&samples);
-        edge_list_cursor = 0;
-
-        for(Int i=(Int)region_end-1; i>=(Int)region_start; i--){
-            if(endtimes_cummax[i] <= onsets[region_end-1])
-                break;
-
-            Index marker = region_end;
-
-            while(marker < PyArray_SIZE(np_onsets) && onsets[marker] <= onsets[i]+durations[i])
-                marker++;
-
-            if(((marker < PyArray_SIZE(np_onsets)) & (marker >= 1)) && onsets[marker-1] < onsets[i]+durations[i])
-                marker++;
-
-            Index predict_cursor = edge_list_cursor + MACRO_MIN(marker-region_end, samples_per_node);
-
-            if(predict_cursor >= edge_list_size){
-                Index new_size = (Index)(edge_list_size*1.5f);
-
-                while(predict_cursor>=new_size)
-                    new_size = (Index)(new_size*1.5f);
-
-                Node* tmp = edge_list;
-                edge_list = (Node*)malloc(3*sizeof(Node)*new_size);
-
-                ASSERT(edge_list);
-
-                memcpy(edge_list, tmp, 3*sizeof(Node)*edge_list_size);
-
-                free(tmp);
-                edge_list_size = new_size;
-            }
-
-            if(marker-region_end <= samples_per_node){
-                for(Node j=region_end; j<marker; j++){
-                    HashSet_add_node(&samples, j);
-
-                    edge_list[3*edge_list_cursor] = Index_To_Node(i);
-                    edge_list[3*edge_list_cursor+1] = j;
-
-                    edge_list_cursor++;
-                }
-            }
-            else if(samples_per_node > (Index)(0.75*(marker-region_end))){
-                Node* perm = (Node*)malloc(sizeof(Node)*(marker-region_end));
-
-                ASSERT(perm);
-
-                for(Index j=0; j<marker-region_end; j++)
-                    perm[j]=Index_To_Node(region_end+j);
-
-                for(Index j=0; j<samples_per_node; j++){
-                    Index rand_j = j + rand()%(marker-region_end-j);
-
-                    Node node_sample = perm[rand_j];
-
-                    HashSet_add_node(&samples, node_sample);
-
-                    edge_list[3*edge_list_cursor] = Index_To_Node(i);
-                    edge_list[3*edge_list_cursor+1] = node_sample;
-
-                    // onsets[node_sample]==onsets[i] => edge_list[3*edge_list_cursor+2] = Onset
-                    // onsets[node_sample]==onsets[i]+durations[i] => edge_list[3*edge_list_cursor+2] = Consecutive
-                    // etc.
-
-                    edge_list_cursor++;
-
-                    perm[rand_j]=perm[j];
-                }
-
-                free(perm);
-            }
-            else{ //rejection sampling
-                HashSet_init(&node_tracker);
-
-                for(Index sample=0; sample<samples_per_node; sample++){
-                    Node node_sample;
-
-                    for(;;){
-                        node_sample = Index_To_Node(region_end + rand()%(marker-region_end));
-                        if(HashSet_add_node(&node_tracker, node_sample))
-                            break;
-                    }
-
-                    HashSet_add_node(&samples, node_sample);
-
-                    edge_list[3*edge_list_cursor] = Index_To_Node(i);
-                    edge_list[3*edge_list_cursor+1] = node_sample;
-
-                    edge_list_cursor++;
-                }
-            }
-        }
-
-        right_extension = HashSet_to_numpy(&samples);
-        right_edges = numpy_edge_list(edge_list, edge_list_cursor);
-    }
-
-    free(edge_list);
-    HashSet_free(&samples);
-    HashSet_free(&node_tracker);
-
-    return PyTuple_Pack(2, PyTuple_Pack(2, left_extension, left_edges), PyTuple_Pack(2, right_extension, right_edges));
-}
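/*
The repeated resize block above (grow edge_list by 1.5x until predict_cursor
fits, memcpy, free) is the usual geometric-growth scheme: each element is
copied O(1) times on average, so appends stay amortized constant-time while
over-allocation is bounded. A compact sketch of the same pattern --
ensure_capacity is an illustrative name, not a function from this repo:
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Grow *buf geometrically until it holds `needed` elements of size `elem`,
// mirroring the 1.5x resize loop used for edge_list in the deleted sampler.
static int ensure_capacity(void** buf, size_t elem, size_t* cap, size_t needed){
    if(needed < *cap) return 1;

    size_t new_cap = *cap + *cap/2 + 1; // ~1.5x; +1 so tiny buffers still grow
    while(needed >= new_cap)
        new_cap += new_cap/2 + 1;

    void* grown = malloc(elem*new_cap);
    if(grown == NULL) return 0;

    memcpy(grown, *buf, elem*(*cap)); // preserve the already-written prefix
    free(*buf);

    *buf = grown;
    *cap = new_cap;
    return 1;
}

int main(void){
    size_t cap = 4;
    int* xs = (int*)malloc(sizeof(int)*cap);

    for(size_t i = 0; i < 100; i++){
        if(!ensure_capacity((void**)&xs, sizeof(int), &cap, i)) return 1;
        xs[i] = (int)i;
    }

    printf("final capacity: %zu\n", cap); // grew a handful of times, not 100
    free(xs);
    return 0;
}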
-static PyObject* sample_neighbors_in_score_graph(PyObject* csamplers, PyObject* args){
-    PyArrayObject* np_onsets;
-    PyArrayObject* np_durations;
-    uint depth;
-    Index samples_per_node;
-    PyArrayObject* target_nodes;
-
-    if(!PyArg_ParseTuple(args, "OOII|O", (PyObject**)&np_onsets, (PyObject**)&np_durations, &depth, (uint*)&samples_per_node, (PyObject**)&target_nodes)){
-        printf("If you don't provide proper arguments, you can't sample neighbors in a score graph.\nHow can you sample neighbors in a score graph if you don't provide proper arguments?\n");
-        return NULL;
-    }
-
-    int* onsets = (int*)PyArray_DATA(np_onsets);
-    int* durations = (int*)PyArray_DATA(np_durations);
-
-    PyObject* samples_per_layer = PyList_New(depth+1);
-    PyObject* load_per_layer = PyList_New(depth);
-    PyObject* edge_indices_between_layers = PyList_New(depth);
-
-    PyArrayObject* prev_layer = target_nodes;
-    PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)target_nodes);
-
-    HashSet node_hash_set;
-    HashSet_new(&node_hash_set, (Index)PyArray_SIZE(target_nodes));
-
-    HashSet node_tracker;
-    HashSet_new(&node_tracker, samples_per_node);
-
-    Index edge_list_size = (Index)PyArray_SIZE(target_nodes)*samples_per_node;
-    Node* edge_list = (Node*)malloc(3*sizeof(Node)*edge_list_size);
-
-    ASSERT(edge_list);
-
-    for(uint layer=depth;layer>0; layer--){
-        Index edge_list_cursor=0;
-
-        HashSet_init(&node_hash_set);
-
-        Node* prev_layer_nodes = (Node*)PyArray_DATA(prev_layer);
-
-        Index prev_size = (Index)PyArray_SIZE(prev_layer);
-        for(Index n=0; n<prev_size; n++){
-            Index i = Node_To_Index(prev_layer_nodes[n]);
-
-            Index lower_bound, upper_bound;
-
-            if(i == 0){
-                lower_bound = 0;
-            }
-            else{
-                lower_bound = i-1;
-
-                while(lower_bound > 0 && onsets[lower_bound]==onsets[i])
-                    lower_bound--;
-
-                if(lower_bound > 0)
-                    lower_bound++;
-                else if(onsets[0]!=onsets[i])
-                    lower_bound = 1;
-            }
-
-            if(i == PyArray_SIZE(np_onsets)-1){
-                upper_bound = (Index)PyArray_SIZE(np_onsets);
-            }
-            else{
-                upper_bound = i+1;
-
-                while(upper_bound < PyArray_SIZE(np_onsets) && onsets[upper_bound] < onsets[i] + durations[i])
-                    upper_bound++;
-
-                if(upper_bound < PyArray_SIZE(np_onsets))
-                    upper_bound++;
-            }
-
-            Index neighbor_count = upper_bound-lower_bound;
-
-            if(neighbor_count <= samples_per_node){
-                for(Index j=lower_bound; j<upper_bound; j++){
-                    HashSet_add_node(&node_hash_set, Index_To_Node(j));
-
-                    edge_list[3*edge_list_cursor] = Index_To_Node(j);
-                    edge_list[3*edge_list_cursor+1] = Index_To_Node(i);
-
-                    edge_list_cursor++;
-                }
-            }
-            else if(samples_per_node > (Index)(0.75*neighbor_count)){
-                Node* perm = (Node*)malloc(sizeof(Node)*neighbor_count);
-
-                ASSERT(perm);
-
-                for(Index ix=0; ix<neighbor_count; ix++)
-                    perm[ix]=Index_To_Node(lower_bound+ix);
-
-                for(Index ix=0; ix<samples_per_node; ix++){
-                    Index rand_ix = ix + rand()%(neighbor_count-ix);
-
-                    Node node_sample = perm[rand_ix];
-
-                    HashSet_add_node(&node_hash_set, node_sample);
-
-                    edge_list[3*edge_list_cursor] = node_sample;
-                    edge_list[3*edge_list_cursor+1] = Index_To_Node(i);
-
-                    edge_list_cursor++;
-
-                    perm[rand_ix]=perm[ix];
-                }
-
-                free(perm);
-            }
-            else{ //rejection sampling
-                HashSet_init(&node_tracker);
-
-                for(Index sample=0; sample<samples_per_node; sample++){
-                    Node node_sample;
-
-                    for(;;){
-                        node_sample = Index_To_Node(lower_bound + rand()%neighbor_count);
-                        if(HashSet_add_node(&node_tracker, node_sample))
-                            break;
-                    }
-
-                    HashSet_add_node(&node_hash_set, node_sample);
-
-                    edge_list[3*edge_list_cursor] = node_sample;
-                    edge_list[3*edge_list_cursor+1] = Index_To_Node(i);
-
-                    edge_list_cursor++;
-                }
-            }
-        }
-
-        PyArrayObject* new_layer = HashSet_to_numpy(&node_hash_set);
-
-        PyList_SET_ITEM(samples_per_layer, layer-1, (PyObject*)new_layer);
-        PyList_SET_ITEM(load_per_layer, layer-1, (PyObject*)HashSet_to_numpy(&node_hash_set));
-        PyList_SET_ITEM(edge_indices_between_layers, layer-1, (PyObject*)numpy_edge_list(edge_list, edge_list_cursor));
-
-        prev_layer = new_layer;
-    }
-
-    free(edge_list);
-    HashSet_free(&node_hash_set);
-    HashSet_free(&node_tracker);
-
-    return PyTuple_Pack(3, samples_per_layer, edge_indices_between_layers, load_per_layer);
-}
-
-static PyObject* sample_preneighbors_within_region(PyObject* csamplers, PyObject* args){
-    Graph* graph;
-    Index region_start;
-    Index region_end;
-    Index samples_per_node;
-
-    if(!PyArg_ParseTuple(args, "OIII", (PyObject**)&graph, (uint*)&region_start, (uint*)&region_end, (uint*)&samples_per_node)){
-        printf("If you don't provide proper arguments, you can't sample pre-neighbors within a region.\nHow can you sample pre-neighbors within a region if you don't provide proper arguments?\n");
-        return NULL;
-    }
-
-    HashSet samples;
-    HashSet_new(&samples, region_end-region_start);
-    HashSet_init(&samples);
-
-    Index edge_list_size = (region_end-region_start)*samples_per_node;
-    Node* edge_list = (Node*)malloc(3*sizeof(Node)*edge_list_size);
-
-    ASSERT(edge_list);
-
-    Index edge_list_cursor = 0;
-
-    for(Index j=region_start; j<region_end; j++){
-        Index offset = graph->pre_neighbor_offsets[j];
-        Index pre_neighbor_count = graph->pre_neighbor_offsets[j+1]-offset;
-
-        Index intersection_start = 0;
-
-        while(intersection_start < pre_neighbor_count && Node_To_Index(src_node_at(graph, offset+intersection_start)) < region_start)
-            intersection_start++;
-
-        Index intersection_end = intersection_start+1;
-
-        while(intersection_end < pre_neighbor_count && Node_To_Index(src_node_at(graph, offset+intersection_end)) < region_end)
-            intersection_end++;
-
-        Index intersection_count = intersection_end - intersection_start;
-
-        if(intersection_count <= samples_per_node){
-            for(Index ix=intersection_start; ix < intersection_end; ix++){
-                Node i = src_node_at(graph, offset + ix);
-                HashSet_add_node(&samples, i);
-
-                ASSERT(edge_list_cursor < edge_list_size);
-
-                edge_list[3*edge_list_cursor]=i;
-                edge_list[3*edge_list_cursor+1]=Index_To_Node(j);
-                edge_list[3*edge_list_cursor+2]=edge_type_at(graph, offset + ix);
-
-                edge_list_cursor++;
-            }
-        }
-        /*
-        expected number of attempts to insert a unique sample into set with k elements is n/(n-k)
-        for k=n*f, this results in 1/(1-f), meaning, if we want to limit the expected number of attempts
-        to let's say 4, f has to be at most 3/4=0.75
-
-        if this threshold is reached, random subset is sampled via random permutation
-        this is viable since memory waste is at most 25% (for temporary storage)
-        */
-        else if(samples_per_node > (Index)(0.75*intersection_count)){
-            Index* perm = (Index*)malloc(sizeof(Index)*intersection_count);
-
-            ASSERT(perm);
-
-            for(Index ix=0; ix<intersection_count; ix++)
-                perm[ix]=ix;
-
-            for(Index ix=0; ix<samples_per_node; ix++){
-                Index rand_ix = ix + rand()%(intersection_count-ix);
-
-                Node i = src_node_at(graph, offset + intersection_start + perm[rand_ix]);
-
-                HashSet_add_node(&samples, i);
-
-                ASSERT(edge_list_cursor < edge_list_size);
-
-                edge_list[3*edge_list_cursor]=i;
-                edge_list[3*edge_list_cursor+1]=Index_To_Node(j);
-                edge_list[3*edge_list_cursor+2]=edge_type_at(graph, offset + intersection_start + perm[rand_ix]);
-
-                edge_list_cursor++;
-
-                perm[rand_ix]=perm[ix];
-            }
-
-            free(perm);
-        }
-        else{ //rejection sampling
-            HashSet node_tracker;
-            HashSet_new(&node_tracker, samples_per_node);
-            HashSet_init(&node_tracker);
-
-            for(Index sample=0; sample<samples_per_node; sample++){
-                Index edge_index;
-                Node i;
-
-                for(;;){
-                    edge_index = intersection_start + rand()%intersection_count;
-                    i = src_node_at(graph, offset + edge_index);
-                    if(HashSet_add_node(&node_tracker, i))
-                        break;
-                }
-
-                HashSet_add_node(&samples, i);
-
-                edge_list[3*edge_list_cursor]=i;
-                edge_list[3*edge_list_cursor+1]=Index_To_Node(j);
-                edge_list[3*edge_list_cursor+2]=edge_type_at(graph, offset + edge_index);
-
-                edge_list_cursor++;
-            }
-
-            HashSet_free(&node_tracker);
-        }
-    }
-
-    PyArrayObject* region_pre_neighbors = HashSet_to_numpy(&samples);
-    PyArrayObject* region_edges = numpy_edge_list(edge_list, edge_list_cursor);
-
-    free(edge_list);
-    HashSet_free(&samples);
-
-    return PyTuple_Pack(2, region_pre_neighbors, region_edges);
-}
-
-typedef struct{
-    Index capacity;
-    Index tracked;
-    Node* nodes;
-}NodeTracker;
-
-static Index NodeTracker_index(NodeTracker* nt, Node n){
-    for(Index i=0; i<nt->tracked; i++)
-        if(n == nt->nodes[i])
-            return i;
-
-    return nt->capacity;
-}
-
-static bool NodeTracker_add_succesfully(NodeTracker* nt, Node n){
-    if(NodeTracker_index(nt, n) < nt->capacity)
-        return false;
-
-    nt->nodes[nt->tracked++] = n;
-    return true;
-}
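/*
On the 0.75 threshold used by the samplers above: drawing uniformly from n
candidates when k are already taken succeeds with probability (n-k)/n, so a
successful insertion costs n/(n-k) attempts in expectation -- 1/(1-f) at
fill ratio f = k/n. Capping the expectation at 4 attempts gives f <= 3/4,
hence rejection sampling is only used below that fill ratio and a partial
permutation takes over above it. A standalone check of the numbers (toy
code, not part of the module):
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Count attempts needed to draw `want` distinct values from [0, n) by pure
// rejection against a membership table, mimicking the samplers above.
static unsigned attempts_for_distinct(unsigned n, unsigned want, bool* seen){
    for(unsigned i = 0; i < n; i++) seen[i] = false;

    unsigned attempts = 0, got = 0;
    while(got < want){
        unsigned candidate = (unsigned)rand() % n;
        attempts++;
        if(!seen[candidate]){ seen[candidate] = true; got++; }
    }
    return attempts;
}

int main(void){
    enum { N = 1000, TRIALS = 200 };
    bool seen[N];
    srand(0);

    unsigned long total = 0;
    for(int t = 0; t < TRIALS; t++)
        total += attempts_for_distinct(N, (unsigned)(0.75 * N), seen);

    // The last draw alone costs ~4 attempts in expectation; the whole run
    // averages N*ln(1/(1-0.75)) ~= 1.386*N attempts, comfortably bounded.
    printf("avg attempts to reach 75%% fill: %.1f (n=%d)\n",
           (double)total / TRIALS, N);
    return 0;
}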
-#define NodeHashSet_bucket_count 23 //should be prime
-
-typedef struct{
-    Index capacity;
-    Index size;
-    NodeTracker buckets[NodeHashSet_bucket_count];
-}NodeHashSet;
-
-// ASSUMPTION: capacity is already set
-static void NodeHashSet_init(NodeHashSet* node_hash_set){
-    assert(node_hash_set->capacity > 0);
-    assert(node_hash_set->capacity % NodeHashSet_bucket_count == 0);
-    assert(node_hash_set->buckets[0].nodes);
-
-    uint nodes_per_bucket = node_hash_set->capacity/NodeHashSet_bucket_count;
-
-    node_hash_set->buckets[0].capacity = nodes_per_bucket;
-    node_hash_set->buckets[0].tracked = 0;
-
-    node_hash_set->size=0;
-
-    for(uint b=1; b<NodeHashSet_bucket_count; b++){
-        node_hash_set->buckets[b].capacity = nodes_per_bucket;
-        node_hash_set->buckets[b].tracked = 0;
-        node_hash_set->buckets[b].nodes = node_hash_set->buckets[b-1].nodes + nodes_per_bucket;
-    }
-}
-
-static void NodeHashSet_new(NodeHashSet* node_hash_set, Index min_capacity){
-    Index init_nodes_per_bucket = (min_capacity/NodeHashSet_bucket_count + 1);
-    Index capacity = NodeHashSet_bucket_count*init_nodes_per_bucket;
-    node_hash_set->buckets[0].nodes = (Node*)malloc(sizeof(Node)*capacity);
-
-    assert(node_hash_set->buckets[0].nodes);
-
-    node_hash_set->capacity = capacity;
-}
-
-static void NodeHashSet_new_init(NodeHashSet* node_hash_set, Index min_capacity){
-    NodeHashSet_new(node_hash_set, min_capacity);
-
-    NodeHashSet_init(node_hash_set);
-}
-
-static uint NodeHashSet_capacity(NodeHashSet* node_hash_set){
-    uint capacity=0;
-
-    for(uint b=0; b<NodeHashSet_bucket_count; b++)
-        capacity += node_hash_set->buckets[b].capacity;
-
-    node_hash_set->capacity = capacity;
-
-    return capacity;
-}
-
-static uint NodeHashSet_size(NodeHashSet* node_hash_set){
-    uint size=0;
-
-    for(uint b=0; b<NodeHashSet_bucket_count; b++)
-        size += node_hash_set->buckets[b].tracked;
-
-    node_hash_set->size = size;
-
-    return size;
-}
-
-static bool NodeHashSet_is_full(NodeHashSet* nhs){
-    for(uint b=0; b<NodeHashSet_bucket_count; b++)
-        if(nhs->buckets[b].tracked < nhs->buckets[b].capacity)
-            return false;
-
-    return true;
-}
-
-
-static bool NodeHashSet_add_succesfully(NodeHashSet* nhs, Node n){
-    int bucket_index = (int)Node_hash(n)%NodeHashSet_bucket_count;
-
-    NodeTracker* bucket_tracker = nhs->buckets+bucket_index;
-
-    // Node* memory_offset = nhs->buckets[0].nodes;
-
-    // bucket_tracker->nodes += memory_offset;
-
-    Index index = NodeTracker_index(bucket_tracker, n);
-
-    // bucket_tracker->nodes -= memory_offset;
-
-    if(index < bucket_tracker->capacity){
-        return false;
-    }
-
-    if(nhs->size == nhs->capacity){
-        const uint grow_factor = 2;
-
-        Node* new_nodes = (Node*)malloc(sizeof(Node)*grow_factor*nhs->capacity);
-
-        if(new_nodes == NULL){
-            // TODO: should be handled with an error msg
-            puts("oops no new memory");
-            return false;
-        }
-
-        printf("resizing from %u to %u\n", nhs->capacity, nhs->capacity*grow_factor);
-
-        Index cursor=0;
-
-        Node* mem_to_free = nhs->buckets[0].nodes;
-
-        for(uint b=0; b<NodeHashSet_bucket_count; b++){
-            NodeTracker* bucket_tracker = nhs->buckets + b;
-
-            bucket_tracker->capacity *= grow_factor;
-
-            Node* backup = bucket_tracker->nodes;
-
-            bucket_tracker->nodes = new_nodes + cursor;
-
-            for(Index t=0; t<bucket_tracker->tracked; t++)
-                bucket_tracker->nodes[t] = backup[t];
-
-            cursor+=bucket_tracker->capacity;
-        }
-
-        free(mem_to_free);
-
-        nhs->capacity *= grow_factor;
-    }
-
-    if(bucket_tracker->tracked == bucket_tracker->capacity){
-        int eviction_index = bucket_index;
-
-        for(int b=bucket_index+1; b<NodeHashSet_bucket_count; b++){
-            NodeTracker* bt = nhs->buckets+b;
-
-            if(bt->tracked<bt->capacity){
-                eviction_index = b;
-                break;
-            }
-        }
-
-        if(eviction_index == bucket_index){
-            for(int b=bucket_index-1; b>=0;b--){
-                NodeTracker* bt = nhs->buckets+b;
-
-                if(bt->tracked<bt->capacity){
-                    eviction_index = b;
-                    break;
-                }
-            }
-        }
-
-        assert(eviction_index != bucket_index);
-
-        (nhs->buckets+eviction_index)->capacity--;
-
-        int dir = (eviction_index < bucket_index)? 1 : -1;
-
-        while(eviction_index != bucket_index){
-            NodeTracker* full_bt = nhs->buckets + eviction_index + dir;
-            NodeTracker* bt = nhs->buckets + eviction_index;
-
-            NodeTracker* indirect_bt;
-            Int write_index, read_index;
-
-            /*
-            The idea here is:
-            1. take the bucket with the higher address
-            2. read in the direction from the end of the bucket
-               so if dir = 1 = ->, read at last index
-               if dir = -1 = <-, read at first index
-            3. write in the opposite direction, this time specifically at the end of non-empty bucket
-               from the perspective of higher address bucket, hence the -1 in first branch
-
-            if(dir==1){
-                indirect_bt = full_bt;
-                read_index = full_bt->tracked-1;
-                write_index = -1;
-
-            }
-            else{
-                indirect_bt = bt;
-                read_index = 0;
-                write_index = bt->tracked;
-
-            }
-            */
-
-            indirect_bt = MACRO_MAX(full_bt, bt); //TODO: should be completely branchless otherwise the branch in the comment above can be used for "more" readability
-            read_index = ((Index)(dir==1))*indirect_bt->tracked - (Index)(dir==1);
-            write_index = ((Index)(dir!=1))*indirect_bt->tracked - (Index)(dir==1);
-
-            // indirect_bt->nodes+=memory_offset;
-
-            indirect_bt->nodes[write_index] = indirect_bt->nodes[read_index];
-
-            indirect_bt->nodes-=dir;
-
-            // indirect_bt->nodes-=memory_offset;
-
-            /* NOTE: this isn't necessary since intermediate buckets keep their capacity
-               only the first and last bucket tradeoff capacity
-               however, this is left in a comment for completion
-
-            full_bt->capacity++;
-            bt->capacity--;
-            */
-
-            eviction_index+=dir;
-        }
-
-        bucket_tracker->capacity++;
-    }
-
-    // bucket_tracker->nodes += memory_offset;
-
-    bucket_tracker->nodes[bucket_tracker->tracked++] = n;
-
-    // bucket_tracker->nodes -= memory_offset;
-
-    nhs->size++;
-
-    return true;
-}
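/*
The branchless read/write selection in NodeHashSet_add_succesfully is dense,
so here is a standalone check that it reproduces the branchy version kept in
the comment: dir == 1 must yield read = tracked-1, write = -1, and dir == -1
must yield read = 0, write = tracked (toy code, not part of the module):
*/
#include <assert.h>
#include <stdio.h>

typedef int Int;
typedef unsigned int Index;

int main(void){
    Index tracked = 7;

    for(int dir = -1; dir <= 1; dir += 2){
        // Branchless form, copied from NodeHashSet_add_succesfully; the
        // unsigned wrap-around of 0-1 lands on -1 after the cast to Int.
        Int read_index  = (Int)(((Index)(dir==1))*tracked - (Index)(dir==1));
        Int write_index = (Int)(((Index)(dir!=1))*tracked - (Index)(dir==1));

        // Branchy reference from the comment block.
        Int ref_read  = (dir==1) ? (Int)tracked-1 : 0;
        Int ref_write = (dir==1) ? -1 : (Int)tracked;

        assert(read_index == ref_read && write_index == ref_write);
        printf("dir=%+d: read=%d write=%d\n", dir, read_index, write_index);
    }
    return 0;
}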
-static PyArrayObject* NodeHashSet_to_numpy(NodeHashSet* node_hash_set){
-    PyArrayObject* np_arr = new_node_numpy(NodeHashSet_size(node_hash_set));
-
-    Node* copy_dst = PyArray_DATA(np_arr);
-
-    for(uint b=0; b<NodeHashSet_bucket_count; b++)
-        for(Index n=0; n<node_hash_set->buckets[b].tracked; n++)
-            *copy_dst++ = node_hash_set->buckets[b].nodes[n];
-
-    return np_arr;
-}
\ No newline at end of file
diff --git a/include/utils.c b/include/utils.c
index 8229be3..5f3863b 100644
--- a/include/utils.c
+++ b/include/utils.c
@@ -9,30 +9,6 @@
 	ASSERT(ASSERT_POW2_BIT_COUNTER==1);	\
 }
 #endif
-#ifdef GM_DEBUG_OFF
-#define ASSERT_POW2(N)
-#endif
-
-// ASSUMPTION: N is a power of 2
-// then any odd number is co-prime with N
-static Key skip_hash(Key k, Key N){
-    ASSERT_POW2(N);
-
-    return 2*k+1;
-}
-
-// dark magic from https://stackoverflow.com/questions/14291172/finding-the-smallest-power-of-2-greater-than-n
-// further explanations: https://stackoverflow.com/questions/364985/algorithm-for-finding-the-smallest-power-of-two-thats-greater-or-equal-to-a-giv
-static Key next_pow2(Key n){
-    n+=(n==0);
-    n--;
-    n|=n>>1;
-    n|=n>>2;
-    n|=n>>4;
-    n|=n>>8;
-    n|=n>>16;
-    return n+1;
-}
 
 #define MOD_POW2(K, P2) ((K)&(P2-1))
diff --git a/requirements.txt b/requirements.txt
index 13744b0..f807950 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-partitura>=1.3.0
-cython==0.29.32
+partitura>=1.5.0
 psutil==5.9.5
-numpy==1.23.1
+numpy>=1.23.1
diff --git a/setup.py b/setup.py
index f668c1e..bb5c6ec 100644
--- a/setup.py
+++ b/setup.py
@@ -4,10 +4,10 @@
 dirname = os.path.dirname(__file__)
 
-
+#
 if os.name=='posix':
     print("Compiling for POSIX systems. . .")
-    eca = ["-std=c11"]
+    eca = [] # ["-std=c11"]
     eca.append("-DPOSIX")
 
     # from psutil import cpu_count
@@ -19,11 +19,12 @@
 elif sys.platform.startswith('win'):
     print("Compiling for Windows. . 
.") - eca = ["/std:c11"] + eca = [] # ["/std:c11"] eca.append("-DWindows") else: - raise Exception("Unsupported OS, please use Linux or Windows.") + eca = [] + # raise Exception("Unsupported OS, please use Linux or Windows.") # add flag to turn off debug mode (increasing speed) eca.append("-DGM_DEBUG_OFF") @@ -33,15 +34,19 @@ ext_modules = [ setuptools.Extension( name="graphmuse.samplers.csamplers", sources=[os.path.join("src", "gmsamplersmodule.c")], extra_compile_args = eca, - extra_link_args = [])] + extra_link_args = [], include_dirs=[os.path.join(numpy.get_include(), "numpy"), "include"])] + +# os.environ["CC"] = "gcc" +# os.environ["CXX"] = "gcc" -os.environ["CC"] = "gcc" -os.environ["CXX"] = "gcc" +long_description = open(os.path.join(os.path.dirname(__file__), 'README.md'), "r").read() setuptools.setup( name='graphmuse', version='0.0.1', description='Graph Deep Learning for Music', + long_description=long_description, + long_description_content_type='text/markdown', packages=setuptools.find_packages(), classifiers=[ "Development Status :: 0 - Gamma", @@ -50,9 +55,9 @@ "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering :: Graph Deep Learning", ], - include_dirs=[os.path.join(numpy.get_include(), "numpy"), "include", "../miniconda3/include/libxml2/libxml"], + # , "../miniconda3/include/libxml2/libxml"], # ext_modules=[module], ext_modules= ext_modules, author='Emmanouil Karystinaios, Nimrod Varga', - maintainer='Emmanouil Karystinaios, Nimrod Varga' + maintainer='Emmanouil Karystinaios' ) \ No newline at end of file diff --git a/speed_comp.py b/speed_comp.py deleted file mode 100644 index 66d3d72..0000000 --- a/speed_comp.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy -import graphmuse.samplers as gm_sampling -import time - -import psutil - -for i in range(2,10): - print("available cores: ", len(psutil.Process().cpu_affinity())//2) - - - N = 10**(i//2+1) - - - - E = numpy.random.randint(1,max(N**2,2),1)[0] - - edges = numpy.random.randint(0,N,(2, E),dtype=numpy.uint32) - - edges = sorted(list(set([(edges[0,i], edges[1,i]) for i in range(E)])), key=lambda t:t[1]) - - V = numpy.max(edges) - - - - edges = numpy.array(edges).T - - #dgl_graph = dgl.graph((torch.from_numpy(edges[0].astype(numpy.int32)),torch.from_numpy(edges[0].astype(numpy.int32)))) - - gm_graph = gm_sampling.graph(edges) - - samples_per_node = numpy.random.randint(max(N//100,2),max(N//10,2),1)[0] - - print(f"N: {N}, E: {E}, V: {V}, S: {samples_per_node}") - - targets = numpy.random.choice(edges[1], min(samples_per_node,len(edges[1])), replace=False) - - t0 = time.perf_counter() - - _ = gm_sampling.sample_nodewise_mt_static(gm_graph, 2, samples_per_node, targets) - - t1 = time.perf_counter() - - #_ = dgl.sampling.sample_neighbors(dgl_graph, targets.astype(numpy.int32), samples_per_node) - _ = gm_sampling.sample_nodewise(gm_graph, 2, samples_per_node, targets) - - t2 = time.perf_counter() - - print("MT vs ST:", t1-t0,t2-t1) \ No newline at end of file diff --git a/src/gmsamplersmodule.c b/src/gmsamplersmodule.c index 4e1de69..758a99b 100644 --- a/src/gmsamplersmodule.c +++ b/src/gmsamplersmodule.c @@ -37,227 +37,18 @@ typedef Node EdgeType; #define Node_To_Index(n) ((Index)(n)) #define Index_To_Node(i) ((Node)(i)) -static Index power(Index base, Index exponent){ - Index p = 1; - while(exponent--) - p*=base; - - return p; -} - - -static PyArrayObject* new_node_numpy(Index size){ - const npy_intp dims = (npy_intp)size; - return (PyArrayObject*)PyArray_SimpleNew(1, &dims, Node_Eqv_In_Numpy); -} - -static 
PyArrayObject* numpy_edge_list(Node* edge_list, Index edge_list_size){ - const npy_intp dims[2] = {3,edge_list_size}; - - const npy_intp strides[2] = {sizeof(Node), 3*sizeof(Node)}; - - PyTypeObject* subtype = &PyArray_Type; - - PyArrayObject* result = (PyArrayObject*)PyArray_New(subtype, 2, dims, Node_Eqv_In_Numpy, strides, NULL, 0, NPY_ARRAY_C_CONTIGUOUS, NULL); - - if(edge_list_size > 0) - memcpy(PyArray_DATA(result), edge_list, 3*sizeof(Node)*edge_list_size); - - return result; -} - -//#define GM_DEBUG_OFF -#include #include -#include +#include #ifdef Thread_Count_Arg #include #include -#include -#endif - - - -// this should be fine actually since it is used on random nodes -static Key Node_hash(Node n){ - return (Key)n; -} - -static bool HashSet_add_node(HashSet* hs, Node n){ - return HashSet_add(hs, Node_hash(n)); -} - -// static bool HashSet_is_node_in(HashSet* hs, Node n){ -// return HashSet_is_in(hs, Node_hash(n)); -// } - -static PyArrayObject* HashSet_to_numpy(HashSet* hash_set){ - PyArrayObject* np_arr = new_node_numpy((Index)hash_set->size); - - Node* copy_dst = PyArray_DATA(np_arr); - - HashSet_copy(hash_set, copy_dst, PyArray_SIZE(np_arr)); - - return np_arr; -} - - - -#ifdef Thread_Count_Arg - -static bool MT_HashSet_Static_add_node(MT_HashSet_Static* hs, Node n){ - return MT_HashSet_Static_add(hs, Node_hash(n)); -} - - -static PyArrayObject* MT_HashSet_Static_to_numpy(MT_HashSet_Static* hash_set){ - PyArrayObject* np_arr = new_node_numpy((Index)MT_HashSet_Static_size(hash_set)); - - Node* copy_dst = PyArray_DATA(np_arr); - - MT_HashSet_Static_copy(hash_set, copy_dst, PyArray_SIZE(np_arr)); - - return np_arr; -} - - -Threadpool* GMSamplers_thread_pool; #endif -/* -static void count_neighbors(Index from, Index to, Index node_count, Index* neighbor_counts, float* onset_beat, float* duration_beat, float* pitch){ - for(Index i=from; inode_count; i++){ - // can be localized - pre_neighbor_count += (Index)(onset_div[i]==onset_div[j]); - - - pre_neighbor_count += (Index)(onset_div[i] + duration_div[i] ==onset_div[j]); - - // can be localized - pre_neighbor_count += (Index)((onset_div[i] < onset_div[j]) & (onset_div[j] < onset_div[i] + duration_div[i])); - - if(onset_div[j] > onset_div[i] + duration_div[i]){ - for(Index k=0; knode_count; k++){ - if(((onset_div[i] + duration_div[i] < onset_div[k]) & (onset_div[k] < onset_div[j])) | (onset_div[i] + duration_div[i] == onset_div[k])) - goto OUTER_LOOP; - } - - pre_neighbor_count++; - } - - OUTER_LOOP:; - } - - - // ASSUMPTION: the adjecancy checks below also apply to j itself - // which means calloc is totally unnecessary - // also, init value 1 - Index pre_neighbor_count = 1; - - //1.: count all onset_divs that are equal to onset_div[j] to the right of j - for(Index i = j+1; inode_count; i++){ - if(onset_div[i] != onset_div[j]) - break; - - pre_neighbor_count++; - } - - //2.: count all onset_divs that are equal to onset_div[j] to the left of j - Int i = ((Int)j)-1; - - for(; i>=0; i--){ - if(onset_div[(Index)i] != onset_div[j]) - break; - - pre_neighbor_count++; - } - - - - pre_neighbor_count += (Index)(duration_div[(Index)i] > 0); - - for(; i>=0; i--){ - if(onset_div[i] + max_duration_div < onset_div[j]) - break; - - pre_neighbor_count += (Index)(onset_div[(Index)i] + duration_div[(Index)i] >= onset_div[j]); - } - - graph->pre_neighbor_offsets[i+1] = pre_neighbor_count; - } -} -*/ // TODO(?): make sure to minimize false sharing on neighbor_counts @@ -675,1314 +466,52 @@ static PyObject* GMSamplers_compute_edge_list(PyObject* 
csamplers, PyObject* arg } -#include - -#include - - - - - - - - - - -static PyArrayObject* index_array_to_numpy(Index* indices, Index size){ - const npy_intp dims = size; - PyArrayObject* np_arr = (PyArrayObject*)PyArray_SimpleNew(1, &dims, Index_Eqv_In_Numpy); +static PyObject* GM_set_seed(PyObject* csamplers, PyObject* args){ + int64_t seed; - if(size > 0){ - Index* np_indices = (Index*)PyArray_DATA(np_arr); - memcpy(np_indices, indices, sizeof(Index)*size); + if(!PyArg_ParseTuple(args, "I", &seed)){ + printf("If you don't provide proper arguments, you can't have any neighbor sampling.\nHow can you have any neighbor sampling if you don't provide proper arguments?\n"); + return NULL; } - return np_arr; -} - -// static void write_node_at(Node n, PyArrayObject* np_arr, npy_intp index){ -// *((Node*)PyArray_GETPTR1(np_arr, index)) = n; -// } - - - - - - - - - - -// static Index binary_search(Node n, Node* non_decr_list, Index size){ -// Index l=0, r=size-1; - -// while(l<=r){ -// Index probe = (l+r)/2; - -// if(non_decr_list[probe] < n) -// l=probe+1; -// else if(non_decr_list[probe] > n) -// r=probe-1; -// else -// return probe; -// } - -// return size; -// } - - - -// static bool is_subset_of(Node* lhs, Index lhs_size, Node* rhs, Index rhs_size){ -// //ASSUMPTION: lhs and rhs are sorted in ascending order and there are no repeated elements - -// Index cursor=binary_search(lhs[0], rhs, rhs_size); - -// if(cursor == rhs_size) -// return false; + srand(seed); -// for(Index i=1; i < lhs_size; i++){ -// cursor+=(binary_search(lhs[i], rhs+cursor+1, rhs_size-cursor-1)+1); + Py_RETURN_NONE; +} -// if(cursor == rhs_size) -// return false; -// } -// return true; -// } -#ifdef Thread_Count_Arg -struct SampleNodewiseLocals{ - Node sample_src; - uint depth; +static PyMethodDef GMSamplersMethods[] = { + {"compute_edge_list", GMSamplers_compute_edge_list, METH_VARARGS, "Compute edge list from onset_div and duration_div."}, + {"c_set_seed", GM_set_seed, METH_VARARGS, ""}, + {NULL, NULL, 0, NULL} }; -struct SampleNodewiseShared{ - Graph* graph; - uint target_depth; - Index samples_per_node; - MT_HashSet_Static* hashset_per_layer; - Index** edgeindices_per_layer; - _Atomic Index* edgeindices_cursor_per_layer; - - Index init_size; +static struct PyModuleDef GMSamplersmodule = { + PyModuleDef_HEAD_INIT, + "csamplers", + NULL, + -1, + GMSamplersMethods }; -static void sample_nodewise_mt_static_job(void* shared_args, void* local_args, struct Thread_ID ID, Stack* jobstack, Mutex* jobstack_mutex){ - //Mutex_lock(&GM_mutex); - - // printf("TID: %u", ID.value); - // fflush(stdout); - - struct SampleNodewiseLocals* local = (struct SampleNodewiseLocals*)local_args; - struct SampleNodewiseShared* shared = (struct SampleNodewiseShared*)shared_args; - - struct SampleNodewiseLocals to_push; - to_push.depth = local->depth+1; - - //printf("TID %u: %u %u %u\n", ID.value, Node_To_Index(local->sample_src), Node_To_Index(local->sample_src)+1,shared->graph->node_count+1); - - ASSERT(Node_To_Index(local->sample_src) < shared->graph->node_count+1); - ASSERT(Node_To_Index(local->sample_src)+1 < shared->graph->node_count+1); - - Index offset = shared->graph->pre_neighbor_offsets[Node_To_Index(local->sample_src)]; - Index pre_neighbor_count = shared->graph->pre_neighbor_offsets[Node_To_Index(local->sample_src)+1]-offset; - - - - - - MT_HashSet_Static* hash_set = shared->hashset_per_layer+local->depth; - Index* edge_indices = shared->edgeindices_per_layer[local->depth]; - _Atomic Index* edgeindices_cursor = 
shared->edgeindices_cursor_per_layer+local->depth;
-
-    if(pre_neighbor_count <= shared->samples_per_node){
-        for(Index i=0; i<pre_neighbor_count; i++){
-            Node pre_neighbor = src_node_at(shared->graph, offset + i);
-
-            if(MT_HashSet_Static_add_node(hash_set, pre_neighbor)){
-                if(to_push.depth < shared->target_depth){
-                    to_push.sample_src = pre_neighbor;
-                    Mutex_lock(jobstack_mutex);
-                    bool success = Stack_push(jobstack, &to_push);
-                    ASSERT(success);
-                    Mutex_unlock(jobstack_mutex);
-                }
-
-
-            }
-
-            Index edge_index = Atomic_increment(edgeindices_cursor, memory_order_relaxed);
-
-            ASSERT(edge_index < shared->init_size*power(shared->samples_per_node, local->depth+1));
-
-            edge_indices[edge_index] = offset + i;
-        }
-    }
-    /*
-    expected number of attempts to insert a unique sample into set with k elements is n/(n-k)
-    for k=n*f, this results in 1/(1-f), meaning, if we want to limit the expected number of attempts
-    to let's say 4, f has to be at most 3/4=0.75
-
-    if this threshold is reached, random subset is sampled via random permutation
-    this is viable since memory waste is at most 25% (for temporary storage)
-    */
-    else if(shared->samples_per_node > (uint)(0.75*pre_neighbor_count)){
-        //printf("\tpermutes with pnc %u", pre_neighbor_count);
-        Index* perm = (Index*)malloc(sizeof(Index)*pre_neighbor_count);
-
-        ASSERT(perm);
-
-        for(Index i=0; i<pre_neighbor_count; i++)
-            perm[i]=i;
-
-        for(Index i=0; i<shared->samples_per_node; i++){
-            Index rand_i = i + rand()%(pre_neighbor_count-i);
-
-            Node node_sample = src_node_at(shared->graph, offset + perm[rand_i]);
-
-            if(MT_HashSet_Static_add_node(hash_set, node_sample)){
-
-                if(to_push.depth < shared->target_depth){
-                    to_push.sample_src = node_sample;
-                    Mutex_lock(jobstack_mutex);
-                    bool success = Stack_push(jobstack, &to_push);
-                    ASSERT(success);
-                    Mutex_unlock(jobstack_mutex);
-                }
-            }
-
-            Index edge_index = Atomic_increment(edgeindices_cursor, memory_order_relaxed);
-
-            ASSERT(edge_index < shared->init_size*power(shared->samples_per_node, local->depth+1));
-
-            edge_indices[edge_index] = offset + perm[rand_i];
-
-            perm[rand_i]=perm[i];
-        }
-
-        free(perm);
-    }
-    else{
-        //printf("\thashes with pnc %u", pre_neighbor_count);
-        HashSet node_tracker;
-        HashSet_new(&node_tracker, shared->samples_per_node);
-        HashSet_init(&node_tracker);
-
-        for(uint sample=0; sample<shared->samples_per_node; sample++){
-            Index edge_index;
-
-            Node node_sample;
-
-            for(;;){
-                edge_index = rand()%pre_neighbor_count;
-                node_sample = src_node_at(shared->graph, offset + edge_index);
-                if(HashSet_add_node(&node_tracker, node_sample))
-                    break;
-            }
-
-            if(MT_HashSet_Static_add_node(hash_set, node_sample)){
-
-                if(to_push.depth < shared->target_depth){
-                    to_push.sample_src = node_sample;
-                    Mutex_lock(jobstack_mutex);
-                    bool success = Stack_push(jobstack, &to_push);
-                    ASSERT(success);
-                    Mutex_unlock(jobstack_mutex);
-                }
-            }
-
-            Index edge_index2 = Atomic_increment(edgeindices_cursor, memory_order_relaxed);
-
-            ASSERT(edge_index2 < shared->init_size*power(shared->samples_per_node, local->depth+1));
-
-            edge_indices[edge_index2] = offset + edge_index;
-        }
-
-        HashSet_free(&node_tracker);
-    }
-
-    //puts("");
-    //Mutex_unlock(&GM_mutex);
-}
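/*
sample_nodewise_mt_static_job above appends into one shared edge-index array
without a lock: Atomic_increment on edgeindices_cursor hands each worker a
unique slot, and relaxed memory order suffices because distinct slots never
alias and the thread pool synchronizes once at the end. A minimal C11 sketch
of that reservation pattern, using <stdatomic.h> and <threads.h> directly
instead of the project's wrappers (illustrative only):
*/
#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

#define SLOTS 64

static _Atomic unsigned cursor;   // next free slot in the shared buffer
static unsigned buffer[SLOTS];

static int worker(void* arg){
    unsigned id = (unsigned)(size_t)arg;

    for(int k = 0; k < 8; k++){
        // Reserve one slot: fetch_add returns the old value, so each
        // thread gets a distinct index even under contention.
        unsigned slot = atomic_fetch_add_explicit(&cursor, 1u, memory_order_relaxed);
        buffer[slot] = id;
    }
    return 0;
}

int main(void){
    thrd_t t[4];
    for(size_t i = 0; i < 4; i++) thrd_create(&t[i], worker, (void*)i);
    for(size_t i = 0; i < 4; i++) thrd_join(t[i], NULL);

    // 4 threads x 8 writes: every slot in [0, 32) was written exactly once.
    printf("used %u of %d slots\n", atomic_load(&cursor), SLOTS);
    return 0;
}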
arguments?\n"); - return NULL; - } - - PyObject* samples_per_layer = PyList_New(depth+1); - PyObject* load_per_layer = PyList_New(depth); - PyObject* edge_indices_between_layers = PyList_New(depth); - - - if((samples_per_layer == NULL) | (load_per_layer == NULL) | (edge_indices_between_layers == NULL)){ - printf("can't create return pylists\n"); +PyMODINIT_FUNC PyInit_csamplers(){ + import_array(); - Py_XDECREF(samples_per_layer); - Py_XDECREF(load_per_layer); - Py_XDECREF(edge_indices_between_layers); + PyObject* module = PyModule_Create(&GMSamplersmodule); + if(module==NULL) return NULL; - } - - - - - - - - GMSamplers_thread_pool->sync_handle->job_process = sample_nodewise_mt_static_job; - - bool success = Threadpool_prepare_jobstack(GMSamplers_thread_pool, Thread_Count_Arg+1, sizeof(struct SampleNodewiseLocals)); - ASSERT(success); - - - - PyArrayObject* init_layer; - - HashSet hash_set; - HashSet_new(&hash_set, samples_per_node); - int target_nodes_references=0; - if((target_nodes == NULL) | ((PyObject*)target_nodes == Py_None)){ - init_layer = new_node_numpy(MACRO_MIN(samples_per_node,graph->node_count)); - - if(init_layer == NULL){ - puts("couldn't create initial layer"); - return NULL; - } - - Node* init_nodes = (Node*)PyArray_DATA(init_layer); - - struct SampleNodewiseLocals to_push; - to_push.depth = 0; + #ifdef Thread_Count_Arg + GMSamplers_thread_pool = (Threadpool*)malloc(sizeof(Threadpool) + sizeof(Stack) + sizeof(SynchronizationHandle)); - - HashSet_init(&hash_set); + Stack* q = (Stack*)(GMSamplers_thread_pool+1); - for(uint sample=0; samplenode_count; // TODO: need to make sure these are valid samples - if(HashSet_add_node(&hash_set, node_sample)) - break; - } - - *init_nodes++ = node_sample; - to_push.sample_src = node_sample; - - Stack_push(GMSamplers_thread_pool->sync_handle->job_stack, &to_push); - } - } - else{ - //TODO: what to do if target_nodes is not owned by the caller? - //TODO: what if target nodes doesnt have shape (N,) or (N,1)? 
- // Py_INCREF(target_nodes); - // target_nodes_references++; - - struct SampleNodewiseLocals to_push; - to_push.depth = 0; - - Node* raw_target_nodes = PyArray_DATA(target_nodes); - - - - for(uint n = 0 ; n < PyArray_SIZE(target_nodes); n++){ - to_push.sample_src = raw_target_nodes[n]; - Stack_push(GMSamplers_thread_pool->sync_handle->job_stack, &to_push); - } - - - init_layer = target_nodes; - } - - - struct SampleNodewiseShared shared; - shared.graph = graph; - shared.target_depth = depth; - shared.samples_per_node = samples_per_node; - shared.init_size = PyArray_SIZE(init_layer); - - size_t edge_indices_amount = PyArray_SIZE(init_layer); - - if(samples_per_node == 1) - edge_indices_amount*=depth; - else - edge_indices_amount*=(power(samples_per_node, depth+1)-samples_per_node)/(samples_per_node-1); - - - - void* all_memory = malloc(depth*(sizeof(MT_HashSet_Static) + sizeof(Index*) + sizeof(_Atomic Index)) + sizeof(Index)*edge_indices_amount); - - ASSERT(all_memory); - - shared.hashset_per_layer = (MT_HashSet_Static*)all_memory; - - Key expected_size = PyArray_SIZE(init_layer); - for(uint i=0; isync_handle->shared_data = (void*)(&shared); - - //Mutex_init(&GM_mutex, 0); - - Threadpool_wakeup_workers(GMSamplers_thread_pool); - Threadpool_participate_until_completion(GMSamplers_thread_pool); - - Py_INCREF(init_layer); - - PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)init_layer); - - - - HashSet_init(&hash_set); - - Node* raw_nodes = PyArray_DATA(init_layer); - - for(uint i=0; i graph->node_count, it just doesn't make sense - - emit warning if (maximum) total number of samples > graph->node_count - - maybe (but really only maybe) check if target nodes actually occur in graph - */ - - PyObject* samples_per_layer = PyList_New(depth+1); - PyObject* load_per_layer = PyList_New(depth); - PyObject* edge_indices_between_layers = PyList_New(depth); - - - if((samples_per_layer == NULL) | (load_per_layer == NULL) | (edge_indices_between_layers == NULL)){ - printf("can't create return pylists\n"); - - Py_XDECREF(samples_per_layer); - Py_XDECREF(load_per_layer); - Py_XDECREF(edge_indices_between_layers); - - return NULL; - } - - Index prev_size; - - HashSet load_set; - HashSet total_samples; - - PyArrayObject* prev_layer; - - int target_nodes_references = 0; - - if((target_nodes == NULL) | ((PyObject*)target_nodes == Py_None)){ - PyArrayObject* init_layer = new_node_numpy(MACRO_MIN(samples_per_node,graph->node_count)); - - if(init_layer == NULL){ - puts("couldn't create init layer"); - Py_DECREF(samples_per_layer); - Py_DECREF(edge_indices_between_layers); - Py_DECREF(load_per_layer); - return NULL; - } - - HashSet_new(&load_set, samples_per_node); - HashSet_init(&load_set); - - HashSet_new(&total_samples, samples_per_node); - HashSet_init(&total_samples); - - Node* init_nodes = (Node*)PyArray_DATA(init_layer); - - Index upper_bound = MACRO_MIN(samples_per_node,graph->node_count); - - for(Index sample=0; samplenode_count; // TODO: need to make sure these are valid samples - if(HashSet_add_node(&load_set, node_sample)) - break; - } - - HashSet_add_node(&total_samples, node_sample); - - *init_nodes++ = node_sample; - } - - PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)init_layer); - - prev_size = samples_per_node; - - prev_layer = init_layer; - } - else{ - //TODO: what to do if target_nodes is not owned by the caller? - //TODO: what if target nodes doesnt have shape (N,) or (N,1)? 
- // Py_INCREF(target_nodes); - // target_nodes_references++; - prev_size = (Index)PyArray_SIZE(target_nodes); - - PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)target_nodes); - - - HashSet_new(&load_set, prev_size); - HashSet_init(&load_set); - - HashSet_new(&total_samples, samples_per_node); - HashSet_init(&total_samples); - - for(Index n = 0 ; n < prev_size; n++){ - Node* ns = (Node*)PyArray_GETPTR1(target_nodes, n); - HashSet_add_node(&load_set, *ns); - HashSet_add_node(&total_samples, *ns); - } - - prev_layer = target_nodes; - } - - HashSet node_hash_set; - HashSet_new(&node_hash_set, prev_size); - - HashSet node_tracker; - HashSet_new(&node_tracker, samples_per_node); - - - - // We allocate this much upfront because it will be used all almost surely - Index edge_list_size = prev_size*power(samples_per_node, depth); - Node* edge_list_canvas = (Index*)malloc(3*sizeof(Node)*edge_list_size); - - ASSERT(edge_list_canvas); - - for(uint layer=depth;layer>0; layer--){ - Index edge_list_cursor=0; - - HashSet_init(&node_hash_set); - - - - - Node* prev_layer_nodes = (Node*)PyArray_DATA(prev_layer); - - for(Index n=0; npre_neighbor_offsets[Node_To_Index(dst_node)]; - Index pre_neighbor_count = graph->pre_neighbor_offsets[Node_To_Index(dst_node)+1]-graph->pre_neighbor_offsets[Node_To_Index(dst_node)]; - - if(pre_neighbor_count <= samples_per_node){ - for(Index i=0; i (Index)(0.75*pre_neighbor_count)){ - Index* perm = (Index*)malloc(sizeof(Index)*pre_neighbor_count); - - ASSERT(perm); - - for(Index i=0; i graph->node_count, it just doesn't make sense - - emit warning if (maximum) total number of samples > graph->node_count - - maybe (but really only maybe) check if target nodes actually occur in graph - */ - - PyObject* samples_per_layer = PyList_New(depth+1); - PyObject* load_per_layer = PyList_New(depth); - PyObject* edge_indices_between_layers = PyList_New(depth); - - - if((samples_per_layer == NULL) | (load_per_layer == NULL) | (edge_indices_between_layers == NULL)){ - printf("can't create return pylists\n"); - - Py_XDECREF(samples_per_layer); - Py_XDECREF(load_per_layer); - Py_XDECREF(edge_indices_between_layers); - - return NULL; - } - - Index prev_size; - - HashSet load_set; - - PyArrayObject* prev_layer; - - if((target_nodes == NULL) | ((PyObject*)target_nodes == Py_None)){ - PyArrayObject* init_layer = new_node_numpy(MACRO_MIN(samplesize_per_layer,graph->node_count)); - - if(init_layer == NULL){ - puts("couldn't create init layer"); - Py_DECREF(samples_per_layer); - Py_DECREF(edge_indices_between_layers); - Py_DECREF(load_per_layer); - return NULL; - } - - HashSet_new(&load_set, samplesize_per_layer); - HashSet_init(&load_set); - - Node* init_nodes = (Node*)PyArray_DATA(init_layer); - - for(uint sample=0; samplenode_count; // TODO: need to make sure these are valid samples - if(HashSet_add_node(&load_set, node_sample)) - break; - } - - *init_nodes++ = node_sample; - } - - PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)init_layer); - - prev_size = samplesize_per_layer; - - prev_layer = init_layer; - } - else{ - //TODO: what to do if target_nodes is not owned by the caller? - //TODO: what if target nodes doesnt have shape (N,) or (N,1)? 
- Py_INCREF(target_nodes); - prev_size = (Index)PyArray_SIZE(target_nodes); - - PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)target_nodes); - - - HashSet_new(&load_set, prev_size); - HashSet_init(&load_set); - - Node* raw_target_nodes = PyArray_DATA(target_nodes); - - for(uint n = 0 ; n < prev_size; n++) - HashSet_add_node(&load_set, *raw_target_nodes++); - - - prev_layer = target_nodes; - } - - HashSet node_hash_set; - HashSet_new(&node_hash_set, prev_size); - - Index* edge_index_canvas = (Index*)malloc(sizeof(Index)*prev_size); - - ASSERT(edge_index_canvas); - - Index* sample_canvas = NULL; - Index sample_canvas_size=0; - - for(uint layer=depth;layer>0; layer--){ - Node* prev_layer_nodes = (Node*)PyArray_DATA(prev_layer); - - - - Index sum_pre_neighbor_count = sum_preneighbor_counts(prev_layer_nodes, (Index)PyArray_SIZE(prev_layer), graph->pre_neighbor_offsets); - - - - bool sample_canvas_valid = conditional_resize_memory_canvas((void**)&sample_canvas, (Index)sizeof(Index), &sample_canvas_size, sum_pre_neighbor_count); - - ASSERT(sample_canvas_valid); - - - gather_edge_indices(prev_layer_nodes, (Index)PyArray_SIZE(prev_layer), graph->pre_neighbor_offsets, sample_canvas); - - HashSet_init(&node_hash_set); - - // randomly sample edge index from sample_canvas and get the corresponding node sample from that index - // swapping randomly selected indices from sample_canvas to the front so that they don't get sampled again - Index cursor=0; - while(cursor graph->node_count, it just doesn't make sense - - emit warning if (maximum) total number of samples > graph->node_count - - maybe (but really only maybe) check if target nodes actually occur in graph - */ - - PyObject* samples_per_layer = PyList_New(depth+1); - PyObject* load_per_layer = PyList_New(depth); - PyObject* edge_indices_between_layers = PyList_New(depth); - - - if((samples_per_layer == NULL) | (load_per_layer == NULL) | (edge_indices_between_layers == NULL)){ - printf("can't create return pylists\n"); - - Py_XDECREF(samples_per_layer); - Py_XDECREF(load_per_layer); - Py_XDECREF(edge_indices_between_layers); - - return NULL; - } - - Index prev_size; - - HashSet load_set; - - PyArrayObject* prev_layer; - - if((target_nodes == NULL) | ((PyObject*)target_nodes == Py_None)){ - PyArrayObject* init_layer = new_node_numpy(MACRO_MIN(samplesize_per_layer,graph->node_count)); - - if(init_layer == NULL){ - puts("couldn't create init layer"); - Py_DECREF(samples_per_layer); - Py_DECREF(edge_indices_between_layers); - Py_DECREF(load_per_layer); - return NULL; - } - - HashSet_new(&load_set, samplesize_per_layer); - HashSet_init(&load_set); - - Node* init_nodes = (Node*)PyArray_DATA(init_layer); - - for(uint sample=0; samplenode_count; // TODO: need to make sure these are valid samples - if(HashSet_add_node(&load_set, node_sample)) - break; - } - - *init_nodes++ = node_sample; - } - - PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)init_layer); - - prev_size = samplesize_per_layer; - - prev_layer = init_layer; - } - else{ - //TODO: what to do if target_nodes is not owned by the caller? - //TODO: what if target nodes doesnt have shape (N,) or (N,1)? 
- Py_INCREF(target_nodes); - prev_size = (Index)PyArray_SIZE(target_nodes); - - PyList_SET_ITEM(samples_per_layer, depth, (PyObject*)target_nodes); - - - HashSet_new(&load_set, prev_size); - HashSet_init(&load_set); - - Node* raw_target_nodes = PyArray_DATA(target_nodes); - - for(uint n = 0 ; n < prev_size; n++) - HashSet_add_node(&load_set, *raw_target_nodes++); - - - prev_layer = target_nodes; - } - - HashSet node_hash_set; - HashSet_new(&node_hash_set, prev_size); - - Index* edge_index_canvas = NULL; - Index edge_index_canvas_size=0; - - for(uint layer=depth;layer>0; layer--){ - Node* prev_layer_nodes = (Node*)PyArray_DATA(prev_layer); - - - - Int sum_pre_neighbor_count = (Int)sum_preneighbor_counts(prev_layer_nodes, (Index)PyArray_SIZE(prev_layer), graph->pre_neighbor_offsets); - - - - bool edge_index_canvas_valid = conditional_resize_memory_canvas((void**)&edge_index_canvas, (Index)sizeof(Index), &edge_index_canvas_size, sum_pre_neighbor_count); - - ASSERT(edge_index_canvas_valid); - - - gather_edge_indices(prev_layer_nodes, (Index)PyArray_SIZE(prev_layer), graph->pre_neighbor_offsets, edge_index_canvas); - - HashSet_init(&node_hash_set); - - // randomly sample edge index from sample_canvas and get the corresponding node sample from that index - // then we swap all edge indices to the front that have all the same src node like the node sample - // this fully connects the new node to every node in prev_layer if that edge exists in the graph - Int cursor=0; - while(cursorl && src_node_at(graph, edge_index_canvas[(Index)r]) != node_sample) - r--; - - if(l < r){ - Index tmp = edge_index_canvas[(Index)l]; - edge_index_canvas[(Index)l] = edge_index_canvas[(Index)r]; - edge_index_canvas[(Index)r] = tmp; - - l++; - r--; - } - else - break; - } - - cursor = (Index)l; - - - - if(node_hash_set.size == samplesize_per_layer) - break; - } - - - - PyArrayObject* edge_indices = index_array_to_numpy(edge_index_canvas, cursor); - - - PyArrayObject* new_layer = HashSet_to_numpy(&node_hash_set); - PyArrayObject* layer_load = HashSet_to_numpy(&load_set); - - // we swap the 2 hashsets since node_hash_set already contains the nodes - // that are needed for load_set in next iteration - // NOTE: simply overwriting load_set doesn't work since that would introduce - // a dangling pointer with load_set.keys - HashSet tmp = load_set; - load_set = node_hash_set; - node_hash_set = tmp; - - - prev_size = (Index)PyArray_SIZE(new_layer); - prev_layer = new_layer; - - PyList_SET_ITEM(samples_per_layer, layer-1, (PyObject*)new_layer); - PyList_SET_ITEM(load_per_layer, layer-1, (PyObject*)layer_load); - PyList_SET_ITEM(edge_indices_between_layers, layer-1, (PyObject*)edge_indices); - } - - HashSet_free(&load_set); - HashSet_free(&node_hash_set); - free(edge_index_canvas); - - return PyTuple_Pack(3, samples_per_layer, edge_indices_between_layers, load_per_layer); -} - - -static PyObject* GM_set_seed(PyObject* csamplers, PyObject* args){ - int64_t seed; - - if(!PyArg_ParseTuple(args, "I", &seed)){ - printf("If you don't provide proper arguments, you can't have any neighbor sampling.\nHow can you have any neighbor sampling if you don't provide proper arguments?\n"); - return NULL; - } - - srand(seed); - - Py_RETURN_NONE; -} - - -static PyMethodDef GMSamplersMethods[] = { - {"c_sample_nodewise", GMSamplers_sample_nodewise, METH_VARARGS, "Random sampling within a graph through multiple layers where each pre-neighborhood of a node in a layer gets sampled separately."}, - #ifdef Thread_Count_Arg - 
{"c_sample_nodewise_mt_static", GMSamplers_sample_nodewise_mt_static, METH_VARARGS, "Random sampling within a graph through multiple layers where each pre-neighborhood of a node in a layer gets sampled separately. Multi-threaded with static lock-free Hashsets"}, - #endif - {"c_sample_layerwise_fully_connected", GMSamplers_sample_layerwise_fully_connected, METH_VARARGS, "Random sampling within a graph through multiple layers where the pre-neighborhood of a layer gets sampled jointly and the layers are fully connected."}, - - // TODO: figure out how to sample without collecting all edges in a list first - {"c_sample_layerwise_randomly_connected", GMSamplers_sample_layerwise_randomly_connected, METH_VARARGS, "Random sampling within a graph through multiple layers where the pre-neighborhood of a layer gets sampled jointly, but only a random subset of the connections between layers is sampled."}, - - {"compute_edge_list", GMSamplers_compute_edge_list, METH_VARARGS, "Compute edge list from onset_div and duration_div."}, - {"c_random_score_region", random_score_region, METH_VARARGS, "Samples a random region (integer interval) from a score graph"}, - {"c_extend_score_region_via_neighbor_sampling", extend_score_region_via_neighbor_sampling, METH_VARARGS, "Given a score region, add samples from outside the region aquired via neighboorhood sampling"}, - {"c_sample_neighbors_in_score_graph", sample_neighbors_in_score_graph, METH_VARARGS, "nodewise sampling of neighbors without pre-computed lookup table"}, - {"c_sample_preneighbors_within_region", sample_preneighbors_within_region, METH_VARARGS, ""}, - {"c_set_seed", GM_set_seed, METH_VARARGS, ""}, - {NULL, NULL, 0, NULL} -}; - -static struct PyModuleDef GMSamplersmodule = { - PyModuleDef_HEAD_INIT, - "csamplers", - NULL, - -1, - GMSamplersMethods -}; - -PyMODINIT_FUNC PyInit_csamplers(){ - import_array(); - if(PyType_Ready(&GraphType) < 0) - return NULL; - - PyObject* module = PyModule_Create(&GMSamplersmodule); - - if(module==NULL) - return NULL; - - Py_INCREF(&GraphType); - - - if(PyModule_AddObject(module, "Graph", (PyObject*)&GraphType) < 0){ - Py_DECREF(&GraphType); - Py_DECREF(module); - return NULL; - } - - #ifdef Thread_Count_Arg - GMSamplers_thread_pool = (Threadpool*)malloc(sizeof(Threadpool) + sizeof(Stack) + sizeof(SynchronizationHandle)); - - Stack* q = (Stack*)(GMSamplers_thread_pool+1); - - SynchronizationHandle* i = (SynchronizationHandle*)(q+1); - - Threadpool_init(GMSamplers_thread_pool, Thread_Count_Arg-1, i, q); - #endif + Threadpool_init(GMSamplers_thread_pool, Thread_Count_Arg-1, i, q); + #endif return module; } - -//#include -//#include - -//static PyObject* GMSamplers_compute_beat_edges(PyObject* csamplers, PyObject* args) { -// // Parse arguments from Python -// PyArrayObject* onset_beat; -// float min_beat_length; -// float max_beat_length; -// -// if(!PyArg_ParseTuple(args, "O!ff", &PyArray_Type, &onset_beat, &min_beat_length, &max_beat_length)){ -// return NULL; -// } -// -// // Convert PyArrayObject to C array -// float* onset_beat_arr = (float*)PyArray_DATA(onset_beat); -// int len_onset_beat = PyArray_SIZE(onset_beat); -// -// // Your C code starts here -// float min_onset_beat = onset_beat_arr[0]; -// float max_onset_beat = onset_beat_arr[0]; -// for (int i = 1; i < len_onset_beat; i++) { -// if (onset_beat_arr[i] < min_onset_beat) { -// min_onset_beat = onset_beat_arr[i]; -// } -// if (onset_beat_arr[i] > max_onset_beat) { -// max_onset_beat = onset_beat_arr[i]; -// } -// } -// -// // ... rest of your C code ... 
-// if (min_onset_beat < 0) { -// for (int i = 0; i < len_onset_beat; i++) { -// onset_beat[i] -= min_onset_beat; -// } -// max_onset_beat -= min_onset_beat; -// } -// -// int nodes_len = (int)max_onset_beat + 1; -// int* beat_cluster = (int*)calloc(len_onset_beat, sizeof(int)); -// for (int i = 0; i < len_onset_beat; i++) { -// beat_cluster[i] = -1; -// } -// -// int** edges = (int**)malloc(nodes_len * sizeof(int*)); -// int* edges_len = (int*)calloc(nodes_len, sizeof(int)); -// -// int b = 0; -// for (int i = 0; i < len_onset_beat; i++) { -// // Find the correct b value for the current onset_beat value -// while (onset_beat[i] >= b + 1) { -// b++; -// } -// -// // Reallocate memory for edges[b] if necessary -// edges[b] = (int*)realloc(edges[b], (edges_len[b] + 1) * sizeof(int)); -// -// // Add the current index to edges[b] -// edges[b][edges_len[b]] = i; -// edges_len[b]++; -// beat_cluster[i] = b; -// } -// -// int* beat_index = (int*)malloc(nodes_len * sizeof(int)); -// for (int i = 0; i < nodes_len; i++) { -// beat_index[i] = i; -// } -// -// int** beat_edges = (int**)malloc(2 * sizeof(int*)); -// beat_edges[0] = (int*)malloc(nodes_len * sizeof(int)); -// beat_edges[1] = (int*)malloc(nodes_len * sizeof(int)); -// for (int i = 0; i < nodes_len; i++) { -// beat_edges[0][i] = edges[i][0]; -// beat_edges[1][i] = i; -// } -// -// // Convert the result back to a PyArrayObject -// npy_intp dims[2] = {2, nodes_len}; -// PyArrayObject* beat_edges = (PyArrayObject*)PyArray_SimpleNew(2, dims, NPY_INT); -// int** beat_edges_data = PyArray_DATA(beat_edges); -// memcpy(beat_edges_data, beat_edges, 2 * nodes_len * sizeof(int)); -// // Convert beat_index to a PyArrayObject -// PyArrayObject* beat_index = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_INT); -// int* beat_index_data = PyArray_DATA(beat_index); -// memcpy(beat_index_data, beat_index, nodes_len * sizeof(int)); -// // Convert beat_cluster to a PyArrayObject -// PyArrayObject* beat_cluster = (PyArrayObject*)PyArray_SimpleNew(1, &len_onset_beat, NPY_INT); -// int* beat_cluster_data = PyArray_DATA(beat_cluster); -// memcpy(beat_cluster_data, beat_cluster, len_onset_beat * sizeof(int)); -// -// -// // Return the result -// return PyTuple_Pack(3, beat_edges, beat_index, beat_cluster); -//} \ No newline at end of file diff --git a/tests/test_graph_creation.py b/tests/test_graph_creation.py index 727ca04..89ec13d 100644 --- a/tests/test_graph_creation.py +++ b/tests/test_graph_creation.py @@ -23,19 +23,22 @@ def edges_from_note_array(note_array): edg_src = list() edg_dst = list() - start_rest_index = len(note_array) + edge_type = list() for i, x in enumerate(note_array): for j in np.where((note_array["onset_div"] == x["onset_div"]))[0]: #& (note_array["id"] != x["id"]))[0]: edg_src.append(i) edg_dst.append(j) + edge_type.append(0) for j in np.where(note_array["onset_div"] == x["onset_div"] + x["duration_div"])[0]: edg_src.append(i) edg_dst.append(j) + edge_type.append(1) for j in np.where((x["onset_div"] < note_array["onset_div"]) & (x["onset_div"] + x["duration_div"] > note_array["onset_div"]))[0]: edg_src.append(i) edg_dst.append(j) + edge_type.append(2) end_times = note_array["onset_div"] + note_array["duration_div"] for et in np.sort(np.unique(end_times))[:-1]: @@ -48,9 +51,11 @@ def edges_from_note_array(note_array): for j in dst: edg_src.append(i) edg_dst.append(j) + edge_type.append(3) edges = np.array([edg_src, edg_dst]) - return edges + edge_types = np.array(edge_type) + return edges, edge_types class TestGraphMuse(unittest.TestCase): 
@@ -61,12 +66,12 @@ def test_edge_list(self): score_path = os.path.join(os.path.dirname(__file__), "samples", "wtc1f01.musicxml") score = pt.load_score(score_path) note_array = score.note_array() - edges_python = np.sort(edges_from_note_array(note_array)) + edges_python = np.sort(edges_from_note_array(note_array)[0]) edge_list, edge_types = sam.compute_edge_list(note_array['onset_div'].astype(np.int32), note_array['duration_div'].astype(np.int32)) edges_c = np.sort(edge_list) self.assertTrue(edges_c.shape==edges_python.shape) self.assertTrue((edges_c==edges_python).all()) - print("Edgle list creation assertions passed") + print("Edge list creation assertions passed") def test_graph_creation(self): part = pt.load_score(pt.EXAMPLE_MUSICXML)[0] @@ -97,19 +102,24 @@ def test_edge_creation_speed(self): note_array = np.vstack((ons, dur, pitch, beats)) # transform to structured array note_array = np.core.records.fromarrays(note_array, names='onset_div,duration_div,pitch,onset_beat') + # sort onset_div + note_array = note_array[np.argsort(note_array["onset_div"])] # create features array of shape (num_nodes, num_features) time_baseline = time.time() - _ = edges_from_note_array(note_array) + edges_na, etype_na = edges_from_note_array(note_array) time_baseline = time.time() - time_baseline time_c = time.time() - _ = sam.compute_edge_list(note_array['onset_div'].astype(np.int32), + edges_gm, etype_gm = sam.compute_edge_list(note_array['onset_div'].astype(np.int32), note_array['duration_div'].astype(np.int32)) time_c = time.time() - time_c print("Time for python edge list creation: ", time_baseline) print("Time for C edge list creation: ", time_c) self.assertTrue(time_c < time_baseline) + self.assertTrue(edges_na.shape == edges_gm.shape) + # sort src edges to compare + self.assertTrue((np.sort(edges_na, axis=1) == np.sort(edges_gm, axis=1)).all()) diff --git a/tests/test_sampler_functions.py b/tests/test_sampler_functions.py deleted file mode 100644 index d4ef245..0000000 --- a/tests/test_sampler_functions.py +++ /dev/null @@ -1,109 +0,0 @@ -import graphmuse.samplers as sam -import os -import partitura as pt -import numpy as np -import unittest - -class TestGraphMuse(unittest.TestCase): - score_path = os.path.join(os.path.dirname(__file__), "samples", "wtc1f01.musicxml") - score = pt.load_score(score_path) - note_array = score.note_array() - edge_list, _ = sam.compute_edge_list(note_array['onset_div'].astype(np.int32), - note_array['duration_div'].astype(np.int32)) - perm = edge_list[1, :].argsort() - edge_list = edge_list[:, perm] - _, uniq_indices = np.unique(edge_list[1, :], return_index=True) - - for i in range(len(uniq_indices) - 1): - edge_list[0, uniq_indices[i]:uniq_indices[i + 1]].sort() - - edge_list[0, uniq_indices[-1]:].sort() - - g = sam.graph(edge_list) - - def test_preneighborhood_count(self): - N = 10 - - edges = np.empty((2, N*(N+1)//2), dtype=np.int32) - - cursor = 0 - - for i in range(N): - for j in range(N-i-1,-1,-1): - edges[0, cursor]=j - edges[1, cursor]=i - cursor+=1 - - graph = sam.graph(edges) - - graph.print() - - for i in range(N): - assert graph.preneighborhood_count(i)==N-i - - def test_random_score_region(self): - region = sam.random_score_region(self.note_array, 100) - print(region) - - def test_set_seed(self): - sam.c_set_seed(0) - - def test_random_score_region_with_seed(self): - sam.c_set_seed(0) - region = sam.random_score_region(self.note_array, 100) - (left_ext, left_edges), (right_ext, right_edges) = sam.extend_score_region_via_neighbor_sampling(self.g, 
self.note_array, - region, 2) - print(region) - - def test_sample_neighbors_in_score_graph(self): - right_right_ext, right_right_edges, _ = sam.sample_neighbors_in_score_graph(self.note_array, 1, 3, []) - -# region = sam.random_score_region(note_array, 100) -# -# print(region) -# -# sam.c_set_seed(10101) -# -# region = sam.random_score_region(note_array, 100) -# -# print(region) -# -# -# -# print(left_ext) -# print() -# print(left_edges) -# print("\n----------------------\n") -# print(right_ext) -# print() -# print(right_edges) -# print("\n----------------------\n") -# -# -# -# print(right_right_ext) -# print() -# print(right_right_edges) - -#test_preneighborhood_count() -# from tests import test_samplers, test_graph_creation - -# ts = test_samplers.TestSamplers() - -# ts.test_nodewise_sampling(sam.sample_nodewise) -# ts.test_nodewise_sampling(sam.sample_nodewise_mt_static) - -# test_graph_creation.TestGraphMuse().test_edge_list() - -# onsets = numpy.random.randint(0,10,20) -# onsets.sort() -# na = {"onset_div": onsets} -# print(onsets) -# print(sam.random_score_region(na, 5)) - - - - - - - diff --git a/tests/test_samplers.py b/tests/test_samplers.py deleted file mode 100644 index 4436801..0000000 --- a/tests/test_samplers.py +++ /dev/null @@ -1,71 +0,0 @@ -import graphmuse.samplers as sam -import unittest -import numpy as np -import torch - -torch.random.manual_seed(0) -np.random.seed(0) -sam.c_set_seed(0) - - -class TestSamplers(unittest.TestCase): - - def test_nodewise_sampling(self): - for nodewise_sampling_method in (sam.sample_nodewise,): - print(f"Unit Testing for {nodewise_sampling_method.__name__}") - - V = 100 - E = 4*V - - edges = np.random.randint(0, V, (2, E), dtype=np.int32) - - V = np.max(edges) - - resort_idx = np.lexsort((edges[0], edges[1])) - edges = edges[:, resort_idx] - - # Add random edge types - edges = np.vstack((edges, np.random.randint(0, 4, E, dtype=np.int32))) - - g = sam.graph(edges) - - target_size = np.random.randint(1, V//4, 1)[0] - target = np.unique(np.random.randint(0, V, target_size, np.int32)) - depth = 2 - samples_per_node = 3 - samples_per_layer, edges_between_layers, load_per_layer, total_samples = nodewise_sampling_method(g, depth, samples_per_node, target) - - self.assertTrue(len(samples_per_layer) == depth+1) - self.assertTrue(len(edges_between_layers) == depth) - self.assertTrue(len(load_per_layer) == depth) - self.assertTrue(samples_per_layer[-1].shape == target.shape) - self.assertTrue((sorted(samples_per_layer[-1]) == sorted(target))) - - for l in range(depth): - self.assertTrue(torch.all(torch.unique(torch.hstack((samples_per_layer[l], samples_per_layer[l+1]))) == load_per_layer[l].sort()[0])) - current_edges = edges_between_layers[l] - # Check that each node is sampled at most once - samples_counter = dict() - for edge in current_edges.t(): - src = edge[0].item() - dst = edge[1].item() - if dst in samples_counter.keys(): - samples_counter[dst] += 1 - else: - samples_counter[dst] = 1 - - for dst, c in samples_counter.items(): - self.assertTrue(c == min(samples_per_node, g.preneighborhood_count(dst)), f"count of {dst} is {c}, but pnc is {g.preneighborhood_count(dst)}") - - unique_src = np.unique(current_edges[0]) - - self.assertTrue(unique_src.shape == samples_per_layer[l].shape) - self.assertTrue(list(unique_src) == sorted(samples_per_layer[l])) - - unique_dst = current_edges[1] - - for sample in samples_per_layer[l+1]: - if sample not in unique_dst: - self.assertTrue(g.preneighborhood_count(sample.item()) == 0) - - 
print(f"{nodewise_sampling_method.__name__} passed all tests")