Merge pull request #962 from Christopher-Bradshaw/docs-readthrough

Docs readthrough [WIP]
astropy · Jan 7, 2020 · 784dde1 · 784dde1
2 parents b8198cf + f8c7372
commit 784dde1
Show file tree

Hide file tree

Showing 35 changed files with 255 additions and 232 deletions.
diff --git a/docs/index.rst b/docs/index.rst
@@ -26,6 +26,7 @@ Getting Started
    quickstart_and_tutorials/index
    function_usage/index
    source_notes/index
+   quickstart_and_tutorials/development/index
 
 ***********
 What's New?

diff --git a/docs/quickstart_and_tutorials/development/getting_started.rst b/docs/quickstart_and_tutorials/development/getting_started.rst
@@ -0,0 +1,49 @@
+.. _getting_started_developers:
+
+*************************
+Contributing to Halotools
+*************************
+
+All Halotools development happens in the GitHub repository. To contribute, first clone the repo.
+Then install the dependencies listed on the :ref:`step_by_step_install` page.
+
+
+Code
+====
+
+Halotools contains a compiled component. To compile all Cython (``.pyx``) files in place, run ::
+
+   python3 setup.py build_ext --inplace
+
+If you modify a ``.pyx`` file, use the same command to recompile it. Subsequent runs will only compile files whose source has changed and so will be much quicker.
+
+Halotools also has comprehensive unit tests and uses pytest. To run all tests, assuming you are in the base of the repository, first change directory into ``halotools`` and then run ``pytest``. ::
+
+   cd halotools
+   pytest
+
+If you have made a change and only want to run a subset of the tests, run ::
+
+   pytest -k tests_matching_this_string_will_run
+
+Run ``pytest --help`` for a full list of options.
+
+
+Docs
+====
+
+First ensure that the halotools package and sphinx are installed. From the base of the repository run, ::
+
+   pip3 install -e .
+   pip3 install sphinx==1.3.1 # see docs/conf.py for the sphinx version
+
+Then build documentation with, ::
+
+   cd docs
+   make html
+
+You can see the built documentation in ``docs/_build/html/``. The easiest way to view it in your browser is to spin up a local server. One way to do this is to run, from within ``docs/_build/html/``, ::
+
+   python3 -m http.server
+
+The docs should then be viewable at ``localhost:8000`` (the port will be logged when you start the server).
diff --git a/docs/quickstart_and_tutorials/development/index.rst b/docs/quickstart_and_tutorials/development/index.rst
@@ -14,5 +14,6 @@ contribute to the Halotools code base.
 
    staying_up_to_date
    bug_reports
+   getting_started
    contributors
    ../../changelog
diff --git a/halotools/empirical_models/abunmatch/bin_free_cam.py b/halotools/empirical_models/abunmatch/bin_free_cam.py
@@ -11,9 +11,14 @@ def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
             assume_x_is_sorted=False, assume_x2_is_sorted=False, return_indexes=False):
     r"""
     Given a set of input points with primary property `x` and secondary property `y`,
-    use conditional abundance matching to map new values `ynew` onto the input points
-    such that :math:`P(<y_{\rm new} | x) = P(<y_2 | x)`, and also that
-    `y` and `ynew` are in monotonic correspondence at fixed `x`.
+    and a mapping between that primary property and another secondary property
+    (`y2 | x2`), assign values of the `y2` property to the input points.
+
+    The `y2` that is assigned (`ynew`) is in monotonic correspondence with `y` at
+    fixed `x`. Therefore, :math:`P(<y_{\rm new} | x) = P(<y | x)`.
+
+    See :ref:`cam_tutorial` for a demonstration of how to use this function in
+    galaxy-halo modeling, with several worked examples.
 
     Parameters
     ----------
@@ -24,10 +29,10 @@ def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
         Numpy array of shape (n1, ) storing the secondary property of the input points.
 
     x2 : ndarray
-        Numpy array of shape (n2, ) storing the primary property of the desired distribution.
+        Numpy array of shape (n2, ) storing the primary property of the desired distribution. This should be the same physical property (e.g. halo mass) as x.
 
     y2 : ndarray
-        Numpy array of shape (n2, ) storing the secondary property of the desired distribution.
+        Numpy array of shape (n2, ) storing the secondary property of the desired distribution. This is a different physical property to y.
 
     nwin : int
         Odd integer specifying the size of the window
@@ -75,9 +80,6 @@ def conditional_abunmatch(x, y, x2, y2, nwin, add_subgrid_noise=True,
     values of ``window_length`` must exceed 100. Values more than a few hundred are
     likely overkill when using the (recommended) sub-grid noise option.
 
-    See :ref:`cam_tutorial` demonstrating how to use this
-    function in galaxy-halo modeling with several worked examples.
-
     With the release of Halotools v0.7, this function replaced a previous function
     of the same name. The old function is now called
     `~halotools.empirical_models.conditional_abunmatch_bin_based`.

diff --git a/halotools/empirical_models/abunmatch/noisy_percentile.py b/halotools/empirical_models/abunmatch/noisy_percentile.py
@@ -69,7 +69,7 @@ def noisy_percentile(percentile, correlation_coeff, seed=None, random_percentile
     Returns
     -------
     noisy_percentile : ndarray
-        Numpy array of shape (ngals, ) storing an array such that
+        Numpy array of shape (npts, ) storing an array such that
         the Spearman rank-order correlation coefficient between
         ``percentile`` and ``noisy_percentile`` is equal to the input
         ``correlation_coeff``.

diff --git a/halotools/mock_observables/catalog_analysis_helpers.py b/halotools/mock_observables/catalog_analysis_helpers.py
@@ -131,7 +131,7 @@ def return_xyz_formatted_array(x, y, z, period=np.inf,
         galaxy positions under the distant-observer approximation.
         Default is no distortions.
 
-    cosmology : object, optional
+    cosmology : astropy.cosmology.Cosmology, optional
         Cosmology to assume when applying redshift-space distortions,
         e.g., the cosmology of the simulation.
         Default is set in `sim_manager.sim_defaults`.
@@ -264,7 +264,7 @@ def apply_zspace_distortion(true_pos, peculiar_velocity, redshift, cosmology, Lb
         redshift of the snapshot. If using a lightcone, this argument is the
         redshift of each point.
 
-    cosmology : object
+    cosmology : astropy.cosmology.Cosmology
         Cosmology to assume when applying redshift-space distortions,
         e.g., the cosmology of the simulation.
 
@@ -308,9 +308,9 @@ def cuboid_subvolume_labels(sample, Nsub, Lbox):
 
     Nsub : array_like
         Length-3 numpy array of integers indicating how many times to split the volume
-        along each dimension.  If single integer, N, is supplied, ``Nsub`` is set to
+        along each dimension.  If a single integer, N, is supplied, ``Nsub`` is set to
         [N,N,N], and the volume is split along each dimension N times.  The total number
-        of subvolumes is then given by numpy.prod(Nsub).
+        of subvolumes is given by numpy.prod(Nsub).
 
     Lbox : array_like
         Length-3 numpy array defining the lengths of the sides of the cubical volume
@@ -320,7 +320,7 @@ def cuboid_subvolume_labels(sample, Nsub, Lbox):
     Returns
     -------
     labels : numpy.array
-        numpy array with integer labels in the range [1,numpy.prod(Nsub)] indicating
+        (Npts, ) numpy array with integer labels in the range [1,numpy.prod(Nsub)] indicating
         the subvolume each point in ``sample`` occupies.
 
     N_sub_vol : int

diff --git a/halotools/mock_observables/isolation_functions/conditional_cylindrical_isolation.py b/halotools/mock_observables/isolation_functions/conditional_cylindrical_isolation.py
@@ -60,7 +60,7 @@ def conditional_cylindrical_isolation(sample1, sample2, rp_max, pi_max,
         If a single float is given, ``rp_max`` is assumed to be the same for each galaxy in
         ``sample1``. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.
 
-    pi_max : float
+    pi_max : array_like
         half the length of cylinders to search for neighbors around galaxies in ``sample1``.
         If a single float is given, ``pi_max`` is assumed to be the same for each galaxy in
         ``sample1``. Length units are comoving and assumed to be in Mpc/h, here and throughout Halotools.
@@ -117,7 +117,7 @@ def conditional_cylindrical_isolation(sample1, sample2, rp_max, pi_max,
     Returns
     -------
     is_isolated : numpy.array
-        array of booleans indicating if each point in `sample1` is isolated.
+        (Npts1, ) array of booleans indicating if each point in `sample1` is isolated.
 
     Notes
     -----

diff --git a/halotools/mock_observables/isolation_functions/spherical_isolation.py b/halotools/mock_observables/isolation_functions/spherical_isolation.py
@@ -88,7 +88,7 @@ def spherical_isolation(sample1, sample2, r_max, period=None,
     Returns
     -------
     is_isolated : numpy.array
-        array of booleans indicating if each point in `sample1` is isolated.
+        (Npts1, ) array of booleans indicating if each point in `sample1` is isolated.
 
     Examples
     --------

diff --git a/halotools/mock_observables/large_scale_density/large_scale_density_spherical_annulus.py b/halotools/mock_observables/large_scale_density/large_scale_density_spherical_annulus.py
@@ -88,7 +88,7 @@ def large_scale_density_spherical_annulus(sample, tracers, inner_radius, outer_r
     Returns
     --------
     number_density : array_like
-        Length-Npts array of number densities
+        Length-Npts1 array of number densities
 
     Examples
     ---------
@@ -105,9 +105,9 @@ def large_scale_density_spherical_annulus(sample, tracers, inner_radius, outer_r
             period, sample_volume, num_threads, approx_cell1_size)
         )
 
-    _ = npairs_per_object_3d(sample, tracers, rbins, period=period,
+    result = npairs_per_object_3d(sample, tracers, rbins, period=period,
         num_threads=num_threads, approx_cell1_size=approx_cell1_size)
-    result = np.diff(_, axis=1)
+    result = np.diff(result, axis=1)
 
     environment_volume = (4/3.)*np.pi*(outer_radius**3 - inner_radius**3)
     number_density = result/environment_volume

diff --git a/halotools/mock_observables/large_scale_density/large_scale_density_spherical_volume.py b/halotools/mock_observables/large_scale_density/large_scale_density_spherical_volume.py
@@ -87,7 +87,7 @@ def large_scale_density_spherical_volume(sample, tracers, radius,
     Returns
     --------
     number_density : array_like
-        Length-Npts array of number densities
+        Length-Npts1 array of number densities
 
     Examples
     ---------
@@ -103,9 +103,8 @@ def large_scale_density_spherical_volume(sample, tracers, radius,
             sample, tracers, radius, period, sample_volume, num_threads, approx_cell1_size)
         )
 
-    _ = npairs_per_object_3d(sample, tracers, rbins, period=period,
-        num_threads=num_threads, approx_cell1_size=approx_cell1_size)
-    result = _[:, 0]
+    result = npairs_per_object_3d(sample, tracers, rbins, period=period,
+        num_threads=num_threads, approx_cell1_size=approx_cell1_size)[:, 0]
 
     environment_volume = (4/3.)*np.pi*radius**3
     number_density = result/environment_volume
@@ -123,8 +122,8 @@ def _large_scale_density_spherical_volume_process_args(
     """
     sample = np.atleast_1d(sample)
     tracers = np.atleast_1d(tracers)
-    _ = np.atleast_1d(radius).astype(float)
-    rbins = np.append(_, _[0]+0.0001)
+    rbins = np.atleast_1d(radius).astype(float)
+    rbins = np.append(rbins, rbins[0]+0.0001)
 
     if period is None:
         if sample_volume is None:

diff --git a/halotools/mock_observables/mock_observables_helpers.py b/halotools/mock_observables/mock_observables_helpers.py
@@ -143,6 +143,7 @@ def get_separation_bins_array(separation_bins):
     try:
         assert separation_bins.ndim == 1
         assert len(separation_bins) > 1
+        # cbx_aph: There are lots of places like this where we never check that the array is increasing if it has 2 elements. The reason for this is that array_is_monotonic requires 3 elements. This could easily be fixed, because I think we could allow 2-element arrays in array_is_monotonic - such an array would always be monotonic, but we would know whether it was increasing or decreasing.
         if len(separation_bins) > 2:
             assert array_is_monotonic(separation_bins, strict=True) == 1
         assert np.all(separation_bins > 0)

diff --git a/halotools/mock_observables/occupation_stats.py b/halotools/mock_observables/occupation_stats.py
@@ -12,7 +12,9 @@
 
 def hod_from_mock(haloprop_galaxies, haloprop_halos, haloprop_bins=None):
     r"""
-    Calculate the HOD of a mock galaxy sample.
+    Calculate the HOD of a mock galaxy sample. It returns the expected number
+    of galaxies per halo, in bins of whatever halo property
+    ``haloprop_galaxies`` and ``haloprop_halos`` are given in.
 
     Parameters
     ----------
@@ -27,10 +29,14 @@ def hod_from_mock(haloprop_galaxies, haloprop_halos, haloprop_bins=None):
         Array of shape (num_halos, ) used to bin the halos in the same manner
         as the galaxies so that the counts in each bin can be properly normalized.
 
+        Note that this property (e.g. halo mass) must be the same as used for
+        ``haloprop_galaxies``.
+
     haloprop_bins : ndarray, optional
-        Array defining the bin edges. If this array is not passed, then you will probably
-        obtain better results if you pass in logarithmic quantities for the
-        ``haloprop_galaxies`` and ``haloprop_halos`` arrays.
+        Array defining the bin edges. If None, this defaults to 10 linearly
+        spaced bins and so you will probably obtain better results if you
+        pass in logarithmic quantities for the ``haloprop_galaxies``
+        and ``haloprop_halos`` arrays.
 
     Returns
     -------
@@ -44,10 +50,12 @@ def hod_from_mock(haloprop_galaxies, haloprop_halos, haloprop_bins=None):
     Examples
     --------
     In the following calculation, we'll populate a mock catalog and then manually
-    compute the central galaxy HOD from the ``galaxy_table``.
+    compute the central galaxy HOD (number of central galaxies above the mass
+    threshold as a function of halo mass) from the ``galaxy_table``.
 
     >>> from halotools.empirical_models import PrebuiltHodModelFactory
     >>> from halotools.sim_manager import FakeSim
+    >>> from halotools.mock_observables import hod_from_mock
     >>> model = PrebuiltHodModelFactory('leauthaud11', threshold=10.75)
     >>> halocat = FakeSim()
     >>> model.populate_mock(halocat)
@@ -81,9 +89,9 @@ def hod_from_mock(haloprop_galaxies, haloprop_halos, haloprop_bins=None):
 
 
 def get_haloprop_of_galaxies(halo_id_galaxies, halo_id_halos, haloprop_halos):
-    """ Calculate the host halo property of every galaxy with a ``halo_id`` that
-    matches one of the input halos. This function can be used, for example,
-    to calculate the host halo mass of a galaxy.
+    """ Determine the halo property in ``haloprop_halos`` for each galaxy.
+    This crossmatches the galaxy catalog with the halo catalog using their
+    ``halo_id``. Return the halo property for galaxies with a match, else nan.
 
     Parameters
     ----------
@@ -99,7 +107,7 @@ def get_haloprop_of_galaxies(halo_id_galaxies, halo_id_halos, haloprop_halos):
 
     haloprop_halos : ndarray
         Array of shape (num_halos, ) storing the halo property of interest,
-        e.g., ``halo_mvir``.
+        e.g., ``halo_vpeak`` or ``halo_spin``.
 
     Returns
     -------
@@ -117,6 +125,7 @@ def get_haloprop_of_galaxies(halo_id_galaxies, halo_id_halos, haloprop_halos):
 
     >>> from halotools.empirical_models import PrebuiltHodModelFactory
     >>> from halotools.sim_manager import FakeSim
+    >>> from halotools.mock_observables import get_haloprop_of_galaxies
     >>> model = PrebuiltHodModelFactory('leauthaud11')
     >>> halocat = FakeSim()
     >>> model.populate_mock(halocat)

diff --git a/halotools/mock_observables/pair_counters/marked_npairs_3d.py b/halotools/mock_observables/pair_counters/marked_npairs_3d.py
@@ -20,19 +20,24 @@
 __all__ = ('marked_npairs_3d', )
 
 
-def marked_npairs_3d(sample1, sample2, rbins,
-                  period=None, weights1=None, weights2=None,
-                  weight_func_id=0, verbose=False, num_threads=1,
+# cbx_aph: weight_func_id is not optional. However, moving it to be a
+# required arg is a breaking change.
+def marked_npairs_3d(sample1, sample2, rbins, weight_func_id,
+                  period=None, weights1=None, weights2=None, num_threads=1,
                   approx_cell1_size=None, approx_cell2_size=None):
     """
-    Calculate the number of weighted pairs with separations greater than or equal to r, :math:`W(>r)`.
+    Calculate the weighted number of pairs with separations less than or equal to
+    the input ``rbins``, :math:`W(<r)`.
 
     The weight given to each pair is determined by the weights for a pair,
     :math:`w_1`, :math:`w_2`, and a user-specified "weighting function", indicated
     by the ``weight_func_id`` parameter, :math:`f(w_1,w_2)`.
 
     Note that if sample1 == sample2, the `marked_npairs` function double-counts pairs.
 
+    Note that this does not count the number of pairs *between* bin edges, but rather the
+    cumulative number of pairs with separation smaller than each bin edge in ``rbins``.
+
     Parameters
     ----------
     sample1 : array_like
@@ -51,6 +56,11 @@ def marked_npairs_3d(sample1, sample2, rbins,
         numpy array of length *Nrbins+1* defining the boundaries of bins in which
         pairs are counted.
 
+    weight_func_id : int
+        weighting function integer ID. Each weighting function requires a specific
+        number of weights per point, *N_weights*.  See the Notes for a description of
+        available weighting functions.
+
     period : array_like, optional
         Length-3 sequence defining the periodic boundary conditions
         in each dimension. If you instead provide a single scalar, Lbox,
@@ -66,14 +76,6 @@ def marked_npairs_3d(sample1, sample2, rbins,
         containing the weights used for the weighted pair counts. If this parameter is
         None, the weights are set to np.ones(*(N1,N_weights)*).
 
-    weight_func_id : int, optional
-        weighting function integer ID. Each weighting function requires a specific
-        number of weights per point, *N_weights*.  See the Notes for a description of
-        available weighting functions.
-
-    verbose : Boolean, optional
-        If True, print out information and progress.
-
     num_threads : int, optional
         Number of threads to use in calculation, where parallelization is performed
         using the python ``multiprocessing`` module. Default is 1 for a purely serial
@@ -136,7 +138,7 @@ def marked_npairs_3d(sample1, sample2, rbins,
     """
 
     result = _npairs_3d_process_args(sample1, sample2, rbins, period,
-            verbose, num_threads, approx_cell1_size, approx_cell2_size)
+            num_threads, approx_cell1_size, approx_cell2_size)
     x1in, y1in, z1in, x2in, y2in, z2in = result[0:6]
     rbins, period, num_threads, PBCs, approx_cell1_size, approx_cell2_size = result[6:]
     xperiod, yperiod, zperiod = period
@@ -195,7 +197,7 @@ def _marked_npairs_process_weights(sample1, sample2, weights1, weights2, weight_
     _converted_to_2d_from_1d = False
     # First convert weights1 into a 2-d ndarray
     if weights1 is None:
-        weights1 = np.ones((npts_sample1, 1), dtype=np.float64)
+        weights1 = np.ones(correct_shape1, dtype=np.float64)
     else:
         weights1 = np.atleast_1d(weights1)
         weights1 = weights1.astype("float64")
@@ -236,7 +238,7 @@ def _marked_npairs_process_weights(sample1, sample2, weights1, weights2, weight_
     _converted_to_2d_from_1d = False
     # Now convert weights2 into a 2-d ndarray
     if weights2 is None:
-        weights2 = np.ones((npts_sample2, 1), dtype=np.float64)
+        weights2 = np.ones(correct_shape2, dtype=np.float64)
     else:
         weights2 = np.atleast_1d(weights2)
         weights2 = weights2.astype("float64")