From acdf6b43bbebaca26bb8477d04daf62aea84bdd9 Mon Sep 17 00:00:00 2001 From: Troy Raen Date: Mon, 22 Jul 2024 13:53:38 -0700 Subject: [PATCH] Clean up documentation (#63) * add content to faq pages * add autosummary to module pages * update add-new-schema instructions * fix bug in example * add toctree to api home page * comment out a TODO * update CHANGELOG --- CHANGELOG.md | 2 + docs/source/api-reference/index.rst | 24 +++++----- docs/source/faq/what-is-bigquery.rst | 7 ++- docs/source/faq/what-is-cloud-run.rst | 13 ++++-- docs/source/faq/what-is-cloud-storage.rst | 7 ++- docs/source/faq/what-is-pubsub.rst | 9 +++- docs/source/for-developers/add-new-schema.rst | 44 ++++++------------- .../for-developers/get-alerts-for-testing.rst | 4 +- docs/source/one-time-setup/google-sdk.rst | 3 +- pittgoogle/alert.py | 9 +++- pittgoogle/auth.py | 7 +++ pittgoogle/bigquery.py | 10 ++++- pittgoogle/exceptions.py | 10 +++++ pittgoogle/pubsub.py | 12 ++++- pittgoogle/registry.py | 10 ++++- pittgoogle/schema.py | 17 ++++--- pittgoogle/types_.py | 9 +++- pittgoogle/utils.py | 9 +++- 18 files changed, 142 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5d0c95..75b391e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ### Changed - Reorganize and update data listings. +- Add FAQ content. +- Clean up docs. Remove 'TODO's. Add autosummary to module pages. ## \[v0.3.10\] - 2024-07-22 diff --git a/docs/source/api-reference/index.rst b/docs/source/api-reference/index.rst index 0303ffa..92e3801 100644 --- a/docs/source/api-reference/index.rst +++ b/docs/source/api-reference/index.rst @@ -3,16 +3,16 @@ pittgoogle ========== -[FIXME] This lists a subset of classes the user will interact with most. -Is this what we want? -Should at least add some text to clarify. +.. toctree:: + :caption: API Reference + :maxdepth: 1 -.. 
autosummary:: - - pittgoogle.alert.Alert - pittgoogle.bigquery.Table - pittgoogle.pubsub.Consumer - pittgoogle.pubsub.Subscription - pittgoogle.pubsub.Topic - pittgoogle.registry.ProjectIds - pittgoogle.registry.Schemas + alert + auth + bigquery + exceptions + pubsub + registry + schema + types_ + utils diff --git a/docs/source/faq/what-is-bigquery.rst b/docs/source/faq/what-is-bigquery.rst index 526b2f7..d65051c 100644 --- a/docs/source/faq/what-is-bigquery.rst +++ b/docs/source/faq/what-is-bigquery.rst @@ -1,4 +1,9 @@ What is BigQuery? ================= -Google Cloud's BigQuery is ... # [TODO] I've written this several times before -- find them. +Google `BigQuery <https://cloud.google.com/bigquery/docs/introduction>`__ is a fully managed data warehouse with +a SQL-based analytics engine. +It is optimized for complex analytical queries on large datasets. +It uses a columnar storage format and relational table structure with support for nested and repeated fields. +Data can be loaded via batch jobs or streaming inserts. +Streamed data is typically available to queries immediately. diff --git a/docs/source/faq/what-is-cloud-run.rst b/docs/source/faq/what-is-cloud-run.rst index 5839776..62cae2f 100644 --- a/docs/source/faq/what-is-cloud-run.rst +++ b/docs/source/faq/what-is-cloud-run.rst @@ -1,4 +1,11 @@ -What is Cloud Run? -================== +What are Cloud Functions and Cloud Run? +======================================= -Google Cloud's Cloud Run is ... # [TODO] I've written this several times before -- find them. +Google `Cloud Functions <https://cloud.google.com/functions/docs/concepts/overview>`__ and +Google `Cloud Run <https://cloud.google.com/run/docs/overview/what-is-cloud-run>`__ +are managed-compute services run by Google Cloud. +They both run containers that are configured as HTTP endpoints. +They can be used to process live message streams by attaching Pub/Sub push subscriptions. +Incoming requests (i.e., messages) are processed in parallel. +The number of container instances scales automatically and nearly instantaneously to meet incoming demand. 
+Differences between the services are essentially tradeoffs between efficiency (at large scale) and ease of use. diff --git a/docs/source/faq/what-is-cloud-storage.rst b/docs/source/faq/what-is-cloud-storage.rst index b9fe65b..9ff6602 100644 --- a/docs/source/faq/what-is-cloud-storage.rst +++ b/docs/source/faq/what-is-cloud-storage.rst @@ -1,4 +1,9 @@ What is Cloud Storage? ====================== -Google Cloud's Cloud Storage is ... # [TODO] I've written this several times before -- find them. +Google `Cloud Storage <https://cloud.google.com/storage/docs/introduction>`__ is Google's object +(file) storage service. +Objects are stored in buckets. +Buckets have a flat namespace (meaning there is no such thing as a directory or folder), but +folder-style functionality is provided by most of the access tools (e.g., console and APIs), which +interpret folder hierarchies from slashes in the object name. diff --git a/docs/source/faq/what-is-pubsub.rst b/docs/source/faq/what-is-pubsub.rst index 96dbc81..ed391b0 100644 --- a/docs/source/faq/what-is-pubsub.rst +++ b/docs/source/faq/what-is-pubsub.rst @@ -1,4 +1,11 @@ What is Pub/Sub? ================= -Google Cloud's Pub/Sub is ... # [TODO] I've written this several times before -- find them. +Google `Pub/Sub <https://cloud.google.com/pubsub/docs/overview>`__ is a messaging service that +uses the publish-subscribe pattern. +Publishers and subscribers communicate asynchronously, with the Pub/Sub service handling all message storage and delivery. +Publishers send messages to a topic, and Pub/Sub immediately delivers them to all attached subscriptions. +Subscriptions can be configured to either push messages to a client automatically or to wait for a client to make a pull request. +The owner of the topic sets the access rights that determine who is allowed to attach a subscription. +Messages published to a topic prior to a subscription being created will not be available to the subscriber. +By default, Pub/Sub messages are not ordered. 
diff --git a/docs/source/for-developers/add-new-schema.rst b/docs/source/for-developers/add-new-schema.rst index e9cdcd2..a3f3d16 100644 --- a/docs/source/for-developers/add-new-schema.rst +++ b/docs/source/for-developers/add-new-schema.rst @@ -1,33 +1,22 @@ Add a new schema to the registry ================================ -[FIXME] This information is old. It needs to be updated to describe to the SchemaHelpers and Schema -child classes. - This page contains instructions for adding a new schema to the registry so that it can be loaded using :meth:`pittgoogle.Schemas.get` and used to serialize and deserialize the alert bytes. - -You will need to update at least the "Required" files listed below, and potentially one or more of the -others. The schema format is expected to be either Avro or Json. +Only Avro and JSON schemas have been implemented so far. First, a naming guideline: - Schema names are expected to start with the name of the survey. If the survey has more than one schema, the survey name should be followed by a "." and then schema-specific specifier(s). -Required --------- - pittgoogle/registry_manifests/schemas.yml -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +----------------------------------------- *pittgoogle/registry_manifests/schemas.yml* is the manifest of registered schemas. Add a new section to the manifest following the template provided there. The fields are the same as -those of a :class:`pittgoogle.schema.Schema`. The ``helper`` field must point to code that can find and load -the new schema definition; more information below. - -[FIXME] +those of a :class:`pittgoogle.schema.Schema`. Case 1: The schema definition is not needed in order to deserialize the alert bytes. This is true for all Json, and the Avro streams which attach the schema in the data header. You should be able to use the @@ -37,34 +26,27 @@ The rest of the cases assume the schema definition is required. This is true for which do not attach the schema to the data packet. 
Case 2: You can write some code that will get the schema definition from an external repository. You will -probably need to write your own ``helper`` method (more below). Follow ``lsst`` as an example. This is +probably need to write your own helper method (more below). Follow LSST as an example. This is preferable to Case 3 because it's usually easier to access new schema versions as soon as the survey releases them. -Case 3: You want to include schema definition files with the ``pittgoogle-client`` package. Follow -``elasticc`` as an example. (1) Commit the files to the repo under the *pittgoogle/schemas* directory. It -is recommended that the main filename follow the syntax ".avsc". (2) Point ``path`` +Case 3: You want to include schema definition files with the pittgoogle-client package. Follow +ELAsTiCC as an example. (1) Commit the files to the repo under the *pittgoogle/schemas* directory. It +is recommended that the main filename follow the syntax ".avsc". (2) Point 'path' at the main file, relative to the package root. If the Avro schema is split into multiple files, you usually only need to point to the main one. (3) If you've followed the recommendations then the default -``helper`` should work, but you should check (more below). If you need to implement your own helper +helper should work, but you should check (more below). If you need to implement your own helper or update the existing, do it. -Potentially Required --------------------- - pittgoogle/schema.py -^^^^^^^^^^^^^^^^^^^^ - -# [FIXME] -*pittgoogle/schema.py* is the file containing the :class:`pittgoogle.schema.Schema` class. +-------------------- -If ``schemaless_alert_bytes='false'``, the defaults (mostly null/None) should work and you can ignore -this file (skip to the next section). +*pittgoogle/schema.py* is the file containing the :class:`pittgoogle.schema.Schema` class and helpers. 
-A "helper" method must exist in :class:`pittgoogle.schema.Schema` that can find and load your new schema -definition. The ``helper`` field in the yaml manifest (above) must be set to the name of this method. If a +A "helper" method must exist in :class:`pittgoogle.schema.SchemaHelpers` that can find and load your new schema +definition. The 'helper' field in the yaml manifest (above) must be set to the name of this method. If a suitable helper method does not already already exist for your schema, add one to this file by following -existing helpers like :meth:`pittgoogle.schema.Schema.default_schema_helper` as examples. **If your helper +existing helpers like :meth:`pittgoogle.schema.SchemaHelpers.default_schema_helper` as examples. **If your helper method requires a new dependency, be sure to add it following :doc:`/main/for-developers/manage-dependencies-poetry`.** diff --git a/docs/source/for-developers/get-alerts-for-testing.rst b/docs/source/for-developers/get-alerts-for-testing.rst index 1ae9031..4d79198 100644 --- a/docs/source/for-developers/get-alerts-for-testing.rst +++ b/docs/source/for-developers/get-alerts-for-testing.rst @@ -1,8 +1,6 @@ Get alerts for testing ====================== -[FIXME] Everyone needs this, not just developers. Move this page to the user-demos repo. 
- Setup ----- @@ -26,7 +24,7 @@ Here are examples that get an alert from each of our "loop" streams: loop_sub.touch() - alert = loop.pull_batch(max_messages=1)[0] + alert = loop_sub.pull_batch(max_messages=1)[0] Get alerts from a file on disk ------------------------------- diff --git a/docs/source/one-time-setup/google-sdk.rst b/docs/source/one-time-setup/google-sdk.rst index 86c51c9..7f1b73c 100644 --- a/docs/source/one-time-setup/google-sdk.rst +++ b/docs/source/one-time-setup/google-sdk.rst @@ -54,4 +54,5 @@ Instruct gcloud to authenticate using your key file containing You may want to `create a configuration `__ if you use multiple projects or want to control settings like the default region. -# [TODO] give instructions to add the ``gcloud auth`` command to the conda activation file and/or to create a configuration and activate it with the conda env. +.. + # [TODO] give instructions to add the ``gcloud auth`` command to the conda activation file and/or to create a configuration and activate it with the conda env. diff --git a/pittgoogle/alert.py b/pittgoogle/alert.py index 6fa4af9..cd1d95f 100644 --- a/pittgoogle/alert.py +++ b/pittgoogle/alert.py @@ -1,5 +1,12 @@ # -*- coding: UTF-8 -*- -"""Classes for working with astronomical alerts.""" +"""Classes for working with astronomical alerts. + +.. autosummary:: + + Alert + +---- +""" import base64 import datetime import importlib.resources diff --git a/pittgoogle/auth.py b/pittgoogle/auth.py index fd46799..f16f7a5 100644 --- a/pittgoogle/auth.py +++ b/pittgoogle/auth.py @@ -7,6 +7,13 @@ :doc:`/one-time-setup/authentication`. The recommendation is to use a :ref:`service account ` and :ref:`set environment variables `. In that case, you will not need to call this module directly. + + +.. 
autosummary:: + + Auth + +---- """ import logging import os diff --git a/pittgoogle/bigquery.py b/pittgoogle/bigquery.py index d7959f0..a3deba0 100644 --- a/pittgoogle/bigquery.py +++ b/pittgoogle/bigquery.py @@ -1,5 +1,13 @@ # -*- coding: UTF-8 -*- -"""Classes to facilitate connections to BigQuery datasets and tables.""" +"""Classes to facilitate connections to BigQuery datasets and tables. + +.. autosummary:: + + Client + Table + +---- +""" import logging from typing import TYPE_CHECKING, Optional diff --git a/pittgoogle/exceptions.py b/pittgoogle/exceptions.py index 653308a..4069f2c 100644 --- a/pittgoogle/exceptions.py +++ b/pittgoogle/exceptions.py @@ -1,4 +1,14 @@ # -*- coding: UTF-8 -*- +"""Exceptions. + +.. autosummary:: + + BadRequest + CloudConnectionError + SchemaError + +---- +""" class BadRequest(Exception): """Raised when a Flask request json envelope (e.g., from Cloud Run) is invalid.""" diff --git a/pittgoogle/pubsub.py b/pittgoogle/pubsub.py index c01e113..ee585c7 100644 --- a/pittgoogle/pubsub.py +++ b/pittgoogle/pubsub.py @@ -1,5 +1,15 @@ # -*- coding: UTF-8 -*- -"""Classes to facilitate connections to Google Cloud Pub/Sub streams.""" +"""Classes to facilitate connections to Google Cloud Pub/Sub streams. + +.. autosummary:: + + Consumer + Response + Subscription + Topic + +---- +""" import concurrent.futures import datetime import importlib.resources diff --git a/pittgoogle/registry.py b/pittgoogle/registry.py index 1514338..b40510d 100644 --- a/pittgoogle/registry.py +++ b/pittgoogle/registry.py @@ -1,5 +1,13 @@ # -*- coding: UTF-8 -*- -"""Pitt-Google registries.""" +"""Pitt-Google registries. + +.. 
autosummary:: + + ProjectIds + Schemas + +---- +""" import importlib.resources import logging from typing import Final diff --git a/pittgoogle/schema.py b/pittgoogle/schema.py index 6532c16..237519f 100644 --- a/pittgoogle/schema.py +++ b/pittgoogle/schema.py @@ -1,5 +1,13 @@ # -*- coding: UTF-8 -*- -"""Classes to manage alert schemas.""" +"""Classes to manage alert schemas. + +.. autosummary:: + + Schema + SchemaHelpers + +---- +""" import importlib.resources import io import json @@ -68,11 +76,10 @@ def elasticc_schema_helper(schema_dict: dict) -> "Schema": @staticmethod def lsst_schema_helper(schema_dict: dict) -> "Schema": - """Load the Avro schema definition for lsst.v7_1.alert. + """Load the Avro schema definition for lsst.v7_1.alert.""" + # [FIXME] This is a hack to get the latest schema version into pittgoogle-client + # until we can get :meth:`SchemaHelpers.lsst_auto_schema_helper` working. - [FIXME] This is hack to get the latest schema version into pittgoogle-client - until we can get :meth:`SchemaHelpers.lsst_auto_schema_helper` working. - """ if not schema_dict["name"] == "lsst.v7_1.alert": raise NotImplementedError("Only 'lsst.v7_1.alert' is supported for LSST.") diff --git a/pittgoogle/types_.py b/pittgoogle/types_.py index 900ac76..2a7baf2 100644 --- a/pittgoogle/types_.py +++ b/pittgoogle/types_.py @@ -1,5 +1,12 @@ # -*- coding: UTF-8 -*- -"""Classes defining new types.""" +"""Classes defining new types. + +.. autosummary:: + + PubsubMessageLike + +---- +""" import datetime import importlib.resources import logging diff --git a/pittgoogle/utils.py b/pittgoogle/utils.py index 823eae9..0ada4ca 100644 --- a/pittgoogle/utils.py +++ b/pittgoogle/utils.py @@ -1,5 +1,12 @@ # -*- coding: UTF-8 -*- -"""Classes and functions to support working with alerts and related data.""" +"""Classes and functions to support working with alerts and related data. + +.. autosummary:: + + Cast + +---- +""" import base64 import collections import io