From 27b03f37839283e5c21ceaa77aa9a6a34569fb29 Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Tue, 17 Mar 2015 08:07:34 +0900 Subject: [PATCH 01/13] Starting to sketch curriculum builds --- .../use-cases/sensor-data/build.gradle | 20 ++++---- courses/.gitignore | 1 + courses/DS210/build.gradle | 50 +++++++++++++++++++ gradle/plugins/curriculum.gradle | 2 + 4 files changed, 63 insertions(+), 10 deletions(-) create mode 100644 courses/.gitignore create mode 100644 courses/DS210/build.gradle diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle b/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle index b958e38a..ab11063b 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle @@ -1,14 +1,14 @@ buildscript { - repositories { - mavenLocal() - mavenCentral() - jcenter() - } - dependencies { - classpath "com.github.houbie:lesscss-gradle-plugin:1.0.3-less-1.7.0" - classpath 'com.bluepapa32:gradle-watch-plugin:0.1.5' - classpath 'org.asciidoctor:asciidoctor-gradle-plugin:1.5.2' - } + repositories { + mavenLocal() + mavenCentral() + jcenter() + } + dependencies { + classpath "com.github.houbie:lesscss-gradle-plugin:1.0.3-less-1.7.0" + classpath 'com.bluepapa32:gradle-watch-plugin:0.1.5' + classpath 'org.asciidoctor:asciidoctor-gradle-plugin:1.5.2' + } } apply plugin: 'com.bluepapa32.watch' diff --git a/courses/.gitignore b/courses/.gitignore new file mode 100644 index 00000000..d5cd4a6b --- /dev/null +++ b/courses/.gitignore @@ -0,0 +1 @@ +**/src/include.asc diff --git a/courses/DS210/build.gradle b/courses/DS210/build.gradle new file mode 100644 index 00000000..df3dab24 --- /dev/null +++ b/courses/DS210/build.gradle @@ -0,0 +1,50 @@ +buildscript { + repositories { + mavenLocal() + mavenCentral() + jcenter() + } + dependencies { + classpath "com.github.houbie:lesscss-gradle-plugin:1.0.3-less-1.7.0" + classpath 'com.bluepapa32:gradle-watch-plugin:0.1.5' 
+ classpath 'org.asciidoctor:asciidoctor-gradle-plugin:1.5.2' + } +} + +apply plugin: 'com.bluepapa32.watch' +apply plugin: 'org.asciidoctor.gradle.asciidoctor' +apply plugin: 'lesscss' + +ext { + curriculumRootDir = '../..' +} + +import org.asciidoctor.gradle.AsciidoctorTask + +task slides(type: AsciidoctorTask) +task docs(type: AsciidoctorTask) + +apply from: "${curriculumRootDir}/gradle/plugins/curriculum.gradle" + + + +def vertices = [ + 'cassandra/dev/data-modeling/use-cases/sensor-data', +] + +task buildMasterPresentation { + File includeFile + doLast { + includeFile = file('src/include.asc') + includeFile.withWriter { writer -> + writer.println "= DS210: Data Modeling" + vertices.each { vertex -> + writer.println "include::${curriculumRootDir}/${vertex}/src/slides.adoc[]" + } + writer.flush() + } + } +} + +slides.dependsOn << buildMasterPresentation +docs.dependsOn << buildMasterPresentation diff --git a/gradle/plugins/curriculum.gradle b/gradle/plugins/curriculum.gradle index 18d17e06..8f62c14a 100644 --- a/gradle/plugins/curriculum.gradle +++ b/gradle/plugins/curriculum.gradle @@ -36,6 +36,8 @@ slides { include 'slides.adoc' } + println sourceDir + backends 'deckjs' options template_dirs : [new File(templateDir, 'haml').absolutePath ] From 63fea5c8e497e1c14ab0ec0e3abb4cdc686ac24d Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Mon, 30 Mar 2015 17:35:56 -0600 Subject: [PATCH 02/13] Did not rename a directory --- courses/{DS210 => DS220}/build.gradle | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename courses/{DS210 => DS220}/build.gradle (100%) diff --git a/courses/DS210/build.gradle b/courses/DS220/build.gradle similarity index 100% rename from courses/DS210/build.gradle rename to courses/DS220/build.gradle From 6cbaec4f76f54392066d0df8ed0cc80760c395dc Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Fri, 3 Apr 2015 15:36:36 -0600 Subject: [PATCH 03/13] Submodule updates --- framework/asciidoctor-backends | 2 +- framework/deck.js | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/framework/asciidoctor-backends b/framework/asciidoctor-backends index 64d207d0..e698b046 160000 --- a/framework/asciidoctor-backends +++ b/framework/asciidoctor-backends @@ -1 +1 @@ -Subproject commit 64d207d013e31bded25677eecd22c6e40b462f65 +Subproject commit e698b046119c27fcddd1b2a6885b3ea5e3274851 diff --git a/framework/deck.js b/framework/deck.js index 11d42496..4b711f99 160000 --- a/framework/deck.js +++ b/framework/deck.js @@ -1 +1 @@ -Subproject commit 11d4249639f4ff826143e7898068bd886b80349d +Subproject commit 4b711f9954efdde073e07668797a0da50c9a6fd4 From 5b3be798a8c6f972d9d4712dd737a60d5b8fe6a3 Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Fri, 3 Apr 2015 17:06:47 -0600 Subject: [PATCH 04/13] A prototype of a per-slide include system --- .../use-cases/sensor-data/src/slides.adoc | 308 +----------------- .../sensor-data/src/slides/analysis.adoc | 104 ++++++ .../src/slides/application-workflow.adoc | 10 + .../src/slides/conceptual-data-model.adoc | 18 + .../sensor-data/src/slides/introduction.adoc | 28 ++ .../src/slides/logical-data-model.adoc | 21 ++ .../src/slides/physical-data-model.adoc | 101 ++++++ .../slides/what-are-sensor-applications.adoc | 14 + gradle/plugins/curriculum.gradle | 2 +- 9 files changed, 305 insertions(+), 301 deletions(-) create mode 100644 cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc create mode 100644 cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc create mode 100644 cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc create mode 100644 cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/introduction.adoc create mode 100644 cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc create mode 100644 cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc create mode 100644 
cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/what-are-sensor-applications.adoc diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc index 22a548ef..2597b299 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc @@ -8,306 +8,14 @@ DataStax Training :notes: :split: -== What are sensor applications? +include::slides/what-are-sensor-applications.adoc[] +include::slides/introduction.adoc[] +include::slides/conceptual-data-model.adoc[] +include::slides/application-workflow.adoc[] +include::slides/application-workflow.adoc[] +include::slides/logical-data-model.adoc[] +include::slides/analysis.adoc[] +include::slides/physical-data-model.adoc[] -* Agriculture -* Environment and natural resources -* Healthcare and wellness -* Homeland security -* Military -* Monitoring and control -* Retail -* Robotics and automation -* Smart home/office/auto -* Telematics -* Utilities - -== Sensor data: use case introduction - -*Data description* - -* Multiple sensor networks are deployed over non-overlapping regions -* A sensor network is identified by a unique name -* A sensor belongs to exactly one network -* A sensor has a unique identifier, location, and characteristics (e.g. accuracy, cost, manufacturing date) -* A sensor records new measurements (e.g. 
temperature, humidity, pressure) every second - -*_We will focus on temperature in this example_* - -== Sensor data: use case introduction - -*Application queries* - -* Q~1~: Find information about all networks; order by name (ASC) -* Q~2~: Find hourly average temperatures for all sensors in a specified network for a specified date range; order by date (DESC) and hour (DESC) -* Q~3~: Find information about all sensors in a specified network -* Q~4~: Find raw measurements for a particular sensor on a specified date; order by timestamp (DESC) - -[.notes] --- -Q2 is an example of time series data. We order by date and then hour. - -Q4 is an another example of time series data. Sensors continually collect data and store that data with their timestamp. --- - -== Sensor data: conceptual data model - -*Keys* - -* [blue]#*has*#: sensor id -* [blue]#*records*# and [blue]#*Measurement*#: sensor id, timestamp, parameter - -image::../images/conceptualdatamodel.svg[] - -[.notes] --- -Measurement's [emphasis]#*parameter*# attribute indicates [emphasis]#*value*#'s unit (temperature, humidity, etc.). In this example, we always record temperature. - -The double-lined diamond indicates an identifying relationship. The double-lined [emphasis]#*Measurement*# indicates a weak entity type. Thus, a [emphasis]#*Measurement*# cannot exist without an identifying [emphasis]#*Sensor*#. If we delete a [emphasis]#*Sensor*#, we must delete all of its associated [emphasis]#*Measurements*#. Without a [emphasis]#*Sensor*#, the [emphasis]#*Measurement*# does not have a [emphasis]#*location*# or [emphasis]#*characteristics*#. - -The key of the weak entity type also depends on the key of the strong entity type in the identifying relationship. Thus, [emphasis]#*Sensor#*'s [emphasis]#*id*# makes part of [emphasis]#*Measurement*#'s key, as the slide indicates. - --- - -== Sensor data: application workflow - -image::../images/applicationworkflow.svg[] - -[.notes] --- -We organize our queries by work flow. 
The first query retrieves all networks, identified by their names (in this case, we name the networks by number). The second query uses the network name to retrieve the hourly average temperature in a given date range. Using that information, we can generate a heat map for a single point in time. We can also generate a heat-map animation over a time range. - -Using the third query, we can produce a geographical image of all of our sensors. The user can then click a sensor for which we can further provide the raw data for a specific day via the fourth query. --- - -== Sensor data: logical data model - -image::../images/logicaldatamodel.svg[] - -[.notes] - -For Q1, the [emphasis]#*Network#* table stores all of the networks in a single partition. Thus, the partition key is a dummy value. To retrieve this single partition, we write: - -**** - -SELECT * FROM Networks - -**** - -This storage technique does not require a WHERE clause. - -The dummy data can be any data. Each row in the CQL result represents a network. The partition is small as we only have a handful of networks. - -For Q2, we retrieve the average hourly temperature for a given date range. We also record the sensor location to later produce the heat map. - -For Q3, we retrieve all the sensors within a network. Making [emphasis]#*sensor*# part of the primary key handles the "[emphasis]#*Network*# has [emphasis]#*Sensor*#" relationship. - - -== Sensor data: analysis - -*Partition size* - -*Networks* -* One small partition - -image::../images/logical_networks.svg[float="left"] - -<<< - --- -*Sensor_by_network* - -* Assume atn most 1,000 sensors per network -* Manageable partitions - -image::../images/logical_sensors_by_network.svg[float="left"] --- - -<<< - --- -*Temperatures_by_sensor* - -* 86,400 seconds per day -* Manageable partitions - -image::../images/logical_temperatures_by_sensor.svg[float="left"] --- - -[.notes] --- -The next step is to analyse partition sizes for manageability. 
- -*_Networks_* - -* The single partition is small as we only have a dozen or so networks. - -*_Sensor_by_network_* - -* With only 1000 sensors per network, we have manageable partitions. -* We can apply the formula we saw earlier to prove this. - -*_Temperatures_by_sensor_* - -* We sample temperatures each second and partition by one day. -* 86,400 samples make a manageable partition. --- - -== Sensor data: analysis - -*Partition size* - -*Temperatures_by_network* - -** Assume at most 1,000 sensors per network -** 24 hours per day - -image::../images/logical_temperatures_by_network.svg[float="left"] - -<<< - -*_365 days_* (1 year) - -**** 365 x 24 x 1000 = 8,765,000 -**** Large partition - - -<<< - -*_30 days_* (1 month) - -**** 30 x 24 x 1000 = 720,000 -**** Somewhat manageable - -<<< - -*_7 days_* (1 week) - -**** 7 x 24 x 1000 = 168,000 -**** Manageable partitions - -[.notes] -The partition size for [emphasis]#*Temperatures_by_network*# will grow too large. In one partition, we accumulate 1,000 average temperatures every hour. Although we may handle this for the first month, our partition sizes will quickly become unmanageable. We can fix this by dividing our partitions by weeks. - -== Sensor data: analysis -*Duplication* - -* How many times is *_region_* stored per network? -* How many times is *_location_* stored per sensor? - -image::../images/logical_networks.svg[] -image::../images/logical_sensors_by_network.svg[] -image::../images/logical_temperatures_by_network.svg[] - -[.notes] --- -In our analysis, we also want to remove unnecessary duplication. - -Although region appears in [emphasis]#*Networks*# and [emphasis]#*Temperatures_by_network*#, duplication is minimal. In [emphasis]#*Networks*#, [emphasis]#*region*# will be a unique value. In [emphasis]#*Temperatures_by_network*#, [emphasis]#*region*# is a static column. - -We store each location value once in [emphasis]#*Sensor_by_network*#. 
However, in [emphasis]#*Temperatures_by_network*#, we store [emphasis]#*location*# several times. Although this appears to be unnecessary duplication, we need [emphasis]#*location* here to generate our heat map. So [emphasis]#*location*# is duplicate data, but it is not duplicate information. Also, adding [emphasis]#*location*# to [emphasis]#*Temperatures_by_network*# increases the value size by a factor of two (it is one more column in addition to [emphasis]#*avg_temp*#). --- - -== Sensor data: physical data model - -image::../images/physicaldatamodel.svg[] - -[.notes] --- -We added week to our [emphasis]#*Temperatures_by_network*# partition key to make our partition sizes manageable. We can say that the first week in January of 2010 represents week 1 and so forth. Thus the first 52 weeks make up 2010, and then the first week of 2011 is 53. One downside to this approach is if you want to create a heat map that spans several weeks, your query must retrieve multiple partitions. - -We also merged the date and hour into [emphasis]#*date_hour*# because the TIMESTAMP data type can store both. --- - -== Sensor data: physical data model - -**** - -image::../images/physical_networks.svg[float="right"] - - CREATE TABLE networks ( - dummy TEXT, - name TEXT, - region TEXT, - description TEXT, - n_sensors INT, - PRIMARY KEY (dummy, name) - ); - - -- Q1 - SELECT * - FROM networks; - -**** - -[.notes] - -Throughout this and the next three slides, notice that the queries are simple because we designed our tables specifically to support them. 
- -== Sensor data: physical data model - -**** - -image::../images/physical_temperatures_by_network.svg[float="right"] - - CREATE TABLE temperatures_by_network ( - network TEXT, - week INT, - date_hour TIMESTAMP, - sensor TEXT, - avg_temp FLOAT, - location TEXT, - region TEXT STATIC, - PRIMARY KEY ( (network, week), date_hour, sensor) - ) - WITH CLUSTERING ORDER BY (date_hour DESC, sensor ASC); - - -- Q2 - SELECT * FROM temperatures_by_network - WHERE network = ? AND week = ? - AND date_hour >= ? AND date_hour <= ?; - -**** - -== Sensor data: physical data model - -**** - -image::../images/physical_sensors_by_network.svg[float="right"] - - CREATE TABLE sensors_by_network ( - network TEXT, - sensor TEXT, - location TEXT, - chracteristics MAP, - PRIMARY KEY (network, sensor) - ); - - -- Q3 - SELECT * FROM sensors_by_network - WHERE network = ?; - -**** - -== Sensor data: physical data model - -**** - -image::../images/physical_temperatures_by_sensor.svg[float="right"] - - CREATE TABLE temperatures_by_sensor ( - sensor TEXT, - date TIMESTAMP, - ts TIMESTAMP, - temp FLOAT, - PRIMARY KEY ((sensor, date), ts) - ) - WITH CLUSTERING ORDER BY (ts DESC); - - -- Q4 - SELECT * FROM temperatures_by_sensor - WHERE sensor = ? 
AND date = ?; - -**** == End of presentation diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc new file mode 100644 index 00000000..7fbf9fe7 --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc @@ -0,0 +1,104 @@ +== Sensor data: analysis + +*Partition size* + +*Networks* +* One small partition + +image::images/logical_networks.svg[float="left"] + +<<< + +-- +*Sensor_by_network* + +* Assume atn most 1,000 sensors per network +* Manageable partitions + +image::images/logical_sensors_by_network.svg[float="left"] +-- + +<<< + +-- +*Temperatures_by_sensor* + +* 86,400 seconds per day +* Manageable partitions + +image::images/logical_temperatures_by_sensor.svg[float="left"] +-- + +[.notes] +-- +The next step is to analyse partition sizes for manageability. + +*_Networks_* + +* The single partition is small as we only have a dozen or so networks. + +*_Sensor_by_network_* + +* With only 1000 sensors per network, we have manageable partitions. +* We can apply the formula we saw earlier to prove this. + +*_Temperatures_by_sensor_* + +* We sample temperatures each second and partition by one day. +* 86,400 samples make a manageable partition. +-- + +== Sensor data: analysis + +*Partition size* + +*Temperatures_by_network* + +** Assume at most 1,000 sensors per network +** 24 hours per day + +image::images/logical_temperatures_by_network.svg[float="left"] + +<<< + +*_365 days_* (1 year) + +**** 365 x 24 x 1000 = 8,765,000 +**** Large partition + + +<<< + +*_30 days_* (1 month) + +**** 30 x 24 x 1000 = 720,000 +**** Somewhat manageable + +<<< + +*_7 days_* (1 week) + +**** 7 x 24 x 1000 = 168,000 +**** Manageable partitions + +[.notes] +The partition size for [emphasis]#*Temperatures_by_network*# will grow too large. In one partition, we accumulate 1,000 average temperatures every hour. 
Although we may handle this for the first month, our partition sizes will quickly become unmanageable. We can fix this by dividing our partitions by weeks. + +== Sensor data: analysis +*Duplication* + +* How many times is *_region_* stored per network? +* How many times is *_location_* stored per sensor? + +image::images/logical_networks.svg[] +image::images/logical_sensors_by_network.svg[] +image::images/logical_temperatures_by_network.svg[] + +[.notes] +-- +In our analysis, we also want to remove unnecessary duplication. + +Although region appears in [emphasis]#*Networks*# and [emphasis]#*Temperatures_by_network*#, duplication is minimal. In [emphasis]#*Networks*#, [emphasis]#*region*# will be a unique value. In [emphasis]#*Temperatures_by_network*#, [emphasis]#*region*# is a static column. + +We store each location value once in [emphasis]#*Sensor_by_network*#. However, in [emphasis]#*Temperatures_by_network*#, we store [emphasis]#*location*# several times. Although this appears to be unnecessary duplication, we need [emphasis]#*location* here to generate our heat map. So [emphasis]#*location*# is duplicate data, but it is not duplicate information. Also, adding [emphasis]#*location*# to [emphasis]#*Temperatures_by_network*# increases the value size by a factor of two (it is one more column in addition to [emphasis]#*avg_temp*#). +-- diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc new file mode 100644 index 00000000..7e3242e2 --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc @@ -0,0 +1,10 @@ +== Sensor data: application workflow + +image::images/applicationworkflow.svg[] + +[.notes] +-- +We organize our queries by work flow. The first query retrieves all networks, identified by their names (in this case, we name the networks by number). 
The second query uses the network name to retrieve the hourly average temperature in a given date range. Using that information, we can generate a heat map for a single point in time. We can also generate a heat-map animation over a time range. + +Using the third query, we can produce a geographical image of all of our sensors. The user can then click a sensor for which we can further provide the raw data for a specific day via the fourth query. +-- diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc new file mode 100644 index 00000000..7007a7f1 --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc @@ -0,0 +1,18 @@ +== Sensor data: conceptual data model + +*Keys* + +* [blue]#*has*#: sensor id +* [blue]#*records*# and [blue]#*Measurement*#: sensor id, timestamp, parameter + +image::images/conceptualdatamodel.svg[] + +[.notes] +-- +Measurement's [emphasis]#*parameter*# attribute indicates [emphasis]#*value*#'s unit (temperature, humidity, etc.). In this example, we always record temperature. + +The double-lined diamond indicates an identifying relationship. The double-lined [emphasis]#*Measurement*# indicates a weak entity type. Thus, a [emphasis]#*Measurement*# cannot exist without an identifying [emphasis]#*Sensor*#. If we delete a [emphasis]#*Sensor*#, we must delete all of its associated [emphasis]#*Measurements*#. Without a [emphasis]#*Sensor*#, the [emphasis]#*Measurement*# does not have a [emphasis]#*location*# or [emphasis]#*characteristics*#. + +The key of the weak entity type also depends on the key of the strong entity type in the identifying relationship. Thus, [emphasis]#*Sensor#*'s [emphasis]#*id*# makes part of [emphasis]#*Measurement*#'s key, as the slide indicates. 
+ +-- diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/introduction.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/introduction.adoc new file mode 100644 index 00000000..b9f0b1ef --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/introduction.adoc @@ -0,0 +1,28 @@ + +== Sensor data: use case introduction + +*Data description* + +* Multiple sensor networks are deployed over non-overlapping regions +* A sensor network is identified by a unique name +* A sensor belongs to exactly one network +* A sensor has a unique identifier, location, and characteristics (e.g. accuracy, cost, manufacturing date) +* A sensor records new measurements (e.g. temperature, humidity, pressure) every second + +*_We will focus on temperature in this example_* + +== Sensor data: use case introduction + +*Application queries* + +* Q~1~: Find information about all networks; order by name (ASC) +* Q~2~: Find hourly average temperatures for all sensors in a specified network for a specified date range; order by date (DESC) and hour (DESC) +* Q~3~: Find information about all sensors in a specified network +* Q~4~: Find raw measurements for a particular sensor on a specified date; order by timestamp (DESC) + +[.notes] +-- +Q2 is an example of time series data. We order by date and then hour. + +Q4 is an another example of time series data. Sensors continually collect data and store that data with their timestamp. 
+-- diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc new file mode 100644 index 00000000..a6a7f6ef --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc @@ -0,0 +1,21 @@ +== Sensor data: logical data model + +image::images/logicaldatamodel.svg[] + +[.notes] + +For Q1, the [emphasis]#*Network#* table stores all of the networks in a single partition. Thus, the partition key is a dummy value. To retrieve this single partition, we write: + +**** + +SELECT * FROM Networks + +**** + +This storage technique does not require a WHERE clause. + +The dummy data can be any data. Each row in the CQL result represents a network. The partition is small as we only have a handful of networks. + +For Q2, we retrieve the average hourly temperature for a given date range. We also record the sensor location to later produce the heat map. + +For Q3, we retrieve all the sensors within a network. Making [emphasis]#*sensor*# part of the primary key handles the "[emphasis]#*Network*# has [emphasis]#*Sensor*#" relationship. diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc new file mode 100644 index 00000000..12e2417f --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc @@ -0,0 +1,101 @@ +== Sensor data: physical data model + +image::images/physicaldatamodel.svg[] + +[.notes] +-- +We added week to our [emphasis]#*Temperatures_by_network*# partition key to make our partition sizes manageable. We can say that the first week in January of 2010 represents week 1 and so forth. Thus the first 52 weeks make up 2010, and then the first week of 2011 is 53. 
One downside to this approach is if you want to create a heat map that spans several weeks, your query must retrieve multiple partitions. + +We also merged the date and hour into [emphasis]#*date_hour*# because the TIMESTAMP data type can store both. +-- + +== Sensor data: physical data model + +**** + +image::images/physical_networks.svg[float="right"] + + CREATE TABLE networks ( + dummy TEXT, + name TEXT, + region TEXT, + description TEXT, + n_sensors INT, + PRIMARY KEY (dummy, name) + ); + + -- Q1 + SELECT * + FROM networks; + +**** + +[.notes] + +Throughout this and the next three slides, notice that the queries are simple because we designed our tables specifically to support them. + +== Sensor data: physical data model + +**** + +image::images/physical_temperatures_by_network.svg[float="right"] + + CREATE TABLE temperatures_by_network ( + network TEXT, + week INT, + date_hour TIMESTAMP, + sensor TEXT, + avg_temp FLOAT, + location TEXT, + region TEXT STATIC, + PRIMARY KEY ( (network, week), date_hour, sensor) + ) + WITH CLUSTERING ORDER BY (date_hour DESC, sensor ASC); + + -- Q2 + SELECT * FROM temperatures_by_network + WHERE network = ? AND week = ? + AND date_hour >= ? AND date_hour <= ?; + +**** + +== Sensor data: physical data model + +**** + +image::images/physical_sensors_by_network.svg[float="right"] + + CREATE TABLE sensors_by_network ( + network TEXT, + sensor TEXT, + location TEXT, + chracteristics MAP, + PRIMARY KEY (network, sensor) + ); + + -- Q3 + SELECT * FROM sensors_by_network + WHERE network = ?; + +**** + +== Sensor data: physical data model + +**** + +image::images/physical_temperatures_by_sensor.svg[float="right"] + + CREATE TABLE temperatures_by_sensor ( + sensor TEXT, + date TIMESTAMP, + ts TIMESTAMP, + temp FLOAT, + PRIMARY KEY ((sensor, date), ts) + ) + WITH CLUSTERING ORDER BY (ts DESC); + + -- Q4 + SELECT * FROM temperatures_by_sensor + WHERE sensor = ? 
AND date = ?; + +**** diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/what-are-sensor-applications.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/what-are-sensor-applications.adoc new file mode 100644 index 00000000..854b9f81 --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/what-are-sensor-applications.adoc @@ -0,0 +1,14 @@ + +== What are sensor applications? + +* Agriculture +* Environment and natural resources +* Healthcare and wellness +* Homeland security +* Military +* Monitoring and control +* Retail +* Robotics and automation +* Smart home/office/auto +* Telematics +* Utilities diff --git a/gradle/plugins/curriculum.gradle b/gradle/plugins/curriculum.gradle index 8f62c14a..355a78d6 100644 --- a/gradle/plugins/curriculum.gradle +++ b/gradle/plugins/curriculum.gradle @@ -48,7 +48,7 @@ slides { attributes idseparator: '-' resources { - from ("${projectDir}/images") { + from ("${projectDir}") { include '**/*.svg' } from(frameworkDir) { From 7d783f74abd53c7c4d6c986ae4275da3f364ccaf Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Fri, 3 Apr 2015 18:10:55 -0600 Subject: [PATCH 05/13] Broke out includes.adoc into its own file. Yes, kids, this might seem like an unhealthy level of indirection. Why not another file that includes the file that includes the files? Turtles all the way down, right? Well, turns out that `includes.adoc` is a handy way to indicate the intended order of the slides themselves _without_ itself containing an H1 outline tag. This allows us to construct a course build that can grab whole vertices or individual slides, without parsing any Asciidoc files at all. So it's not as crazy as it looks. 
--- .../use-cases/sensor-data/src/includes.adoc | 8 ++++++++ .../use-cases/sensor-data/src/slides.adoc | 10 +--------- 2 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc new file mode 100644 index 00000000..3f70f994 --- /dev/null +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc @@ -0,0 +1,8 @@ +include::slides/what-are-sensor-applications.adoc[] +include::slides/introduction.adoc[] +include::slides/conceptual-data-model.adoc[] +include::slides/application-workflow.adoc[] +include::slides/application-workflow.adoc[] +include::slides/logical-data-model.adoc[] +include::slides/analysis.adoc[] +include::slides/physical-data-model.adoc[] diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc index 2597b299..c0cac4c9 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc @@ -8,14 +8,6 @@ DataStax Training :notes: :split: -include::slides/what-are-sensor-applications.adoc[] -include::slides/introduction.adoc[] -include::slides/conceptual-data-model.adoc[] -include::slides/application-workflow.adoc[] -include::slides/application-workflow.adoc[] -include::slides/logical-data-model.adoc[] -include::slides/analysis.adoc[] -include::slides/physical-data-model.adoc[] - +include::includes.adoc[] == End of presentation From e563b6b800223be261659f2a17187e7d009503a4 Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Fri, 3 Apr 2015 18:13:29 -0600 Subject: [PATCH 06/13] Removed instructor notes just cuz --- .../sensor-data/src/instructor-notes.adoc | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100644 
cassandra/dev/data-modeling/use-cases/sensor-data/src/instructor-notes.adoc diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/instructor-notes.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/instructor-notes.adoc deleted file mode 100644 index bd25ce9e..00000000 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/instructor-notes.adoc +++ /dev/null @@ -1,74 +0,0 @@ -//// -In order to hide the instructor comments and make this file student-notes instead, -add a ! to the end of instructor, like: -:instructor!: -//// -//// -This attribute is used to show/hide the instructor-only notes in this file. -//// -:instructor: - -== *DS220 Apache Cassandra Data Modeling* - -=== *Data Modeling Use Case* - -==== *Slide 1: What are sensor applications?* - -ifdef::instructor[] -[NOTE] -*Instructor:* This might be a good opportunity to open the floor to discussion about Cassandra and sensor applications. -endif::instructor[] - -==== *Slide 2: Sensor data: use case introduction* - -ifdef::instructor[] -[NOTE] -*Instructor:* These are a quick round-up and review of everything taught over -the course of 2 days. Do not get bogged down, but summarize what was -taught and bridge each slide together with their relationship. -endif::instructor[] - -==== *Slide 3: More Sensor data: use case introduction* - -ifdef::instructor[] -[NOTE] -*Instructor:* It cannot be overemphasized that *query-driven* design is important -in Cassandra. This may seem like a departure for the relational database designers -in your audience. -endif::instructor[] - -==== *Slide 4: Sensor data: conceptual data model* - -ifdef::instructor[] -[NOTE] -*Instructor:* Remind participants that the conceptual data model is database-agnostic, -and uses Chen notation. -endif::instructor[] - -==== *Slide 6: Sensor data: logical data model* - -ifdef::instructor[] -[NOTE] -*Instructor:* This is the meat of this example. 
Discuss WHY certain choices are made -for the primary key and clustering columns, in order to answer the queries that were -designed. -endif::instructor[] - -==== *Slides 7-9: Sensor data: analysis* - -ifdef::instructor[] -[NOTE] -*Instructor:* The estimates done here are simply to give a magnitude of the scale -that may result from the table design. Note that the analysis done here points to -storing weekly data to end up with manageable partitions. -endif::instructor[] - -==== *Slide 10-14: Sensor data: physical data model* - -ifdef::instructor[] -[NOTE] -*Instructor:* Some questions to ask the students as you go through the tables are: -(1) what similarities do you see in the tables? -(2) what differences? -(3) why is clustering order important? -endif::instructor[] From ad1dbe0e9676c750278924c99b469034aafe37c8 Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Fri, 3 Apr 2015 18:31:59 -0600 Subject: [PATCH 07/13] Course slide include almost working. Image paths don't work yet, and frankly I think that's gonna be nasty. 
--- .../use-cases/sensor-data/src/includes.adoc | 16 ++++++++-------- .../use-cases/sensor-data/src/slides.adoc | 1 + courses/.gitignore | 2 +- courses/DS220/build.gradle | 18 +++++++++++++++--- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc index 3f70f994..3eb1056c 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/includes.adoc @@ -1,8 +1,8 @@ -include::slides/what-are-sensor-applications.adoc[] -include::slides/introduction.adoc[] -include::slides/conceptual-data-model.adoc[] -include::slides/application-workflow.adoc[] -include::slides/application-workflow.adoc[] -include::slides/logical-data-model.adoc[] -include::slides/analysis.adoc[] -include::slides/physical-data-model.adoc[] +include::{slide_path}/what-are-sensor-applications.adoc[] +include::{slide_path}/introduction.adoc[] +include::{slide_path}/conceptual-data-model.adoc[] +include::{slide_path}/application-workflow.adoc[] +include::{slide_path}/application-workflow.adoc[] +include::{slide_path}/logical-data-model.adoc[] +include::{slide_path}/analysis.adoc[] +include::{slide_path}/physical-data-model.adoc[] diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc index c0cac4c9..8af2b248 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc @@ -8,6 +8,7 @@ DataStax Training :notes: :split: +:slide_path: slides include::includes.adoc[] == End of presentation diff --git a/courses/.gitignore b/courses/.gitignore index d5cd4a6b..7795cadf 100644 --- a/courses/.gitignore +++ b/courses/.gitignore @@ -1 +1 @@ -**/src/include.asc +**/src/slides.adoc diff --git 
a/courses/DS220/build.gradle b/courses/DS220/build.gradle index df3dab24..9f8f22e9 100644 --- a/courses/DS220/build.gradle +++ b/courses/DS220/build.gradle @@ -35,11 +35,23 @@ def vertices = [ task buildMasterPresentation { File includeFile doLast { - includeFile = file('src/include.asc') + includeFile = file('src/slides.adoc') includeFile.withWriter { writer -> - writer.println "= DS210: Data Modeling" + writer.println """\ += DS210: Data Modeling +DataStax Training +:backend: deckjs +:deckjs_theme: datastax +:deckjs_transition: fade +:navigation: +:status: +:notes: +:split: +""" vertices.each { vertex -> - writer.println "include::${curriculumRootDir}/${vertex}/src/slides.adoc[]" +// writer.println ":slide_path: ../${curriculumRootDir}/${vertex}/src/slides" + writer.println ":slide_path: slides" + writer.println "include::../${curriculumRootDir}/${vertex}/src/includes.adoc[]" } writer.flush() } From a92669f7a242dbdfef879ebdc93d3b1cd7b285bd Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Wed, 15 Apr 2015 08:54:58 -0600 Subject: [PATCH 08/13] I tried building a course with images in the slides. I couldn't believe what happened! This commit has images working in course slide builds. To make this work, the course build needs to collect images from each vertex and put them in its own build dir. Realistically (skipping a couple of syllogisms), this means that the Asciidoc image path for _course_ slides has to be different from the per-vertex build. Thus we introduce the {image_path} attribute in each slide's Asciidoc file. You can see this in the sensor-data vertex examples in this branch, which is still functioning as the sample vertex. (We should extract that into an official template vertex soon.) Note that the other .adoc files are not working in the course build as of this commit. DEVELOPING... 
--- .../use-cases/sensor-data/src/slides.adoc | 1 + .../use-cases/sensor-data/src/slides/analysis.adoc | 14 +++++++------- .../src/slides/application-workflow.adoc | 2 +- .../src/slides/conceptual-data-model.adoc | 2 +- .../sensor-data/src/slides/logical-data-model.adoc | 2 +- .../src/slides/physical-data-model.adoc | 10 +++++----- courses/DS220/build.gradle | 5 +++++ 7 files changed, 21 insertions(+), 15 deletions(-) diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc index 8af2b248..78b56d4b 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides.adoc @@ -9,6 +9,7 @@ DataStax Training :split: :slide_path: slides +:image_path: images include::includes.adoc[] == End of presentation diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc index 7fbf9fe7..ad9b2aa0 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/analysis.adoc @@ -5,7 +5,7 @@ *Networks* * One small partition -image::images/logical_networks.svg[float="left"] +image::{image_path}/logical_networks.svg[float="left"] <<< @@ -15,7 +15,7 @@ image::images/logical_networks.svg[float="left"] * Assume atn most 1,000 sensors per network * Manageable partitions -image::images/logical_sensors_by_network.svg[float="left"] +image::{image_path}/logical_sensors_by_network.svg[float="left"] -- <<< @@ -26,7 +26,7 @@ image::images/logical_sensors_by_network.svg[float="left"] * 86,400 seconds per day * Manageable partitions -image::images/logical_temperatures_by_sensor.svg[float="left"] +image::{image_path}/logical_temperatures_by_sensor.svg[float="left"] -- [.notes] @@ -57,7 +57,7 @@ The next step is to analyse partition sizes for 
manageability. ** Assume at most 1,000 sensors per network ** 24 hours per day -image::images/logical_temperatures_by_network.svg[float="left"] +image::{image_path}/logical_temperatures_by_network.svg[float="left"] <<< @@ -90,9 +90,9 @@ The partition size for [emphasis]#*Temperatures_by_network*# will grow too large * How many times is *_region_* stored per network? * How many times is *_location_* stored per sensor? -image::images/logical_networks.svg[] -image::images/logical_sensors_by_network.svg[] -image::images/logical_temperatures_by_network.svg[] +image::{image_path}/logical_networks.svg[] +image::{image_path}/logical_sensors_by_network.svg[] +image::{image_path}/logical_temperatures_by_network.svg[] [.notes] -- diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc index 7e3242e2..b9382940 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/application-workflow.adoc @@ -1,6 +1,6 @@ == Sensor data: application workflow -image::images/applicationworkflow.svg[] +image::{image_path}/applicationworkflow.svg[] [.notes] -- diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc index 7007a7f1..aec42175 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/conceptual-data-model.adoc @@ -5,7 +5,7 @@ * [blue]#*has*#: sensor id * [blue]#*records*# and [blue]#*Measurement*#: sensor id, timestamp, parameter -image::images/conceptualdatamodel.svg[] +image::{image_path}/conceptualdatamodel.svg[] [.notes] -- diff --git 
a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc index a6a7f6ef..048d2b97 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/logical-data-model.adoc @@ -1,6 +1,6 @@ == Sensor data: logical data model -image::images/logicaldatamodel.svg[] +image::{image_path}/logicaldatamodel.svg[] [.notes] diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc index 12e2417f..427e9ff8 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/slides/physical-data-model.adoc @@ -1,6 +1,6 @@ == Sensor data: physical data model -image::images/physicaldatamodel.svg[] +image::{image_path}/physicaldatamodel.svg[] [.notes] -- @@ -13,7 +13,7 @@ We also merged the date and hour into [emphasis]#*date_hour*# because the TIMEST **** -image::images/physical_networks.svg[float="right"] +image::{image_path}/physical_networks.svg[float="right"] CREATE TABLE networks ( dummy TEXT, @@ -38,7 +38,7 @@ Throughout this and the next three slides, notice that the queries are simple be **** -image::images/physical_temperatures_by_network.svg[float="right"] +image::{image_path}/physical_temperatures_by_network.svg[float="right"] CREATE TABLE temperatures_by_network ( network TEXT, @@ -63,7 +63,7 @@ image::images/physical_temperatures_by_network.svg[float="right"] **** -image::images/physical_sensors_by_network.svg[float="right"] +image::{image_path}/physical_sensors_by_network.svg[float="right"] CREATE TABLE sensors_by_network ( network TEXT, @@ -83,7 +83,7 @@ image::images/physical_sensors_by_network.svg[float="right"] **** 
-image::images/physical_temperatures_by_sensor.svg[float="right"] +image::{image_path}/physical_temperatures_by_sensor.svg[float="right"] CREATE TABLE temperatures_by_sensor ( sensor TEXT, diff --git a/courses/DS220/build.gradle b/courses/DS220/build.gradle index 9f8f22e9..5d6d3a98 100644 --- a/courses/DS220/build.gradle +++ b/courses/DS220/build.gradle @@ -51,7 +51,12 @@ DataStax Training vertices.each { vertex -> // writer.println ":slide_path: ../${curriculumRootDir}/${vertex}/src/slides" writer.println ":slide_path: slides" + writer.println ":image_path: ${buildDir}/images/${vertex}" writer.println "include::../${curriculumRootDir}/${vertex}/src/includes.adoc[]" + copy { + from "${curriculumRootDir}/${vertex}/images" + into "${buildDir}/images/${vertex}" + } } writer.flush() } From e2be4269d6451f1aa07d3958b66b62a5f8f0908b Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Thu, 16 Apr 2015 07:40:59 -0600 Subject: [PATCH 09/13] He got course-level exercise builds to work by changing these five files. #4 brought tears to my eyes! I added a bit of imperative code to the course-level build to suck in all the vertex-level exercise files. Had to tweak the vertex-level build just a little bit to get image pathing to work out. Importantly, the `{image_path}` attribute is now a required part of each image path in vertex-level `exercises.adoc` files. Just like in the slides. 
--- .../use-cases/sensor-data/build.gradle | 4 ++++ .../use-cases/sensor-data/src/exercises.adoc | 7 ++----- courses/.gitignore | 1 + courses/DS220/build.gradle | 18 +++++++++++++++++- gradle/plugins/curriculum.gradle | 3 +-- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle b/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle index ab11063b..63a97881 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/build.gradle @@ -25,3 +25,7 @@ task slides(type: AsciidoctorTask) task docs(type: AsciidoctorTask) apply from: "${curriculumRootDir}/gradle/plugins/curriculum.gradle" + +docs { + attributes 'image_path': 'images' +} diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/exercises.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/exercises.adoc index ee088a07..50677973 100644 --- a/cassandra/dev/data-modeling/use-cases/sensor-data/src/exercises.adoc +++ b/cassandra/dev/data-modeling/use-cases/sensor-data/src/exercises.adoc @@ -1,6 +1,3 @@ -:source-highlighter: pygments - -= DS220 Apache Cassandra Data Modeling == Data Modeling Use Case @@ -17,9 +14,9 @@ . Review the data modeling steps. 
-image::images/usecaseimage1.jpg[] +image::{image_path}/usecaseimage1.jpg[] -image::images/investmentreview.svg[] +image::{image_path}/investmentreview.svg[] ==== *Instantiate and query the database* diff --git a/courses/.gitignore b/courses/.gitignore index 7795cadf..64639cde 100644 --- a/courses/.gitignore +++ b/courses/.gitignore @@ -1 +1,2 @@ **/src/slides.adoc +**/src/exercises.adoc diff --git a/courses/DS220/build.gradle b/courses/DS220/build.gradle index 5d6d3a98..964f5222 100644 --- a/courses/DS220/build.gradle +++ b/courses/DS220/build.gradle @@ -49,7 +49,6 @@ DataStax Training :split: """ vertices.each { vertex -> -// writer.println ":slide_path: ../${curriculumRootDir}/${vertex}/src/slides" writer.println ":slide_path: slides" writer.println ":image_path: ${buildDir}/images/${vertex}" writer.println "include::../${curriculumRootDir}/${vertex}/src/includes.adoc[]" @@ -60,6 +59,23 @@ DataStax Training } writer.flush() } + + + includeFile = file('src/exercises.adoc') + includeFile.withWriter { writer -> + writer.println """\ += DS210: Data Modeling +DataStax Training +:backend: html5 +""" + vertices.each { vertex -> + writer.println ":image_path: ${buildDir}/images/${vertex}" + writer.println "include::../${curriculumRootDir}/${vertex}/src/exercises.adoc[]" + } + writer.flush() + } + + } } diff --git a/gradle/plugins/curriculum.gradle b/gradle/plugins/curriculum.gradle index 415516f8..44a781af 100644 --- a/gradle/plugins/curriculum.gradle +++ b/gradle/plugins/curriculum.gradle @@ -31,6 +31,7 @@ docs { options eruby: 'erubis' attributes 'source-highlighter': 'coderay' +// attributes 'image_path': 'images' attributes idprefix: '' attributes idseparator: '-' @@ -44,8 +45,6 @@ slides { include 'slides.adoc' } - println sourceDir - backends 'deckjs' options template_dirs : [new File(templateDir, 'haml').absolutePath ] From e1d13059a36a209788e26f6ebc5e434edb548f2c Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Thu, 16 Apr 2015 08:49:46 -0600 Subject: [PATCH 
10/13] Renamed notes file --- .../sensor-data/src/{just-plain-old-notes.adoc => notes.adoc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cassandra/dev/data-modeling/use-cases/sensor-data/src/{just-plain-old-notes.adoc => notes.adoc} (100%) diff --git a/cassandra/dev/data-modeling/use-cases/sensor-data/src/just-plain-old-notes.adoc b/cassandra/dev/data-modeling/use-cases/sensor-data/src/notes.adoc similarity index 100% rename from cassandra/dev/data-modeling/use-cases/sensor-data/src/just-plain-old-notes.adoc rename to cassandra/dev/data-modeling/use-cases/sensor-data/src/notes.adoc From c0684fc46537c611a7a184180cb4c9cc3749d6f0 Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Thu, 16 Apr 2015 10:54:48 -0600 Subject: [PATCH 11/13] He saw the src directory didn't exist--and then he did this! At present, course builds treat the contents of the source directory as a build output, even to the point of .gitignoring `slides.adoc` and `exercises.adoc` (which the course build creates). The git ignore thing might not be able to last forever, but the build should still take responsibility for creating the `src` directory if it doesn't already exist. The build was failing for users who didn't already have the dir. --- courses/DS220/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/courses/DS220/build.gradle b/courses/DS220/build.gradle index 964f5222..2311054c 100644 --- a/courses/DS220/build.gradle +++ b/courses/DS220/build.gradle @@ -35,6 +35,7 @@ def vertices = [ task buildMasterPresentation { File includeFile doLast { + file('src').mkdirs() includeFile = file('src/slides.adoc') includeFile.withWriter { writer -> writer.println """\ From f11cbb84ad5be74b53fef2da735a89663faa2d1f Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Thu, 16 Apr 2015 17:10:45 -0600 Subject: [PATCH 12/13] The .gitignore I had set up here was going to cause :rage4:. Fixed it. 
--- courses/.gitignore | 2 -- courses/DS220/build.gradle | 1 - courses/DS220/src/.gitignore | 3 +++ 3 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 courses/.gitignore create mode 100644 courses/DS220/src/.gitignore diff --git a/courses/.gitignore b/courses/.gitignore deleted file mode 100644 index 64639cde..00000000 --- a/courses/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -**/src/slides.adoc -**/src/exercises.adoc diff --git a/courses/DS220/build.gradle b/courses/DS220/build.gradle index 2311054c..964f5222 100644 --- a/courses/DS220/build.gradle +++ b/courses/DS220/build.gradle @@ -35,7 +35,6 @@ def vertices = [ task buildMasterPresentation { File includeFile doLast { - file('src').mkdirs() includeFile = file('src/slides.adoc') includeFile.withWriter { writer -> writer.println """\ diff --git a/courses/DS220/src/.gitignore b/courses/DS220/src/.gitignore new file mode 100644 index 00000000..e3bc8a3e --- /dev/null +++ b/courses/DS220/src/.gitignore @@ -0,0 +1,3 @@ +exercises.adoc +slides.adoc + From f2b353b2ed554562b04cb6830f935c62f5a02cfd Mon Sep 17 00:00:00 2001 From: Tim Berglund Date: Fri, 17 Apr 2015 12:56:57 -0600 Subject: [PATCH 13/13] He saw that exercise-less vertices were breaking the build--and this is what he did! Now we check to see whether exercise files exist before attempting to include them. There is no requirement that a vertex have exercises, so this is a good thing. 
--- courses/DS220/build.gradle | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/courses/DS220/build.gradle b/courses/DS220/build.gradle index 964f5222..b7bc20bd 100644 --- a/courses/DS220/build.gradle +++ b/courses/DS220/build.gradle @@ -30,6 +30,7 @@ apply from: "${curriculumRootDir}/gradle/plugins/curriculum.gradle" def vertices = [ 'cassandra/dev/data-modeling/use-cases/sensor-data', + 'cassandra/dev/data-modeling/logical/principles' ] task buildMasterPresentation { @@ -69,8 +70,11 @@ DataStax Training :backend: html5 """ vertices.each { vertex -> - writer.println ":image_path: ${buildDir}/images/${vertex}" - writer.println "include::../${curriculumRootDir}/${vertex}/src/exercises.adoc[]" + def exercisesFile = "${curriculumRootDir}/${vertex}/src/exercises.adoc" + if(file(exercisesFile).exists()) { + writer.println ":image_path: ${buildDir}/images/${vertex}" + writer.println "include::../${exercisesFile}[]" + } } writer.flush() }