Skip to content

Commit

Permalink
Create Yaml files from Google Sheet
Browse files Browse the repository at this point in the history
  • Loading branch information
GromNaN committed Oct 4, 2023
1 parent 60908f0 commit 6fe9809
Show file tree
Hide file tree
Showing 4 changed files with 361 additions and 0 deletions.
2 changes: 2 additions & 0 deletions generator/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
"mongodb/mongodb": "@dev",
"nette/php-generator": "^4",
"symfony/console": "^6.3",
"symfony/css-selector": "^6.3",
"symfony/dom-crawler": "^6.3",
"symfony/yaml": "^6.3"
},
"license": "Apache-2.0",
Expand Down
122 changes: 122 additions & 0 deletions generator/config/schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$ref": "#/definitions/Operator",
"definitions": {
"Operator": {
"type": "object",
"additionalProperties": false,
"properties": {
"name": {
"$comment": "The name of the operator. Must start with a $",
"type": "string",
"pattern": "^\\$[a-z][a-zA-Z]+$"
},
"category": {
"$comment": "The category as defined by MongoDB's documentation.",
"type": "array",
"items": {
"type": "string"
}
},
"link": {
"$comment": "The link to the operator's documentation on MongoDB's website.",
"type": "string",
"format": "uri",
"qt-uri-protocols": [
"https"
]
},
"returnType": {
"type": "array",
"items": {
"type": "string"
}
},
"encode": {
"$comment": "Specifies how operator parameters are encoded. array: parameters are encoded as an array of values in the order they are defined by the spec. object: parameters are encoded as an object with keys matching the parameter names. single: get the single parameter value.",
"type": "string",
"enum": [
"array",
"object",
"single"
]
},
"description": {
"$comment": "The description of the operator from MongoDB's documentation.",
"type": "string"
},
"parameters": {
"$comment": "An optional list of parameters for the operator.",
"type": "array",
"items": {
"$ref": "#/definitions/Parameter"
}
}
},
"required": [
"category",
"description",
"encode",
"link",
"name",
"parameters",
"returnType"
],
"title": "Operator"
},
"Parameter": {
    "type": "object",
    "additionalProperties": false,
    "properties": {
        "name": {
            "type": "string",
            "pattern": "^[a-z][a-zA-Z0-9]+$"
        },
        "type": {
            "$comment": "The list of types accepted for the parameter.",
            "type": "array",
            "items": {
                "type": "string"
            }
        },
        "description": {
            "$comment": "The description of the argument from MongoDB's documentation.",
            "type": "string"
        },
        "optional": {
            "$comment": "Whether the parameter is optional or not.",
            "type": "boolean"
        },
        "valueMin": {
            "$comment": "The minimum value for a numeric parameter.",
            "type": "number"
        },
        "valueMax": {
            "$comment": "The maximum value for a numeric parameter.",
            "type": "number"
        },
        "variadic": {
            "$comment": "Whether the parameter is variadic or not.",
            "type": "string",
            "enum": [
                "list",
                "map"
            ]
        },
        "variadicMin": {
            "$comment": "The minimum number of arguments for a variadic parameter.",
            "type": "integer",
            "minimum": 0
        }
    },
    "required": [
        "description",
        "name",
        "type"
    ],
    "title": "Parameter"
}
}
}
2 changes: 2 additions & 0 deletions generator/generate
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
<?php declare(strict_types=1);

use MongoDB\CodeGenerator\Command\GenerateCommand;
use MongoDB\CodeGenerator\Command\ScrapeCommand;
use Symfony\Component\Console\Application;

if (!file_exists(__DIR__ . '/../vendor/autoload.php')) {
Expand All @@ -13,5 +14,6 @@ require __DIR__ . '/vendor/autoload.php';

$application = new Application();
$application->add(new GenerateCommand(__DIR__ . '/../', __DIR__ . '/config'));
$application->add(new ScrapeCommand(__DIR__ . '/config'));
$application->setDefaultCommand('generate');
$application->run();
235 changes: 235 additions & 0 deletions generator/src/Command/ScrapeCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
<?php
declare(strict_types=1);

namespace MongoDB\CodeGenerator\Command;

use InvalidArgumentException;
use RuntimeException;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Yaml\Yaml;

use function array_column;
use function array_combine;
use function array_key_exists;
use function array_shift;
use function assert;
use function explode;
use function file_exists;
use function file_get_contents;
use function file_put_contents;
use function htmlspecialchars_decode;
use function is_dir;
use function ksort;
use function max;
use function mkdir;
use function sort;
use function sprintf;
use function str_contains;
use function str_ends_with;
use function str_replace;
use function strip_tags;
use function trim;
use function var_export;

use const PHP_EOL;

final class ScrapeCommand extends Command
{
private Crawler $crawler;

/** @var array<string, string> $tabs Associative array of names to table ids */
private array $tabs;

/**
 * @param string $configDir Directory below which the generated YAML files are
 *                          written, one sub-directory per kind of operator
 */
public function __construct(
    private readonly string $configDir,
) {
    parent::__construct();
}

/**
 * Registers the command under the name "scrape" and gives it a description
 * so that it is self-explanatory in the console's command listing.
 */
public function configure(): void
{
    $this
        ->setName('scrape')
        ->setDescription('Create the operator YAML files from the published Google Sheet');
}

/**
 * Downloads the published Google Sheet and writes one YAML file per operator.
 *
 * Every spreadsheet tab listed below is parsed with getTableData(); each
 * resulting document is converted by formatSpec() and written by
 * writeYamlFile() into the matching sub-directory of the config directory.
 *
 * @return int Command::SUCCESS when all tabs were processed, Command::FAILURE
 *             when the spreadsheet could not be downloaded
 */
public function execute(InputInterface $input, OutputInterface $output): int
{
    $url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vROpGTJGXAKf2SVuSZaw16NwMVtzMVGH9b-YiMtddgZRZOjOO6jK2YLbTUZ0N_qe74nxGY9hYhUe-l2/pubhtml';

    // file_get_contents() returns false on failure; report that here instead of
    // handing a non-string value over to the Crawler.
    $index = file_get_contents($url);
    if ($index === false) {
        $output->writeln('<error>Unable to download the spreadsheet from ' . $url . '</error>');

        return Command::FAILURE;
    }

    $this->crawler = new Crawler($index);
    $this->extractTabs();

    // Spreadsheet tab name => output directory, processed in this order
    $targets = [
        'aggregation pipeline operators' => 'aggregation-operators',
        'query operators' => 'query-operators',
        'aggregation pipeline stages' => 'aggregation-stages',
    ];

    foreach ($targets as $tabName => $dirname) {
        foreach ($this->getTableData($this->crawler, $tabName) as $doc) {
            $this->writeYamlFile($dirname, $this->formatSpec($doc));
        }
    }

    return Command::SUCCESS;
}

/**
 * Fills $this->tabs with the tab names found in the sheet menu.
 *
 * Every "#sheet-menu > li" item contributes one entry: its text is the tab
 * name, and its id attribute without the "sheet-button-" prefix is the id.
 */
private function extractTabs(): void
{
    $menuItems = $this->crawler->filter('#sheet-menu > li');

    $entries = $menuItems->each(function (Crawler $item): array {
        $tabName = $item->text();
        $tableId = str_replace('sheet-button-', '', $item->attr('id'));

        return ['name' => $tabName, 'id' => $tableId];
    });

    // Index the ids by tab name
    $this->tabs = array_column($entries, 'id', 'name');
}

/**
 * Reads the HTML table of a spreadsheet tab and groups its rows into documents.
 *
 * The first table row provides the column headers. A data row whose first
 * cell is non-empty starts a new document; the following rows with an empty
 * first cell add one more entry to the "Args" list of the current document.
 * Columns whose header contains "Arg" are stored in the current "Args" entry
 * with "Arg" removed from the key; all other columns are stored on the
 * document itself under the header name.
 *
 * @param Crawler $crawler Crawler holding the published spreadsheet page
 * @param string  $tabName Name of the tab as listed in $this->tabs
 *
 * @return list<array<string, mixed>>
 *
 * @throws InvalidArgumentException When $tabName is not a known tab
 */
private function getTableData(Crawler $crawler, string $tabName): array
{
    $id = $this->tabs[$tabName] ?? throw new InvalidArgumentException('Invalid tab name: ' . $tabName);

    $table = $crawler->filter('#' . $id . ' table > tbody');

    // Load the table into a 2D array indexed by [row][column]
    $rows = [];
    $table->filter('tr')->each(static function (Crawler $row, int $rowIndex) use (&$rows): void {
        $cellIndex = 0;

        $row->filter('td')->each(static function (Crawler $cell) use (&$rows, $rowIndex, &$cellIndex): void {
            // Skip freezebar cells
            if (str_contains($cell->attr('class') ?? '', 'freezebar-cell')) {
                return;
            }

            // The attribute is a string, or null when absent: normalise it to
            // a row count of at least 1
            $rowspan = max(1, (int) $cell->attr('rowspan'));

            // Advance to the next available cell: a column may already be
            // reserved by the rowspan of a cell from a previous row
            while (array_key_exists($rowIndex, $rows) && array_key_exists($cellIndex, $rows[$rowIndex])) {
                $cellIndex++;
            }

            // Turn line breaks into newlines before dropping the remaining markup
            $value = $cell->html();
            $value = str_replace(['<br>', '<br />', '<br/>'], PHP_EOL, $value);
            $value = strip_tags($value);
            $value = htmlspecialchars_decode($value);
            $value = trim($value);

            // Store the value in the current row and reserve the same column
            // with null in the following rows covered by the rowspan
            $rows[$rowIndex][$cellIndex] = $value;
            for ($spannedRow = $rowIndex + 1; $spannedRow < $rowIndex + $rowspan; $spannedRow++) {
                $rows[$spannedRow][$cellIndex] = null;
            }
        });

        // Columns reserved by a rowspan were inserted first: restore column order
        if (isset($rows[$rowIndex])) {
            ksort($rows[$rowIndex]);
        }
    });

    // Extract headers from first row
    $headers = array_shift($rows) ?? [];

    // Map header to field names + aggregate args
    $docs = [];
    $docId = 0;
    $argId = 0;
    foreach ($rows as $row) {
        // Create a new document for each row that starts with a non-empty cell
        if (($row[0] ?? null) && $docs !== []) {
            $docId++;
            $argId = 0;
        }

        foreach ($row as $index => $cell) {
            // A cell outside of the header row has no field name to be stored under
            $header = $headers[$index] ?? null;
            if ($header === null) {
                continue;
            }

            if (str_contains($header, 'Arg')) {
                $docs[$docId]['Args'][$argId][str_replace('Arg', '', $header)] = $cell;
            } elseif ($cell !== null) {
                $docs[$docId][$header] = $cell;
            }
        }

        $argId++;
    }

    return $docs;
}

/**
 * Converts a document extracted from the spreadsheet into the structure that
 * is dumped to YAML.
 *
 * The "Category", "ReturnType" and argument "Type" cells are split into one
 * list entry per line. A description is only included when it is not empty,
 * and "optional" is only set for arguments whose options contain "Optional".
 *
 * @param array<string, mixed> $doc Document with the keys asserted below
 * @phpstan-param array{
 *     Name: string,
 *     Category: string,
 *     Description: string,
 *     Link: string,
 *     ReturnType: string,
 *     Encode: string,
 *     Args: list<array{Name: string, Type: string, Options: string, Description: string}>,
 * } $doc
 *
 * @return array<string, mixed>
 */
private function formatSpec(array $doc): array
{
    foreach (['Name', 'Category', 'Description', 'Link', 'ReturnType', 'Encode', 'Args'] as $key) {
        assert(isset($doc[$key]), 'Missing ' . $key . ' for ' . var_export($doc, true));
    }

    $spec = [];
    $spec['name'] = $doc['Name'];
    $spec['category'] = explode(PHP_EOL, $doc['Category']);
    sort($spec['category']);
    $spec['link'] = $doc['Link'];
    $spec['returnType'] = explode(PHP_EOL, $doc['ReturnType']);
    $spec['encode'] = $doc['Encode'];

    if ($doc['Description']) {
        $spec['description'] = $doc['Description'] . PHP_EOL;
    }

    foreach ($doc['Args'] as $arg) {
        foreach (['Name', 'Type', 'Options', 'Description'] as $key) {
            assert(isset($arg[$key]), 'Missing Arg' . $key . ' for ' . var_export($doc, true));
        }

        $parameter = [];
        $parameter['name'] = $arg['Name'];
        // The accepted types of a parameter come from the argument's own "Type"
        // cell, not from the return type of the operator
        $parameter['type'] = explode(PHP_EOL, $arg['Type']);
        if (str_contains($arg['Options'], 'Optional')) {
            $parameter['optional'] = true;
        }

        if ($arg['Description']) {
            $parameter['description'] = $arg['Description'] . PHP_EOL;
        }

        $spec['parameters'][] = $parameter;
    }

    return $spec;
}

/**
 * Dumps a spec to "<configDir>/<dirname>/<name>.yaml".
 *
 * The file name is the value of "name" without any "$"; when nothing is left
 * the file is named "positional.yaml". A "# $schema" comment that points to
 * ../schema.json is written on the first line of the file.
 *
 * @param string               $dirname Sub-directory of the config directory
 * @param array<string, mixed> $data    Spec to dump; the "name" key is read
 *
 * @throws RuntimeException When the directory cannot be created or the file
 *                          cannot be written
 */
private function writeYamlFile(string $dirname, array $data): void
{
    $yaml = Yaml::dump($data, 3, 4, Yaml::DUMP_MULTI_LINE_LITERAL_BLOCK);
    $dirname = $this->configDir . '/' . $dirname;

    // Create missing parent directories as well. The trailing is_dir() check
    // keeps a directory created concurrently from being reported as a failure.
    if (! is_dir($dirname) && ! mkdir($dirname, 0755, true) && ! is_dir($dirname)) {
        throw new RuntimeException(sprintf('Unable to create directory "%s"', $dirname));
    }

    $name = str_replace('$', '', $data['name']) ?: 'positional';
    $filename = $dirname . '/' . $name . '.yaml';

    // Add a schema reference to the top of the file
    $schema = '# $schema: ../schema.json' . PHP_EOL;

    // Add a trailing newline if one is not present
    if (! str_ends_with($yaml, PHP_EOL)) {
        $yaml .= PHP_EOL;
    }

    // file_put_contents() returns false on failure: do not lose a spec silently
    if (file_put_contents($filename, $schema . $yaml) === false) {
        throw new RuntimeException(sprintf('Unable to write file "%s"', $filename));
    }
}
}

0 comments on commit 6fe9809

Please sign in to comment.