Skip to content

Commit

Permalink
Separate docs source from parsing v2 (#1387)
Browse files Browse the repository at this point in the history
This PR refactors the code used to acquire the markdown files we source
our docs from. The goal is to make it easy to test file acquisition and
parsing separately.

The new interface looks like this:

https://github.com/pulumi/pulumi-terraform-bridge/blob/856055a0cd3238780c9c35bbe605282f75cc2de7/pkg/tfgen/source.go#L30-L47

This PR is best reviewed as two separate commits:
- d463605 is a byte-for-byte copy of
the functionality that we want to move out of `docs.go` into the newly
created `source.go`.
- 856055a implements and consumes the
interface.

This PR does not have *any* effect on downstream users. It is purely a
refactor for maintainability.
  • Loading branch information
iwahbe authored Sep 20, 2023
1 parent 8cda755 commit 87b8d9e
Show file tree
Hide file tree
Showing 3 changed files with 231 additions and 152 deletions.
181 changes: 35 additions & 146 deletions pkg/tfgen/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,11 @@ import (
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime/debug"
"sort"
"strings"
"sync"
"unicode"

"github.com/hashicorp/go-multierror"
Expand All @@ -38,7 +36,6 @@ import (
"golang.org/x/text/language"

"github.com/pulumi/pulumi/pkg/v3/codegen/python"
"github.com/pulumi/pulumi/sdk/v3/go/common/diag"
"github.com/pulumi/pulumi/sdk/v3/go/common/tokens"
"github.com/pulumi/pulumi/sdk/v3/go/common/util/contract"

Expand Down Expand Up @@ -136,115 +133,6 @@ const (
DataSourceDocs DocKind = "data-sources"
)

// repoPaths caches resolved module directories, keyed by module coordinates
// ("<host>/<org>/terraform-provider-<name>[/<version>]"). Successful lookups
// made by getRepoPath are stored here and reused by later calls.
var repoPaths sync.Map

// getRepoPath returns the local directory that holds the source of the module
// "<gitHost>/<org>/terraform-provider-<provider>", with "/<version>" appended
// to the module path when version is non-empty.
//
// The directory is resolved by running `go mod download -json` from the
// "provider" directory: the current working directory if it is already named
// "provider", otherwise its "provider" child. Results are cached in repoPaths.
//
// An error is returned when the working directory cannot be determined, when
// the go command fails, when its output is not valid JSON, or when the decoded
// object carries a non-empty Error field.
func getRepoPath(gitHost string, org string, provider string, version string) (string, error) {
	moduleCoordinates := fmt.Sprintf("%s/%s/terraform-provider-%s", gitHost, org, provider)
	if version != "" {
		moduleCoordinates = fmt.Sprintf("%s/%s", moduleCoordinates, version)
	}

	if cached, ok := repoPaths.Load(moduleCoordinates); ok {
		return cached.(string), nil
	}

	curWd, err := os.Getwd()
	if err != nil {
		return "", fmt.Errorf("error finding current working directory: %w", err)
	}
	if filepath.Base(curWd) != "provider" {
		curWd = filepath.Join(curWd, "provider")
	}

	command := exec.Command("go", "mod", "download", "-json", moduleCoordinates)
	command.Dir = curWd
	// Capture stdout only. The JSON object is written to stdout, while the go
	// command reports progress and diagnostics on stderr; mixing the two (as
	// CombinedOutput does) can make the JSON decoding below fail.
	output, err := command.Output()
	if err != nil {
		// Output stores the command's stderr on the ExitError because
		// command.Stderr was left unset; surface it alongside stdout.
		var stderr []byte
		if exitErr, ok := err.(*exec.ExitError); ok {
			stderr = exitErr.Stderr
		}
		return "", fmt.Errorf(
			"error running 'go mod download -json' in %q dir for module: %w\n\nOutput: %s%s",
			curWd, err, output, stderr)
	}

	// Only the fields read below are decoded from the command's JSON output.
	target := struct {
		Version string
		Dir     string
		Error   string
	}{}

	if err := json.Unmarshal(output, &target); err != nil {
		return "", fmt.Errorf("error parsing output of 'go mod download -json' for module: %w", err)
	}

	if target.Error != "" {
		return "", fmt.Errorf("error from 'go mod download -json' for module: %s", target.Error)
	}

	repoPaths.Store(moduleCoordinates, target.Dir)

	return target.Dir, nil
}

// getMarkdownNames lists, in lookup order, the markdown file names that may
// hold the documentation for the TF token rawName.
//
// The default candidates combine two stems with four extensions: first the
// name with the package prefix stripped (the most common layout), then the raw
// name (some providers keep the prefix in the file name). When globalInfo
// supplies an AlternativeNames hook, the names it returns are placed ahead of
// the defaults.
func getMarkdownNames(packagePrefix, rawName string, globalInfo *tfbridge.DocRuleInfo) []string {
	extensions := []string{".html.markdown", ".markdown", ".html.md", ".md"}
	stems := []string{
		// Most frequently, docs leave off the provider prefix
		withoutPackageName(packagePrefix, rawName),
		// But for some providers, the prefix is included in the name of the doc file
		rawName,
	}

	defaults := make([]string, 0, len(stems)*len(extensions))
	for _, stem := range stems {
		for _, ext := range extensions {
			defaults = append(defaults, stem+ext)
		}
	}

	if globalInfo == nil || globalInfo.AlternativeNames == nil {
		return defaults
	}

	// We look at user generated names before we look at default names
	custom := globalInfo.AlternativeNames(tfbridge.DocsPathInfo{
		TfToken: rawName,
	})
	return append(custom, defaults...)
}

// getMarkdownDetails locates the markdown documentation for rawName.
//
// It returns the markdown content, the name of the file it was read from, and
// whether anything was found. Resolution order:
//
//  1. If info carries a DocInfo with non-empty Markdown, that content is
//     returned directly with an empty file name.
//  2. Otherwise the docs are read from disk under repoPath. When repoPath is
//     empty it is resolved through getRepoPath; a failure there is reported to
//     sink at debug level and treated as "not found".
//  3. Candidate file names come from getMarkdownNames, followed by the
//     DocInfo's Source (if set) as the last candidate.
func getMarkdownDetails(sink diag.Sink, repoPath, org, provider string,
	resourcePrefix string, kind DocKind, rawName string,
	info tfbridge.ResourceOrDataSourceInfo, providerModuleVersion string, githost string,
	globalInfo *tfbridge.DocRuleInfo,
) ([]byte, string, bool) {
	var docs *tfbridge.DocInfo
	if info != nil {
		docs = info.GetDocs()
	}

	// Inline markdown takes precedence over anything on disk.
	if docs != nil && len(docs.Markdown) != 0 {
		return docs.Markdown, "", true
	}

	root := repoPath
	if root == "" {
		resolved, err := getRepoPath(githost, org, provider, providerModuleVersion)
		if err != nil {
			msg := "Skip getMarkdownDetails(rawname=%q) because getRepoPath(%q, %q, %q, %q) failed: %v"
			sink.Debugf(&diag.Diag{Message: msg}, rawName, githost, org, provider, providerModuleVersion, err)
			return nil, "", false
		}
		root = resolved
	}

	candidates := getMarkdownNames(resourcePrefix, rawName, globalInfo)
	if docs != nil && docs.Source != "" {
		candidates = append(candidates, docs.Source)
	}

	content, fileName, ok := readMarkdown(root, kind, candidates)
	if !ok {
		return nil, "", false
	}

	return content, fileName, true
}

// Create a regexp based replace rule that is bounded by non-ascii letter text.
//
// This function is not appropriate to be called in hot loops.
Expand Down Expand Up @@ -329,17 +217,34 @@ func formatEntityName(rawname string) string {

// getDocsForResource extracts documentation details for the given package from
// TF website documentation markdown content
func getDocsForResource(g *Generator, org string, provider string, resourcePrefix string, kind DocKind,
rawname string, info tfbridge.ResourceOrDataSourceInfo, providerModuleVersion string,
githost string) (entityDocs, error) {
func getDocsForResource(g *Generator, source DocsSource, kind DocKind,
rawname string, info tfbridge.ResourceOrDataSourceInfo) (entityDocs, error) {

if g.skipDocs {
return entityDocs{}, nil
}

markdownBytes, markdownFileName, found := getMarkdownDetails(g.sink, g.info.UpstreamRepoPath, org, provider,
resourcePrefix, kind, rawname, info, providerModuleVersion, githost, g.info.DocRules)
if !found {
var docInfo *tfbridge.DocInfo
if info != nil {
docInfo = info.GetDocs()
}

var docFile *DocFile
var err error
switch kind {
case ResourceDocs:
docFile, err = source.getResource(rawname, docInfo)
case DataSourceDocs:
docFile, err = source.getDatasource(rawname, docInfo)
default:
panic("unknown docs kind")
}

if err != nil {
return entityDocs{}, fmt.Errorf("get docs for token %s: %w", rawname, err)
}

if docFile == nil {
entitiesMissingDocs++
msg := fmt.Sprintf("could not find docs for %v %v. Override the Docs property in the %v mapping. See "+
"type tfbridge.DocInfo for details.", kind, formatEntityName(rawname), kind)
Expand All @@ -357,41 +262,39 @@ func getDocsForResource(g *Generator, org string, provider string, resourcePrefi
return entityDocs{}, nil
}

doc, err := parseTFMarkdown(g, info, kind, markdownBytes, markdownFileName, resourcePrefix, rawname)
markdownBytes, markdownFileName := docFile.Content, docFile.FileName

doc, err := parseTFMarkdown(g, info, kind, markdownBytes, markdownFileName, rawname)
if err != nil {
return entityDocs{}, err
}

var docinfo *tfbridge.DocInfo
if info != nil {
docinfo = info.GetDocs()
}
if docinfo != nil {
if docInfo != nil {
// Helper func for readability due to large number of params
getSourceDocs := func(sourceFrom string) (entityDocs, error) {
return getDocsForResource(g, org, provider, resourcePrefix, kind, sourceFrom, nil, providerModuleVersion, githost)
return getDocsForResource(g, source, kind, sourceFrom, nil)
}

if docinfo.IncludeAttributesFrom != "" {
sourceDocs, err := getSourceDocs(docinfo.IncludeAttributesFrom)
if docInfo.IncludeAttributesFrom != "" {
sourceDocs, err := getSourceDocs(docInfo.IncludeAttributesFrom)
if err != nil {
return doc, err
}

overlayAttributesToAttributes(sourceDocs, doc)
}

if docinfo.IncludeAttributesFromArguments != "" {
sourceDocs, err := getSourceDocs(docinfo.IncludeAttributesFromArguments)
if docInfo.IncludeAttributesFromArguments != "" {
sourceDocs, err := getSourceDocs(docInfo.IncludeAttributesFromArguments)
if err != nil {
return doc, err
}

overlayArgsToAttributes(sourceDocs, doc)
}

if docinfo.IncludeArgumentsFrom != "" {
sourceDocs, err := getSourceDocs(docinfo.IncludeArgumentsFrom)
if docInfo.IncludeArgumentsFrom != "" {
sourceDocs, err := getSourceDocs(docInfo.IncludeArgumentsFrom)
if err != nil {
return doc, err
}
Expand Down Expand Up @@ -456,20 +359,6 @@ func getDocsPath(repo string, kind DocKind) string {
return filepath.Join(repo, "docs", kindString)
}

// readMarkdown tries each name in possibleLocations, in order, inside the docs
// directory that getDocsPath reports for (repo, kind). It returns the content
// and name of the first file that can be read, or (nil, "", false) when none
// of the candidates is readable.
func readMarkdown(repo string, kind DocKind, possibleLocations []string) ([]byte, string, bool) {
	docsDir := getDocsPath(repo, kind)

	for _, candidate := range possibleLocations {
		content, err := os.ReadFile(filepath.Join(docsDir, candidate))
		if err != nil {
			// Any read failure simply moves on to the next candidate.
			continue
		}
		return content, candidate, true
	}

	return nil, "", false
}

//nolint:lll
var (
// For example:
Expand Down Expand Up @@ -513,7 +402,7 @@ func splitGroupLines(s, sep string) [][]string {
// parseTFMarkdown takes a TF website markdown doc and extracts a structured representation for use in
// generating doc comments
func parseTFMarkdown(g *Generator, info tfbridge.ResourceOrDataSourceInfo, kind DocKind,
markdown []byte, markdownFileName, resourcePrefix, rawname string) (entityDocs, error) {
markdown []byte, markdownFileName, rawname string) (entityDocs, error) {

p := &tfMarkdownParser{
sink: g,
Expand Down
10 changes: 4 additions & 6 deletions pkg/tfgen/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -1205,9 +1205,8 @@ func (g *Generator) gatherResource(rawname string,
// Collect documentation information
var entityDocs entityDocs
if !isProvider {
pd, err := getDocsForResource(g, g.info.GetGitHubOrg(), g.info.Name,
g.info.GetResourcePrefix(), ResourceDocs, rawname, info, g.info.GetProviderModuleVersion(),
g.info.GetGitHubHost())
source := NewGitRepoDocsSource(g)
pd, err := getDocsForResource(g, source, ResourceDocs, rawname, info)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -1399,9 +1398,8 @@ func (g *Generator) gatherDataSource(rawname string,
g.renamesBuilder.registerDataSource(dataSourcePath)

// Collect documentation information for this data source.
entityDocs, err := getDocsForResource(g, g.info.GetGitHubOrg(), g.info.Name,
g.info.GetResourcePrefix(), DataSourceDocs, rawname, info, g.info.GetProviderModuleVersion(),
g.info.GetGitHubHost())
source := NewGitRepoDocsSource(g)
entityDocs, err := getDocsForResource(g, source, DataSourceDocs, rawname, info)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 87b8d9e

Please sign in to comment.