Skip to content

Commit

Permalink
feat(fixer_v2): Hole Pattern Syntax for Pattern Matching (#113)
Browse files Browse the repository at this point in the history
# Description

A hole pattern is a special syntax used in pattern matching that acts as
a placeholder or "wildcard" in a pattern. Think of it as a variable that
can match and capture different parts of code while searching through
source code.

```go
// Pattern with holes
if :[[condition]] {
    :[[body]]
}

// Can match code like:
if x > 0 {
    return true
}

// Or:
if isValid(user) {
    doSomething()
}
```

## Key Changes

1. Added support for typed hole patterns with format `:[[name:type]]`
2. Implemented quantifier support for hole patterns (`*`, `+`, `?`)
3. Enhanced lexer to properly parse and tokenize new pattern syntax
4. Added hole configuration system to manage pattern types and
quantifiers
5. Updated parser to handle the new hole types and configurations
6. Added comprehensive test coverage for new features

## New Pattern Syntax

| Syntax | Description | Example | Notes |
|--------|-------------|---------|-------|
| `:[name]` | Basic hole pattern | `:[var]` | Matches any content |
| `:[[name]]` | Long-form hole pattern | `:[[expr]]` | Same as basic, but with double brackets |
| `:[[name:identifier]]` | Identifier-typed hole | `:[[var:identifier]]` | Matches only valid identifiers |
| `:[[name:block]]` | Block-typed hole | `:[[body:block]]` | Matches code blocks |
| `:[[name:whitespace]]` | Whitespace-typed hole | `:[[ws:whitespace]]` | Matches whitespace |
| `:[[name:expression]]` | Expression-typed hole | `:[[expr:expression]]` | Matches expressions |

### Quantifiers

| Quantifier | Description | Example |
|------------|-------------|---------|
| `*` | Zero or more | `:[[stmt:block]]*` |
| `+` | One or more | `:[[expr:expression]]+` |
| `?` | Zero or one | `:[[ws:whitespace]]?` |

## Example Usage

```go
// Before
if :[condition] { :[body] }

// After - with types and quantifiers
if :[[cond:expression]] {
    :[[stmt:block]]*
}
```

## Next Steps
Future improvements could include:

- Implementing actual pattern matching logic for each hole type
- Adding pattern validation based on types
- Enhancing error reporting for invalid patterns
- Adding more specialized hole types for specific use cases

## Related Issue

#111
  • Loading branch information
notJoon authored Jan 18, 2025
1 parent da1902e commit c4f86e3
Show file tree
Hide file tree
Showing 7 changed files with 548 additions and 90 deletions.
6 changes: 3 additions & 3 deletions fixer_v2/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func buildRegexFromAST(node parser.Node) Option[Result] {

case *parser.HoleNode:
// convert hole name to capture group name
captures[v.Name] = groupCount
captures[v.Name()] = groupCount
groupCount++
sb.WriteString(`([^{}]+?)`)

Expand Down Expand Up @@ -110,11 +110,11 @@ func rewrite(rewritePattern string, env map[string]string) string {

case *parser.HoleNode:
// replace hole name with the corresponding value in 'env'
if value, ok := env[v.Name]; ok {
if value, ok := env[v.Name()]; ok {
result.WriteString(value)
} else {
// if value is not found, keep the original hole expression
result.WriteString(fmt.Sprintf(":[%s]", v.Name))
result.WriteString(fmt.Sprintf(":[%s]", v.Name()))
}

case *parser.BlockNode:
Expand Down
222 changes: 222 additions & 0 deletions fixer_v2/query/hole.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
package query

import (
"fmt"
"strings"
)

// HoleType identifies which kind of hole a pattern placeholder is.
type HoleType int

// Supported hole types. HoleAny is the zero value.
const (
	HoleAny        HoleType = iota // :[[name]] or :[name]
	HoleIdentifier                 // :[[id:identifier]]
	HoleBlock                      // :[[block:block]]
	HoleWhitespace                 // :[[ws:whitespace]]
	HoleExpression                 // :[[expr:expression]]
)

// String returns the lowercase name of the hole type, or "unknown" when h is
// not one of the declared HoleType constants.
func (h HoleType) String() string {
	// Indexed by the constant values so the table stays aligned with the
	// const block above.
	names := [...]string{
		HoleAny:        "any",
		HoleIdentifier: "identifier",
		HoleBlock:      "block",
		HoleWhitespace: "whitespace",
		HoleExpression: "expression",
	}
	if h < 0 || int(h) >= len(names) {
		return "unknown"
	}
	return names[h]
}

// Quantifier describes how many times a hole may repeat.
type Quantifier int

// Supported quantifiers. QuantNone is the zero value.
const (
	QuantNone       Quantifier = iota // No quantifier (exactly once)
	QuantZeroOrMore                   // * (zero or more times)
	QuantOneOrMore                    // + (one or more times)
	QuantZeroOrOne                    // ? (zero or one time)
)

// String returns the quantifier symbol ("*", "+" or "?"), the empty string for
// QuantNone, or "unknown" when q is not one of the declared constants.
func (q Quantifier) String() string {
	// Indexed by the constant values so the table stays aligned with the
	// const block above.
	symbols := [...]string{
		QuantNone:       "",
		QuantZeroOrMore: "*",
		QuantOneOrMore:  "+",
		QuantZeroOrOne:  "?",
	}
	if q < 0 || int(q) >= len(symbols) {
		return "unknown"
	}
	return symbols[q]
}

// ParseHolePattern parses the text of a single hole and returns its
// configuration.
//
// The pattern must start with ":[" (short form, ":[name]") or ":[["
// (long form, ":[[name]]"). The content between the brackets is "name" or
// "name:type", where type is one of identifier, block, whitespace or
// expression. One trailing quantifier character ('*', '+' or '?') may follow
// the closing brackets, e.g. ":[[name:type]]*".
//
// The returned config has Type HoleAny when no type is given and Quantifier
// QuantNone when no quantifier is given. An error is returned when the
// pattern does not start with ":[" (this includes empty and truncated input),
// when nothing is left between the brackets, or when the type is not
// recognised.
func ParseHolePattern(pattern string) (*HoleConfig, error) {
	// Locate the first byte of the content. Prefix tests are used instead of
	// indexing pattern[1] and pattern[2] so that empty or truncated input is
	// reported as an error rather than panicking, and so that text which
	// does not begin with ':' is rejected.
	var start int
	switch {
	case strings.HasPrefix(pattern, ":[["):
		start = 3
	case strings.HasPrefix(pattern, ":["):
		start = 2
	default:
		return nil, fmt.Errorf("invalid hole pattern: %s", pattern)
	}

	// end is the index of the last byte of the content; it is walked back
	// over the quantifier and the closing brackets. The prefix check above
	// guarantees len(pattern) >= 2, so pattern[end] is in range.
	end := len(pattern) - 1

	// A quantifier, if present, is the very last byte of the pattern.
	quantifier := QuantNone
	switch pattern[end] {
	case '*':
		quantifier = QuantZeroOrMore
	case '+':
		quantifier = QuantOneOrMore
	case '?':
		quantifier = QuantZeroOrOne
	}
	if quantifier != QuantNone {
		end--
	}

	// Remove closing brackets: "]]" first, otherwise a single "]".
	if end >= 1 && pattern[end-1:end+1] == "]]" {
		end -= 2
	} else if end >= 0 && pattern[end] == ']' {
		end--
	}

	if end < start {
		return nil, fmt.Errorf("invalid hole pattern: %s", pattern)
	}

	// Split the content into name and optional type. Only the first two
	// ':'-separated segments are used; any further segments are ignored.
	parts := strings.Split(pattern[start:end+1], ":")
	config := &HoleConfig{
		Name:       parts[0],
		Type:       HoleAny,
		Quantifier: quantifier,
	}

	if len(parts) > 1 {
		switch parts[1] {
		case "identifier":
			config.Type = HoleIdentifier
		case "block":
			config.Type = HoleBlock
		case "whitespace":
			config.Type = HoleWhitespace
		case "expression":
			config.Type = HoleExpression
		default:
			return nil, fmt.Errorf("unknown hole type: %s", parts[1])
		}
	}

	return config, nil
}

// matchHole tries to lex a hole (":[name]" or ":[[name]]", optionally typed
// and optionally followed by a quantifier) starting at l.position.
//
// On success it appends a TokenHole token carrying the hole's configuration,
// moves l.position just past the hole and returns true. It returns false
// without modifying the lexer when the byte after l.position is not '[' or
// when no closing bracket is found.
func (l *Lexer) matchHole() bool {
	if l.position+1 >= len(l.input) {
		return false
	}
	if l.input[l.position+1] != '[' {
		return false
	}

	startPos := l.position
	isLongForm := l.position+2 < len(l.input) && l.input[l.position+2] == '['

	// findHoleEnd returns the index just past the hole and already includes
	// a trailing quantifier. No further quantifier is consumed here:
	// swallowing a second one (as in ":[[x]]*+") would leave the closing
	// brackets inside the parsed name.
	end := l.findHoleEnd(isLongForm)
	if end <= 0 {
		return false
	}

	value := l.input[l.position:end]
	config, err := ParseHolePattern(value)
	if err != nil {
		// If parsing fails, fall back to an untyped, unquantified hole that
		// keeps at least the name. The quantifier is dropped first because
		// extractHoleName expects the value to end with the closing
		// bracket(s).
		bare := value
		if n := len(bare); n > 0 && isQuantifier(bare[n-1]) {
			bare = bare[:n-1]
		}
		config = &HoleConfig{
			Name:       extractHoleName(bare),
			Type:       HoleAny,
			Quantifier: QuantNone,
		}
	}

	l.addTokenWithHoleConfig(TokenHole, value, startPos, *config)
	l.position = end
	return true
}

// addTokenWithHoleConfig appends a token of the given type, text and start
// position to l.tokens. The token's HoleConfig points at this call's own copy
// of config, since config is passed by value.
func (l *Lexer) addTokenWithHoleConfig(tokenType TokenType, value string, pos int, config HoleConfig) {
	tok := Token{
		Type:     tokenType,
		Value:    value,
		Position: pos,
	}
	tok.HoleConfig = &config
	l.tokens = append(l.tokens, tok)
}

// isQuantifier reports whether c is one of the quantifier characters
// '*', '+' or '?'.
func isQuantifier(c byte) bool {
	switch c {
	case '*', '+', '?':
		return true
	}
	return false
}

// findHoleEnd scans forward from the hole that starts at l.position and
// returns the index just past it: after the first "]]" for a long-form hole
// or the first "]" for a short-form hole, plus one more byte when a
// quantifier character immediately follows. It returns -1 when no closing
// bracket sequence is found.
func (l *Lexer) findHoleEnd(isLongForm bool) int {
	src := l.input

	// openLen is the width of ":[" / ":[[" and closeLen the width of the
	// matching "]" / "]]".
	openLen, closeLen := 2, 1
	if isLongForm {
		openLen, closeLen = 3, 2
	}

	for i := l.position + openLen; i+closeLen <= len(src); i++ {
		if src[i] != ']' {
			continue
		}
		if isLongForm && src[i+1] != ']' {
			continue
		}

		after := i + closeLen
		// Check if there's a quantifier after the closing bracket(s).
		if after < len(src) && isQuantifier(src[after]) {
			return after + 1
		}
		return after
	}
	return -1
}

// extractHoleName extracts the hole name from a string like ":[name]" or
// ":[[name]]". For example, ":[[cond]]" -> "cond", ":[cond]" -> "cond".
//
// The brackets are removed by position, not by inspection: a value longer
// than four bytes that starts with ":[[" loses its first three and last two
// bytes; any other value loses its first two and last byte. The value should
// therefore be well-formed and carry no trailing quantifier. Input too short
// to contain ":[" plus a closing "]" yields the empty string.
func extractHoleName(tokenValue string) string {
	// :[[ ... ]]
	if len(tokenValue) > 4 && tokenValue[:3] == ":[[" {
		return tokenValue[3 : len(tokenValue)-2]
	}
	// Shorter than ":[]": slicing below would go out of range and panic.
	if len(tokenValue) < 3 {
		return ""
	}
	// :[ ... ]
	return tokenValue[2 : len(tokenValue)-1]
}
Loading

0 comments on commit c4f86e3

Please sign in to comment.