Skip to content

Commit

Permalink
Merge pull request #26 from pestanko/chore/ISS-25-sagram
Browse files Browse the repository at this point in the history
Chore/iss 25 sagram
  • Loading branch information
pestanko authored Mar 18, 2024
2 parents 54d2e03 + 5645242 commit c97cc78
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-and-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
steps:
-
name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
-
name: Set up QEMU
uses: docker/setup-qemu-action@v2
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/golangci-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/setup-go@v3
with:
go-version: 1.19
- uses: actions/checkout@v4

- uses: actions/checkout@v3
- uses: actions/setup-go@v4
with:
go-version-file: go.mod

- name: golangci-lint
uses: golangci/golangci-lint-action@v3
uses: golangci/golangci-lint-action@v4
with:
args: --timeout 3m0s
args: --timeout 3m0s
6 changes: 3 additions & 3 deletions .github/workflows/unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ jobs:
unit-tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
name: Checkout code

- name: setup golang v1.x
uses: actions/setup-go@v3
uses: actions/setup-go@v4
with:
go-version: ^1.19
go-version-file: go.mod

- name: Install Task
uses: arduino/setup-task@v1
Expand Down
5 changes: 0 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,3 @@ repos:
- id: check-yaml
args: ['--unsafe']
- id: check-added-large-files

- repo: https://github.com/golangci/golangci-lint
rev: v1.50.1
hooks:
- id: golangci-lint
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.20-alpine AS build
FROM golang:1.22-alpine AS build

WORKDIR /app

Expand Down
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Miniscraper
# MiniScrape

A simple web page scraper written in Go.

Expand All @@ -23,6 +23,7 @@ go get .
```

## Build the scraper

```shell
make build
```
Expand All @@ -33,13 +34,23 @@ make build
go run main.go scrape
```

Scrape a single webpage:

```shell
# For food category
go run main.go scrape -C food -N ubaumanu
```

### Run the server

```shell
make run-serve
```

## Add/Edit available webpages

The webpages list is located in ``./config/default.yml``.

## License
Miniscrape is released under the Apache 2.0 license. See LICENSE

Miniscrape is released under the Apache 2.0 license. See LICENSE
2 changes: 1 addition & 1 deletion Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ version: '3'
vars:
APPNAME: 'miniscrape'
GO_VERSION:
sh: go version | awk '{print $$3}'
sh: go version | awk '{print $3}'
GO_PATH:
sh: go env GOPATH
NC: '\033[0m'
Expand Down
12 changes: 12 additions & 0 deletions config/categories/food.yml
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,15 @@ pages:
enabled: true
html:
tables: custom

- codename: sagram
name: sagram
homepage: https://sargamrestaurace.cz/
tags: ["ns", "india", "asia"]
url: https://sargamrestaurace.cz/DMenuItems
query: "main div.row:nth-child(n+5)"
filters:
day:
enabled: true
cut:
after: "#Sunday"
7 changes: 4 additions & 3 deletions internal/scraper/filters/new_line.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package filters

import (
"github.com/pestanko/miniscrape/internal/models"
"regexp"

"github.com/pestanko/miniscrape/internal/models"
)

var normPattern = regexp.MustCompile("\n\n")
var normPattern = regexp.MustCompile("[\n]+")

// NewNewLineTrimConverter creates a new instance of the filter that
// trims new lines in the content.
func NewNewLineTrimConverter(page *models.Page) PageFilter {
func NewNewLineTrimConverter(_ *models.Page) PageFilter {
return &newLineTrimConverter{}
}

Expand Down

0 comments on commit c97cc78

Please sign in to comment.