diff --git a/.github/workflows/lib/yaml_to_json.py b/.github/workflows/lib/yaml_to_json.py new file mode 100644 index 00000000..873db833 --- /dev/null +++ b/.github/workflows/lib/yaml_to_json.py @@ -0,0 +1,9 @@ +import sys +import yaml +import json + +with open(sys.argv[1], 'r') as input: + yml = yaml.safe_load(input) + +with open(sys.argv[2], 'w') as output: + json.dump(yml, output) \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..7582d7f4 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,24 @@ +name: release + +on: + push: + branches: + - master + +jobs: + release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Convert yaml to json + run: python .github/workflows/lib/yaml_to_json.py resources/referers.yml referers.json + - name: Get datetime + id: datetime + run: echo "datetime=$(date "+%Y-%m-%d-%H%M%S")" >> $GITHUB_OUTPUT + - name: Tag + run: git tag ${{ steps.datetime.outputs.datetime }} && git push --tags + - name: Create GitHub release and attach referers.json + uses: softprops/action-gh-release@v1 + with: + tag_name: ${{ steps.datetime.outputs.datetime }} + files: ./referers.json \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..58b70d6f --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,13 @@ +name: test + +on: pull_request + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Convert yaml to json + run: python .github/workflows/lib/yaml_to_json.py resources/referers.yml referers.json + - name: Check + run: cat referers.json \ No newline at end of file diff --git a/.gitignore b/.gitignore deleted file mode 100644 index ea8093a7..00000000 --- a/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -/vendor/ -/composer.lock -php/.idea - -# Ruby -ruby/.idea/.name -ruby/.idea/.rakeTasks -ruby/.idea/encodings.xml 
-ruby/.idea/misc.xml -ruby/.idea/modules.xml -ruby/.idea/ruby.iml -ruby/.idea/scopes/scope_settings.xml -ruby/.idea/vcs.xml -ruby/.idea/workspace.xml - -# Vagrant -.vagrant diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 72c8412d..00000000 --- a/.travis.yml +++ /dev/null @@ -1,3 +0,0 @@ -language: ruby -script: - - ruby -e "require 'yaml'; YAML.load_file('resources/referers.yml')" diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index 4cacd6a7..00000000 --- a/CHANGELOG +++ /dev/null @@ -1,17 +0,0 @@ -Release June 2017 (2017-06-12) ------------------------------- -Documentation: correct authors in README (#148) -Documentation: add new section about hosted database (#150) -Database: add preliminary list of paid sources -Database: add some ANZ's largest ISPs' web mail domains and search engine domains (Dodo, Telstra, Optus, 2degrees, iiNet, Virgin, Vodafone etc) -Database: add various other search engines (SosoDesk - unrelated to Soso) -Database: add Ukrainian search providers ukr.net and i.ua -Database: add belarusian search provider tut.by -Database: add kununu -Database: add outlook.live.com to list of Outlook.com email referrers -Database: add t.umblr domain to Tumblr -Database: merge Chinese Soso search engine under Sogou -Database: fix for ITU as it was incorrectly indented -Database: remove duplicated domains e.g. Comcast -Database: move Taboola and Outbrain from "unknown" section to paid sources -Database: move some domains into alphabetical order diff --git a/README.md b/README.md index c0879e3f..ee7df03a 100644 --- a/README.md +++ b/README.md @@ -17,17 +17,16 @@ The latest database is always available on this URL: https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-latest.yaml https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-latest.json -The database is updated at most once a month. 
Each new version of the database is also uploaded with a timestamp: +The database is updated every day. Each new version of the database is also uploaded with a timestamp: https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-YYYYMMDD.yaml https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-YYYYMMDD.json Example: + https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-20200331.yaml https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-20200331.json -If there is an issue with the database necessitating a re-release within the month, the corresponding files will be overwritten. - ## Language-specific repositories - Scala: https://github.com/snowplow-referer-parser/scala-referer-parser @@ -78,9 +77,7 @@ We welcome contributions to referer-parser: ## Support -General support for referer-parser is handled by the team at Snowplow Analytics Ltd. - -You can contact the Snowplow Analytics team through any of the [channels listed on their wiki][talk-to-us]. +General support for `referer-parser` is handled by the Snowplow Analytics team on [Discourse][discourse]. 
## Copyright and license @@ -90,6 +87,8 @@ You can contact the Snowplow Analytics team through any of the [channels listed [snowplow]: https://github.com/snowplow/snowplow +[discourse]: https://discourse.snowplow.io + [piwik]: http://piwik.org [piwik-search-engines]: https://github.com/piwik/piwik/blob/master/core/DataFiles/SearchEngines.php [piwik-socials]: https://github.com/piwik/piwik/blob/master/core/DataFiles/Socials.php diff --git a/Vagrantfile b/Vagrantfile deleted file mode 100644 index 1bfb812b..00000000 --- a/Vagrantfile +++ /dev/null @@ -1,19 +0,0 @@ -Vagrant.configure("2") do |config| - - config.vm.box = "ubuntu/trusty64" - config.vm.hostname = "referer-parser" - config.ssh.forward_agent = true - - config.vm.provider :virtualbox do |vb| - vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s - vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] - vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ] - # Scala is our most memory-hungry library - vb.memory = 5120 - end - - config.vm.provision :shell do |sh| - sh.path = "vagrant/up.bash" - end - -end diff --git a/composer.json b/composer.json deleted file mode 100644 index 0c467ca4..00000000 --- a/composer.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "name": "snowplow/referer-parser", - "description": "Snowplow Refer(r)er parser for PHP", - "require-dev": { - "phpunit/phpunit": "3.*", - "symfony/yaml": "*" - }, - "suggest": { - "symfony/yaml": "Support for YAML configuration file" - }, - "license": "MIT", - "authors": [ - { - "name": "Lars Strojny", - "email": "lars@strojny.net" - } - ], - "autoload": { - "psr-0": { - "Snowplow\\RefererParser": "php/src/" - } - } -} diff --git a/sync_data.py b/sync_data.py deleted file mode 100755 index c9a2aaa4..00000000 --- a/sync_data.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2013 Martin Katrenik, Snowplow Analytics Ltd. All rights reserved. 
-# -# This program is licensed to you under the Apache License Version 2.0, -# and you may not use this file except in compliance with the Apache -# License Version 2.0. -# You may obtain a copy of the Apache License Version 2.0 at -# http://www.apache.org/licenses/LICENSE-2.0. -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the Apache License Version 2.0 is -# distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. -# See the Apache License Version 2.0 for the specific language -# governing permissions and limitations there under. - -# Authors:: Martin Katrenik, Alex Dean (mailto:support@snowplowanalytics.com) -# Copyright:: Copyright (c) 2013 Martin Katrenik, Snowplow Analytics Ltd -# License:: Apache License Version 2.0 - -# Syncs common referer-parser resources to the -# language-specific sub projects. -# -# Syncs: -# 1. The referers.yml, plus a generated JSON equivalent -# 2. The referer-tests.json -# -# Finishes by committing the synchronized resources. 
- -import os -import shutil -import json -import yaml -import subprocess - - -root_path = os.path.dirname(__file__) - -# Source paths -REFERER_SOURCE = os.path.join(root_path, 'resources', 'referers.yml') -REFERER_JSON_OUT = 'referers.json' -TEST_SOURCE = os.path.join(root_path, 'resources', 'referer-tests.json') - -# Target paths -REFERER_TARGETS = [ - os.path.join(root_path, "ruby","data"), - os.path.join(root_path, "java-scala","src","main","resources"), - os.path.join(root_path, "python","referer_parser","data"), - os.path.join(root_path, "nodejs","data"), - os.path.join(root_path, "dotnet","RefererParser","Resources"), - os.path.join(root_path, "php","data"), - os.path.join(root_path, "go", "data") -] -TEST_TARGETS = [ - os.path.join(root_path, "java-scala","src","test","resources"), - # Add remainder as paths determined etc -] - -# JSON builder -def build_json(): - searches = yaml.load(open(REFERER_SOURCE)) - return json.dumps(searches, sort_keys = False, indent = 4) - -JSON = build_json() - - -# File ops -def copy_file(src, dest): - try: - print "copying {0} to {1} ".format(src, dest) - shutil.copy(src, dest) - except shutil.Error as e: - print('Error: %s' % e) - except IOError as e: - print('IOError: %s' % e.strerror) - -def write_file(content, dest): - print "writing to {0} ".format(dest) - with open(dest, 'w') as f: - f.write(content) - - -# Sync process -def sync_referers_to(dest): - copy_file(REFERER_SOURCE, dest) - write_file(JSON, os.path.join(dest, REFERER_JSON_OUT)) - -def sync_tests_to(dest): - copy_file(TEST_SOURCE, dest) - -for dest in REFERER_TARGETS: - sync_referers_to(dest) - -for dest in TEST_TARGETS: - sync_tests_to(dest) - - -# Commit on current branch -commit = "git commit {0}".format(" ".join(REFERER_TARGETS + TEST_TARGETS)) -msg = "\"Updated {0}, {1} and {2} in sub-folder following update(s) to master copy\"".format(REFERER_SOURCE, REFERER_JSON_OUT, TEST_SOURCE) -subprocess.call(commit + ' -m' + msg, shell=True) diff --git 
a/vagrant/.gitignore b/vagrant/.gitignore deleted file mode 100644 index 1b4b29ff..00000000 --- a/vagrant/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.peru -oss-playbooks -ansible diff --git a/vagrant/ansible.hosts b/vagrant/ansible.hosts deleted file mode 100644 index 588fa08c..00000000 --- a/vagrant/ansible.hosts +++ /dev/null @@ -1,2 +0,0 @@ -[vagrant] -127.0.0.1:2222 diff --git a/vagrant/peru.yaml b/vagrant/peru.yaml deleted file mode 100644 index e7fdf41c..00000000 --- a/vagrant/peru.yaml +++ /dev/null @@ -1,14 +0,0 @@ -imports: - ansible: ansible - ansible_playbooks: oss-playbooks - -curl module ansible: - # Equivalent of git cloning tags/v1.6.6 but much, much faster - url: https://codeload.github.com/ansible/ansible/zip/69d85c22c7475ccf8169b6ec9dee3ee28c92a314 - unpack: zip - export: ansible-69d85c22c7475ccf8169b6ec9dee3ee28c92a314 - -git module ansible_playbooks: - url: https://github.com/snowplow/ansible-playbooks.git - # Comment out to fetch a specific rev instead of master: - # rev: xxx diff --git a/vagrant/up.bash b/vagrant/up.bash deleted file mode 100755 index 7450ae89..00000000 --- a/vagrant/up.bash +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -set -e - -vagrant_dir=/vagrant/vagrant -bashrc=/home/vagrant/.bashrc - -echo "========================================" -echo "INSTALLING PERU AND ANSIBLE DEPENDENCIES" -echo "----------------------------------------" -apt-get update -apt-get install -y language-pack-en git unzip libyaml-dev python3-pip python-yaml python-paramiko python-jinja2 - -echo "===============" -echo "INSTALLING PERU" -echo "---------------" -sudo pip3 install peru - -echo "=======================================" -echo "CLONING ANSIBLE AND PLAYBOOKS WITH PERU" -echo "---------------------------------------" -cd ${vagrant_dir} && peru sync -v -echo "... 
done" - -env_setup=${vagrant_dir}/ansible/hacking/env-setup -hosts=${vagrant_dir}/ansible.hosts - -echo "===================" -echo "CONFIGURING ANSIBLE" -echo "-------------------" -touch ${bashrc} -echo "source ${env_setup}" >> ${bashrc} -echo "export ANSIBLE_HOSTS=${hosts}" >> ${bashrc} -echo "... done" - -echo "==========================================" -echo "RUNNING PLAYBOOKS WITH ANSIBLE*" -echo "* no output while each playbook is running" -echo "------------------------------------------" -while read pb; do - su - -c "source ${env_setup} && ${vagrant_dir}/ansible/bin/ansible-playbook ${vagrant_dir}/${pb} --connection=local --inventory-file=${hosts}" vagrant -done <${vagrant_dir}/up.playbooks - -guidance=${vagrant_dir}/up.guidance - -if [ -f ${guidance} ]; then - echo "===========" - echo "PLEASE READ" - echo "-----------" - cat $guidance -fi diff --git a/vagrant/up.guidance b/vagrant/up.guidance deleted file mode 100644 index 0575dbc6..00000000 --- a/vagrant/up.guidance +++ /dev/null @@ -1,3 +0,0 @@ -To get started: -vagrant ssh -cd /vagrant diff --git a/vagrant/up.playbooks b/vagrant/up.playbooks deleted file mode 100644 index d8a0fc1f..00000000 --- a/vagrant/up.playbooks +++ /dev/null @@ -1,3 +0,0 @@ -oss-playbooks/java6.yml -oss-playbooks/scala.yml -oss-playbooks/sbt.yml