Skip to content

Commit

Permalink
v1.1: Added logs settings; Skip IP blocks, if undefined all geoname_i…
Browse files Browse the repository at this point in the history
…d from "geoname_id", "registered_country_geoname_id", "represented_country_geoname_id"
  • Loading branch information
mbto committed Jun 18, 2023
1 parent a9b836e commit a222efc
Show file tree
Hide file tree
Showing 23 changed files with 329 additions and 208 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#### Features:
* Downloads latest archive `GeoLite2-(Country|City)-CSV_%Date%.zip` using MaxMind API https://www.maxmind.com/ by [free license key](https://support.maxmind.com/account-faq/license-keys/how-do-i-generate-a-license-key/)
* Downloads latest archive `GeoLite2-(Country|City)-CSV_%Date%.zip` using MaxMind API https://www.maxmind.com/ by [free license key](https://support.maxmind.com/hc/en-us/articles/4407111582235-Generate-a-License-Key)
* Converts files `GeoLite2-(Country|City)-Locations-%LocaleCode%.csv, GeoLite2-(Country|City)-Blocks-(IPv4|IPv6).csv` to SQL DDL/DML
* Builds archive `maxmind-geoip2-csv2sql.zip` with scripts split per N megabytes, using a template engine
(Example: If MySQL server `max_allowed_packet` parameter is too small - [64 MB by default](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_max_allowed_packet))
Expand Down
10 changes: 5 additions & 5 deletions build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import org.gradle.api.JavaVersion.VERSION_11

group = "com.github.mbto.maxmind-geoip2-csv2sql-converter"
version = "1.0"
version = "1.1"

plugins {
java
Expand All @@ -19,9 +19,6 @@ tasks {
exclude("*.ini", "emoji.txt")
}
test {
if(!project.hasProperty("ManualTestEnabled")) {
exclude("**/ManualTest.class")
}
maxParallelForks = Runtime.getRuntime().availableProcessors()
}
}
Expand All @@ -37,7 +34,10 @@ dependencies {
testCompile("org.projectlombok:lombok:$lombokVer")
testAnnotationProcessor("org.projectlombok:lombok:$lombokVer")

testCompile("junit:junit:4.13.2")
val jUnitVer = "5.9.3"
testCompile("org.junit.jupiter:junit-jupiter-engine:$jUnitVer")
testCompile("org.junit.jupiter:junit-jupiter-params:$jUnitVer")
testCompile("org.junit.vintage:junit-vintage-engine:$jUnitVer")
}

application {
Expand Down
20 changes: 14 additions & 6 deletions src/main/java/com/github/mbto/maxmind/geoip2/csv2sql/Args.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import com.beust.jcommander.DynamicParameter;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParametersDelegate;
import com.beust.jcommander.SubParameter;
import com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils;
import com.github.mbto.maxmind.geoip2.csv2sql.utils.jcommander.IPVersionValidator;
import com.github.mbto.maxmind.geoip2.csv2sql.utils.jcommander.LocaleConverter;
Expand All @@ -13,7 +11,10 @@
import lombok.ToString;

import java.nio.file.Path;
import java.util.*;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static com.github.mbto.maxmind.geoip2.csv2sql.utils.Constants.CONFIG_NAME_FORMAT;
Expand All @@ -38,7 +39,7 @@ public class Args {
private String outputArchiveName = DEFAULT_ARCHIVE_NAME;

@Parameter(names = "-k", order = 4,
description = "License key for MaxMind API. Free at https://support.maxmind.com/account-faq/license-keys/how-do-i-generate-a-license-key/")
description = "License key for MaxMind API. Free at https://support.maxmind.com/hc/en-us/articles/4407111582235-Generate-a-License-Key")
private String licenseKey;

@Parameter(names = "-c", order = 5, required = true,
Expand All @@ -63,10 +64,17 @@ public class Args {

@DynamicParameter(names = "-LV", order = 8, description = "Filter values from location files by group name with regex:\n" +
" Example - for both GeoLite2-Country-CSV and GeoLite2-City-CSV editions:\n" +
" -LVcontinent_code=EU,NA,OC -LVcountry_iso_code=AU,NZ,GB,IE,US,CA,CY\n" +
" -LVgeoname_id=.*777.* -LVlocale_code=en,ru,de,es,fr\n" +
" -LVcontinent_code=EU,NA,OC -LVcontinent_name=Europe|Africa,Asia\n" +
" -LVcountry_iso_code=AU,NZ,GB,IE,US,CA,CY\n" +
" -LVcountry_name=Austr.*,Zealand$,^United,Ireland,Canada|Cyprus\n" +
" -LVis_in_european_union=0|1\n" +
" At GeoLite2-City-CSV edition available filter by city_name and other group names:\n" +
" -LVcity_name=Newport,^Clinton$|^Richmond$,\"Mandria, Paphos\",^Salem")
" -LVsubdivision_1_iso_code=WO|JD|NU|GE|A.* -LVsubdivision_1_name=.*O.*\n" +
" -LVsubdivision_2_iso_code=.* -LVsubdivision_2_name=.*A.*\n" +
" -LVcity_name=Newport,^Clinton$|^Richmond$,\"Mandria, Paphos\",^Salem\n" +
" -LVmetro_code=.* -LVtime_zone=.*/.*E.*"
)
private Map<String, String> allowedLocationValuesRawByGroupName = new LinkedHashMap<>();

@Parameter(names = "-mm", order = 9,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,48 +5,54 @@

import java.util.concurrent.Callable;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

import static com.github.mbto.maxmind.geoip2.csv2sql.streaming.Event.TERMINATE;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.*;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.calcHumanDiff;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.threadPrintln;

public abstract class AbstractConverter implements Callable<Void> {
protected final Registry registry;
protected final String dataType;
protected final LinkedBlockingQueue<Message<?>> messageQueue;
protected final boolean logIgnored;

protected Thread writerT;

public AbstractConverter(Registry registry, String dataType, int queueCapacity) {
public AbstractConverter(Registry registry, String dataType, int queueCapacity, boolean logIgnored) {
this.registry = registry;
this.dataType = dataType;
this.messageQueue = new LinkedBlockingQueue<>(queueCapacity);
this.logIgnored = logIgnored;
}

@Override
public Void call() throws Exception {
long startEpoch = System.currentTimeMillis();
threadPrintln(System.out, "Started '" + dataType + " converter'");
threadPrintln(System.out, "Started " + getConverterName());
try {
return work();
} catch (Throwable e) {
throw new Exception("Exception in '" + dataType + " converter'", e);
throw new Exception("Exception in " + getConverterName(), e);
} finally {
terminateWriter();
threadPrintln(System.out, "Finished '" + dataType + " converter' in " + calcHumanDiff(startEpoch));
threadPrintln(System.out, "Finished " + getConverterName() + " in " + calcHumanDiff(startEpoch));
}
}

protected abstract Void work() throws Throwable;

protected void terminateWriter() {
if (writerT != null && writerT.isAlive()) {
threadPrintln(System.out, "Waiting for terminate writer from '" + dataType + " converter'");
threadPrintln(System.out, "Waiting for terminate writer from " + getConverterName());
try {
messageQueue.put(new Message<>(null, TERMINATE));
writerT.join(/*TimeUnit.SECONDS.toMillis(3)*/);
writerT.join(/*java.util.concurrent.TimeUnit.SECONDS.toMillis(3)*/);
} catch (InterruptedException ignored) {}
threadPrintln(System.out, "Terminated writer from '" + dataType + " converter'");
threadPrintln(System.out, "Terminated writer from " + getConverterName());
}
}

/**
 * Builds the quoted label that identifies this converter in log and exception messages.
 *
 * @return the data type followed by {@code " converter"}, wrapped in single quotes
 */
protected String getConverterName() {
    final String label = dataType + " converter";
    return String.format("'%s'", label);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import com.github.jgonian.ipmath.AbstractIpRange;
import com.github.jgonian.ipmath.Ipv4Range;
import com.github.jgonian.ipmath.Ipv6Range;
import com.github.jgonian.ipmath.Range;
import com.github.mbto.maxmind.geoip2.csv2sql.Registry;
import com.github.mbto.maxmind.geoip2.csv2sql.streaming.BRWrapper;
import com.github.mbto.maxmind.geoip2.csv2sql.streaming.Location.IPBlock;
Expand All @@ -21,27 +20,31 @@
import static com.github.mbto.maxmind.geoip2.csv2sql.streaming.Event.WRITE;
import static com.github.mbto.maxmind.geoip2.csv2sql.streaming.converters.LocationsConverter.geonameIdsWithEmptyCountryIsoCode;
import static com.github.mbto.maxmind.geoip2.csv2sql.streaming.converters.LocationsConverter.ignoredGeonameIds;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.*;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.extractIPBlockFilename;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.threadPrintln;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.placeholder.ParseUtils.StringUtils.split2;

/**
* 1 thread reads a GeoLite2-(Country|City)-Blocks-(IPv4|IPv6).csv file and fills messageQueue
*/
public class IPBlockConverter extends AbstractConverter {
private final String[] priorityGeonameIdGroupNames;
private final boolean logUndefinedAllGeonameIds;

public IPBlockConverter(Registry registry, String dataType, int queueCapacity) {
super(registry, dataType, queueCapacity);
super(registry, dataType, queueCapacity,
Boolean.parseBoolean(registry.getFromExportSection("log_ignored_ipblocks", true)));

String priorityGeonameIdGroupNamesRaw = registry.getFromExportSection("ipblocks_priority_geonameId_groupNames", true);
if(!priorityGeonameIdGroupNamesRaw.isEmpty()) {
String priorityGeonameIdGroupNamesRaw = registry.getFromExportSection("ipblocks_priority_geonameId_groupNames", false);
if(priorityGeonameIdGroupNamesRaw != null && !priorityGeonameIdGroupNamesRaw.isEmpty()) {
String[] priorityGeonameIdGroupNames = split2(priorityGeonameIdGroupNamesRaw, ',', true, true);
if(priorityGeonameIdGroupNames.length != 0)
this.priorityGeonameIdGroupNames = priorityGeonameIdGroupNames;
else
this.priorityGeonameIdGroupNames = null;
} else
this.priorityGeonameIdGroupNames = null;
this.logUndefinedAllGeonameIds = Boolean.parseBoolean(registry.getFromExportSection("log_undefined_all_geonameIds", true));
}

@Override
Expand Down Expand Up @@ -81,12 +84,25 @@ public Void work() throws Throwable {
++geonameIdsCounter;
}
if(geonameIdsCounter == 0) {
registry.incStats(sif.getDataType() /*== dataType*/ + "_ignored");
if(logIgnored) {
threadPrintln(System.out, "Ignored '" + dataType/*== sif.getDataType()*/ + "' in " + getConverterName() + " by filter from line '" + line + "'");
}
registry.incStats(dataType + " ignored");
continue;
}
}
IPBlock ipBlock = new IPBlock(csvHolder, parseCidrFunc);
ipBlock.setPriorityGeonameId(findPriorityGeonameId(csvHolder));
Integer priorityGeonameId;
try {
priorityGeonameId = findPriorityGeonameId(csvHolder);
} catch (Throwable e) {
if(logUndefinedAllGeonameIds) {
threadPrintln(System.out, "Ignored '" + dataType + "' in " + getConverterName() + ", due " + e.getMessage() + " from line '" + line + "'");
}
registry.incStats(dataType + " ignored");
continue;
}
ipBlock.setPriorityGeonameId(priorityGeonameId);
for (Map.Entry<String, String> entry : ipBlock.getValues().entrySet()) {
String value = entry.getValue();
if (value == null)
Expand Down Expand Up @@ -130,7 +146,6 @@ private Integer findPriorityGeonameId(CsvHolder csvHolder) {
if(firstGeonameId != null)
return firstGeonameId;
throw new IllegalStateException("Failed to determine priority geoname_id by group names "
+ Arrays.toString(priorityGeonameIdGroupNames)
+ " from " + csvHolder.getValueByGroupName());
+ Arrays.toString(priorityGeonameIdGroupNames));
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.github.mbto.maxmind.geoip2.csv2sql.streaming.converters;

import com.github.mbto.maxmind.geoip2.csv2sql.Registry;
import com.github.mbto.maxmind.geoip2.csv2sql.Args.Locale;
import com.github.mbto.maxmind.geoip2.csv2sql.Registry;
import com.github.mbto.maxmind.geoip2.csv2sql.streaming.BRWrapper;
import com.github.mbto.maxmind.geoip2.csv2sql.streaming.Location;
import com.github.mbto.maxmind.geoip2.csv2sql.streaming.Location.LocationData;
Expand All @@ -16,17 +16,22 @@
import java.util.regex.Pattern;

import static com.github.mbto.maxmind.geoip2.csv2sql.streaming.Event.WRITE;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.*;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.extractLocationsFilenames;
import static com.github.mbto.maxmind.geoip2.csv2sql.utils.ProjectUtils.threadPrintln;

/**
* 1 thread reads GeoLite2-(Country|City)-Locations-XXXX.csv files and fills messageQueue
*/
public class LocationsConverter extends AbstractConverter {
public static final Set<Integer> geonameIdsWithEmptyCountryIsoCode = new HashSet<>();
public static final Set<Integer> ignoredGeonameIds = new HashSet<>();
private final boolean logUndefinedAllSubdivisionsAndCityName;

public LocationsConverter(Registry registry, int queueCapacity) {
super(registry, Location.class.getSimpleName().toLowerCase(), queueCapacity);
super(registry, Location.class.getSimpleName().toLowerCase(), queueCapacity,
Boolean.parseBoolean(registry.getFromExportSection("log_ignored_locations", true)));
this.logUndefinedAllSubdivisionsAndCityName = Boolean.parseBoolean(registry.getFromExportSection("log_undefined_all_subdivisions_and_city_name", true));

// static sets are not reset between multiple Gradle tests, so clear them here
geonameIdsWithEmptyCountryIsoCode.clear();
ignoredGeonameIds.clear();
Expand Down Expand Up @@ -56,6 +61,7 @@ public Void work() throws Throwable {
if (csvHolder == null) throw new IllegalStateException("Unable to define csvHolder, due empty locations_filenames template");
Map<String, List<Pattern>> allowedLocationValuesByGroupName = registry.getAllowedLocationValuesByGroupName();
boolean isCityEdition = csvHolder.getHeaders().contains("city_name");
String dataTypeLabel = !isCityEdition ? "country" : "city";
Writer writer = new Writer(registry, dataType, messageQueue);
writerT = new Thread(new FutureTask<>(writer));
writerT.start();
Expand Down Expand Up @@ -84,7 +90,13 @@ public Void work() throws Throwable {
})) {
int geoname_id = Integer.parseInt(location.getValues().get("geoname_id"));
ignoredGeonameIds.add(geoname_id);
registry.incStats((!isCityEdition ? "country" : "city") + "_ignored");
if(logIgnored) {
threadPrintln(System.out, "Ignored '" + dataTypeLabel + "' in " + getConverterName()
+ " by filter '" + filteredGroupName + "'"
+ " only " + allowedLocationValuePatterns.toString()
+ " from " + csvHolder.getValueByGroupName());
}
registry.incStats(dataTypeLabel + " ignored");
continue outer;
}
}
Expand Down Expand Up @@ -117,6 +129,8 @@ public Void work() throws Throwable {
if (syntheticKey == null) { // geoname_id with 6255147 6255148 with empty country_iso_code
syntheticKey = locationValues.get("geoname_id");
geonameIdsWithEmptyCountryIsoCode.add(Integer.parseInt(syntheticKey));
threadPrintln(System.out, "Informing: '" + sif.getDataType() + "' in " + getConverterName()
+ " without country_iso_code from " + csvHolder.getValueByGroupName());
registry.incStats(sif.getDataType() + " includes which unknown");
}

Expand Down Expand Up @@ -185,6 +199,10 @@ public Void work() throws Throwable {
}
return false;
}).count() == 5)) {
if(logUndefinedAllSubdivisionsAndCityName) {
threadPrintln(System.out, "Informing: '" + sif.getDataType() + "' in " + getConverterName()
+ " without subdivision_* and city_name from " + csvHolder.getValueByGroupName());
}
registry.incStats(sif.getDataType() + " includes which unknown");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
public interface Constants {
String SOFTWARE_NAME = "maxmind-geoip2-csv2sql-converter";

String SOFTWARE_INFO = "MaxMind GeoIP2 csv2sql Converter v1.0\nhttps://github.com/mbto/" + SOFTWARE_NAME;
String SOFTWARE_INFO = "MaxMind GeoIP2 csv2sql Converter v1.1\nhttps://github.com/mbto/" + SOFTWARE_NAME;

Set<String> supportedLocales = new LinkedHashSet<>(
asList("en", "ru", "de", "es", "fr", "ja", "pt-BR", "zh-CN"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import com.github.mbto.maxmind.geoip2.csv2sql.utils.placeholder.Template;

import java.io.PrintStream;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
Expand Down Expand Up @@ -35,16 +34,16 @@ public abstract class ProjectUtils {
public static Path resolveConfigDirectory() {
String defaultURI = null;
try {
defaultURI = URLDecoder.decode(ProjectUtils.class.getResource("").toString(), StandardCharsets.UTF_8); // R:\\test%20test\\ -> R:\\test test\\
defaultURI = URLDecoder.decode(ProjectUtils.class.getResource("").toString(), StandardCharsets.UTF_8); // C:\\test%20test\\ -> C:\\test test\\

if (defaultURI.startsWith("file:/")) {
// file:/C:/idea/maxmind-geoip2-csv2sql-converter/build/classes/java/main/com/github/mbto/maxmind/geoip2/csv2sql/utils/
return Paths.get("").resolve("src").resolve("main").resolve("resources").toAbsolutePath();
} else {
// jar:file:/C:/idea/maxmind-geoip2-csv2sql-converter/build/distributions/maxmind-geoip2-csv2sql-converter-1.0/lib/maxmind-geoip2-csv2sql-converter-1.0.jar!/com/github/mbto/maxmind/geoip2/csv2sql/utils/
// jar:file:/C:/idea/maxmind-geoip2-csv2sql-converter/build/distributions/maxmind-geoip2-csv2sql-converter-1.1/lib/maxmind-geoip2-csv2sql-converter-1.1.jar!/com/github/mbto/maxmind/geoip2/csv2sql/utils/
String jarPrefix = "jar:file:/";
if (defaultURI.startsWith(jarPrefix)) {
// C:/idea/maxmind-geoip2-csv2sql-converter/build/distributions/maxmind-geoip2-csv2sql-converter-1.0/lib/maxmind-geoip2-csv2sql-converter-1.0.jar
// C:/idea/maxmind-geoip2-csv2sql-converter/build/distributions/maxmind-geoip2-csv2sql-converter-1.1/lib/maxmind-geoip2-csv2sql-converter-1.1.jar
String substring = defaultURI.substring(jarPrefix.length(), defaultURI.lastIndexOf('!'));
if(!substring.contains(":"))
substring = "/" + substring;
Expand Down
4 changes: 4 additions & 0 deletions src/main/resources/GeoLite2-City-CSV.mssql 2019.default.ini
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ ipv6_insert_filename=ipv6_%03d.sql
ipv6_indexes_filename=ipv6_indexes.sql
// One of the valid geoname_id values for ipv4_values/ipv6_values templates from Blocks-IPv4/Blocks-IPv6 files (${.priorityGeonameId} placeholder);
ipblocks_priority_geonameId_groupNames=geoname_id,registered_country_geoname_id,represented_country_geoname_id
log_ignored_locations=false
log_ignored_ipblocks=false
log_undefined_all_geonameIds=true
log_undefined_all_subdivisions_and_city_name=false

schema_name=maxmind_city
load_data_filename=load_data.sql
Expand Down
Loading

0 comments on commit a222efc

Please sign in to comment.