-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e17955d
commit 2c5bbdd
Showing
48 changed files
with
452 additions
and
786 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
<TargetFrameworks>net451;netstandard2.0</TargetFrameworks> | ||
<GeneratePackageOnBuild>true</GeneratePackageOnBuild> | ||
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance> | ||
<Version>3.0.4</Version> | ||
<Version>3.0.5</Version> | ||
<Authors>[email protected];</Authors> | ||
<AssemblyName>DotnetSpider.Core</AssemblyName> | ||
<Copyright>Copyright 2018 Lewis Zou</Copyright> | ||
|
@@ -52,4 +52,4 @@ | |
<PackageReference Include="Microsoft.Extensions.Logging" Version="2.1.1" /> | ||
<PackageReference Include="Serilog.Extensions.Logging" Version="2.0.2" /> | ||
</ItemGroup> | ||
</Project> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text.RegularExpressions; | ||
using DotnetSpider.Downloader; | ||
|
||
namespace DotnetSpider.Core.Processor.Filter | ||
{ | ||
/// <summary>
/// Request filter that decides by testing the request URL against regular expressions.
/// Exclude patterns are evaluated first and always win; include patterns are evaluated afterwards.
/// </summary>
public class PatternFilter : IFilter
{
    private readonly List<string> _patterns;
    private readonly List<string> _excludePatterns;

    /// <summary>
    /// Creates a filter that only has include patterns.
    /// </summary>
    /// <param name="patterns">Regular expressions to match the request URL against.</param>
    public PatternFilter(params string[] patterns) : this(patterns, null) { }

    /// <summary>
    /// Creates a filter with include patterns and optional exclude patterns.
    /// Both sequences are copied; a null sequence is treated as empty.
    /// </summary>
    /// <param name="patterns">Regular expressions to match the request URL against.</param>
    /// <param name="excludePatters">Regular expressions whose match rejects the request.</param>
    public PatternFilter(IEnumerable<string> patterns, IEnumerable<string> excludePatters = null)
    {
        _patterns = patterns == null ? new List<string>() : new List<string>(patterns);
        _excludePatterns = excludePatters == null ? new List<string>() : new List<string>(excludePatters);
    }

    /// <summary>
    /// Tests the URL of <paramref name="request"/> against the configured patterns.
    /// </summary>
    /// <param name="request">The request whose Url is tested.</param>
    /// <returns>
    /// false when any exclude pattern matches the URL; otherwise true when there are no
    /// include patterns or when at least one include pattern matches; otherwise false.
    /// </returns>
    public bool IsMatch(Request request)
    {
        // Exclusions take precedence over inclusions.
        // request.Url is only read inside the predicate, so a filter with no patterns
        // at all never touches the request.
        if (_excludePatterns.Exists(pattern => Regex.IsMatch(request.Url, pattern)))
        {
            return false;
        }

        // No include patterns means "accept everything that was not excluded".
        // Previously an exclude-only filter fell through to "return false" and
        // therefore rejected every URL, including the ones it did not exclude.
        if (_patterns.Count == 0)
        {
            return true;
        }

        return _patterns.Exists(pattern => Regex.IsMatch(request.Url, pattern));
    }

    /// <summary>
    /// Reports whether the exact pattern string is one of the include patterns.
    /// </summary>
    /// <param name="pattern">The pattern text to look up.</param>
    /// <returns>true when the include list contains <paramref name="pattern"/>.</returns>
    internal bool ContainsPattern(string pattern)
    {
        return _patterns.Contains(pattern);
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using DotnetSpider.Downloader; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace DotnetSpider.Core.Processor | ||
{ | ||
/// <summary> | ||
/// Contract for a filter that accepts or rejects a Request. | ||
/// </summary> | ||
public interface IFilter | ||
{ | ||
/// <summary> | ||
/// Tests the given request against this filter. | ||
/// </summary> | ||
/// <param name="request">The request to test.</param> | ||
/// <returns>true when the request matches the filter; otherwise false.</returns> | ||
bool IsMatch(Request request); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace DotnetSpider.Core.Processor | ||
{ | ||
/// <summary> | ||
/// Contract for a check performed on a Page. | ||
/// NOTE(review): presumably used to stop following pagination; confirm against implementations. | ||
/// </summary> | ||
public interface ILastPageChecker | ||
{ | ||
/// <summary> | ||
/// Reports whether the given page is the last page. | ||
/// </summary> | ||
/// <param name="page">The page to inspect.</param> | ||
/// <returns>true when the page is considered the last one; otherwise false.</returns> | ||
bool IsLastPage(Page page); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using DotnetSpider.Downloader; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace DotnetSpider.Core.Processor | ||
{ | ||
/// <summary> | ||
/// Contract for producing a sequence of Request objects from a Page. | ||
/// </summary> | ||
public interface IRequestExtractor | ||
{ | ||
/// <summary> | ||
/// Extracts requests from the given page. | ||
/// </summary> | ||
/// <param name="page">The page to extract from.</param> | ||
/// <returns>The extracted requests. TODO confirm whether implementations may return null or an empty sequence.</returns> | ||
IEnumerable<Request> Extract(Page page); | ||
} | ||
} |
Oops, something went wrong.