-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2. Release 2.4.5
- Loading branch information
邹嵩
committed
Mar 27, 2018
1 parent
33d1889
commit 70dd94d
Showing
20 changed files
with
300 additions
and
167 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
<package xmlns="http://schemas.microsoft.com/packaging/2012/06/nuspec.xsd"> | ||
<metadata> | ||
<id>DotnetSpider2.Core</id> | ||
<version>2.4.4</version> | ||
<version>2.4.5</version> | ||
<authors>[email protected];Walterwhatwater;xiaohuan0204</authors> | ||
<owners>[email protected]</owners> | ||
<iconUrl>https://github.com/zlzforever/DotnetSpider/blob/master/images/icon.png?raw=true</iconUrl> | ||
|
@@ -13,23 +13,23 @@ | |
<description>A .NET Standard web crawling library similar to WebMagic and Scrapy. It is a lightweight, efficient and fast high-level web crawling &amp; scraping framework for .NET</description> | ||
<dependencies> | ||
<group targetFramework=".NETStandard2.0"> | ||
<dependency id="Newtonsoft.Json" version="10.0.3"/> | ||
<dependency id="Newtonsoft.Json" version="11.0.2"/> | ||
<dependency id="NLog" version="5.0.0-beta09"/> | ||
<dependency id="HtmlAgilityPack" version="1.6.15"/> | ||
<dependency id="HtmlAgilityPack" version="1.7.2"/> | ||
<dependency id="System.Threading.Tasks.Parallel" version="4.3.0"/> | ||
<dependency id="System.Text.Encoding.CodePages" version="4.4.0"/> | ||
<dependency id="System.Runtime.InteropServices.RuntimeInformation" version="4.3.0"/> | ||
<dependency id="System.Diagnostics.Process" version="4.3.0"/> | ||
<dependency id="System.Configuration.ConfigurationManager" version="4.4.1"/> | ||
<dependency id="System.Data.SqlClient" version="4.4.2"/> | ||
<dependency id="System.Data.SqlClient" version="4.4.3"/> | ||
<dependency id="Microsoft.Extensions.DependencyModel" version="2.0.4"/> | ||
<dependency id="System.Runtime.Loader" version="4.3.0"/> | ||
<dependency id="System.Net.Ping" version="4.3.0"/> | ||
<dependency id="Polly" version="5.8.0" /> | ||
</group> | ||
<group targetFramework=".NETFramework4.5"> | ||
<dependency id="Newtonsoft.Json" version="10.0.3"/> | ||
<dependency id="HtmlAgilityPack" version="1.6.15"/> | ||
<dependency id="Newtonsoft.Json" version="11.0.2"/> | ||
<dependency id="HtmlAgilityPack" version="1.7.2"/> | ||
<dependency id="NLog" version="4.4.12"/> | ||
<dependency id="Polly" version="5.8.0" /> | ||
</group> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
<package xmlns="http://schemas.microsoft.com/packaging/2012/06/nuspec.xsd"> | ||
<metadata> | ||
<id>DotnetSpider2.Extension</id> | ||
<version>2.4.3</version> | ||
<version>2.4.5</version> | ||
<authors>[email protected];Walterwhatwater;xiaohuan0204</authors> | ||
<owners>[email protected]</owners> | ||
<iconUrl>https://github.com/zlzforever/DotnetSpider/blob/master/images/icon.png?raw=true</iconUrl> | ||
|
@@ -13,34 +13,34 @@ | |
<description>A .NET Standard web crawling library similar to WebMagic and Scrapy. It is a lightweight, efficient and fast high-level web crawling &amp; scraping framework for .NET</description> | ||
<dependencies> | ||
<group targetFramework=".NETStandard2.0"> | ||
<dependency id="DotnetSpider2.Core" version="2.4.3" /> | ||
<dependency id="DotnetSpider2.Core" version="2.4.5" /> | ||
<dependency id="Dapper" version="1.50.2"/> | ||
<dependency id="MailKit" version="2.0.1"/> | ||
<dependency id="MailKit" version="2.0.2"/> | ||
<dependency id="MongoDB.Driver" version="2.5.0"/> | ||
<dependency id="MySql.Data" version="6.10.6"/> | ||
<dependency id="StackExchange.Redis" version="1.2.6" /> | ||
<dependency id="SSH.NET" version="2016.1.0" /> | ||
<dependency id="System.Runtime.Extensions" version="4.3.0"/> | ||
<dependency id="EPPlus.Core" version="1.5.4"/> | ||
<dependency id="Selenium.WebDriver" version="3.8.0"/> | ||
<dependency id="Npgsql" version="3.2.6"/> | ||
<dependency id="CassandraCSharpDriver" version="3.4.0.1"/> | ||
<dependency id="Selenium.WebDriver" version="3.11.0"/> | ||
<dependency id="Npgsql" version="3.2.7"/> | ||
<dependency id="CassandraCSharpDriver" version="3.4.1"/> | ||
<dependency id="MessagePack" version="1.7.3.4"/> | ||
</group> | ||
<group targetFramework=".NETFramework4.5" > | ||
<dependency id="DotnetSpider2.Core" version="2.4.3" /> | ||
<dependency id="DotnetSpider2.Core" version="2.4.5" /> | ||
<dependency id="Dapper" version="1.50.2"/> | ||
<dependency id="MailKit" version="2.0.1"/> | ||
<dependency id="MailKit" version="2.0.2"/> | ||
<dependency id="MongoDB.Driver" version="2.5.0"/> | ||
<dependency id="MySql.Data" version="6.9.11"/> | ||
<dependency id="StackExchange.Redis" version="1.2.6" /> | ||
<dependency id="FiddlerCore2" version="1.0.0"/> | ||
<dependency id="SSH.NET" version="2016.1.0" /> | ||
<dependency id="DotRas.for.Win7" version="1.3.0" /> | ||
<dependency id="EPPlus" version="4.1.1"/> | ||
<dependency id="Selenium.WebDriver" version="3.8.0"/> | ||
<dependency id="Npgsql" version="3.2.6"/> | ||
<dependency id="CassandraCSharpDriver" version="3.4.0.1"/> | ||
<dependency id="Selenium.WebDriver" version="3.11.0"/> | ||
<dependency id="Npgsql" version="3.2.7"/> | ||
<dependency id="CassandraCSharpDriver" version="3.4.1"/> | ||
<dependency id="MessagePack" version="1.7.3.4"/> | ||
</group> | ||
</dependencies> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
using DotnetSpider.Core.Redial; | ||
using System; | ||
using System.Collections.Generic; | ||
|
||
namespace DotnetSpider.Core.Scheduler
{
    /// <summary>
    /// Common base for request schedulers. It stores the owning spider, exposes the
    /// request counters, and funnels <see cref="Push"/> into the subclass-provided
    /// <see cref="DoPush"/>.
    /// </summary>
    public abstract class BaseScheduler : Named, IScheduler, IDisposable
    {
        /// <summary>
        /// The spider this scheduler is bound to; assigned once by <see cref="Init"/>.
        /// </summary>
        protected ISpider Spider { get; set; }

        /// <summary>
        /// Adds 1 to the number of successfully crawled links.
        /// </summary>
        public abstract void IncreaseSuccessCount();

        /// <summary>
        /// Adds 1 to the number of crawl failures.
        /// </summary>
        public abstract void IncreaseErrorCount();

        /// <summary>
        /// Imports requests in bulk.
        /// </summary>
        /// <param name="requests">The request objects to import.</param>
        public abstract void Import(IEnumerable<Request> requests);

        /// <summary>
        /// Whether the scheduler uses the network. When true, <see cref="Push"/> runs the
        /// enqueue through <c>NetworkCenter.Current.Execute</c> instead of calling
        /// <see cref="DoPush"/> directly.
        /// </summary>
        protected abstract bool UseInternet { get; set; }

        /// <summary>
        /// Number of requests still left in the queue.
        /// </summary>
        public abstract long LeftRequestsCount { get; }

        /// <summary>
        /// Total number of requests. The base implementation is a get-only auto-property
        /// with no initializer, so it yields the default value unless overridden.
        /// </summary>
        public virtual long TotalRequestsCount { get; }

        /// <summary>
        /// Number of successfully crawled links.
        /// </summary>
        public abstract long SuccessRequestsCount { get; }

        /// <summary>
        /// Number of crawl failures. This counts attempts, not links: a link that fails
        /// several times is recorded several times.
        /// </summary>
        public abstract long ErrorRequestsCount { get; }

        /// <summary>
        /// Whether requests are traversed depth-first. Defaults to true.
        /// </summary>
        public bool DepthFirst { get; set; } = true;

        /// <summary>
        /// Adds a request to the queue.
        /// </summary>
        /// <param name="request">The request object to enqueue.</param>
        public void Push(Request request)
        {
            // Schedulers that do not use the network enqueue directly.
            if (!UseInternet)
            {
                DoPush(request);
                return;
            }

            // Network-backed schedulers run the enqueue through the shared NetworkCenter.
            NetworkCenter.Current.Execute("sch-push", () => DoPush(request));
        }

        /// <summary>
        /// Initializes the scheduler by binding it to a spider.
        /// </summary>
        /// <param name="spider">The spider object.</param>
        /// <exception cref="SpiderException">Thrown when a spider has already been bound.</exception>
        public virtual void Init(ISpider spider)
        {
            if (Spider != null)
            {
                throw new SpiderException("Scheduler already init");
            }

            Spider = spider;
        }

        /// <summary>
        /// Takes one request that needs to be processed.
        /// </summary>
        /// <returns>The request object.</returns>
        public abstract Request Poll();

        /// <summary>
        /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
        /// </summary>
        public abstract void Dispose();

        /// <summary>
        /// Exports the whole queue. The base implementation does nothing.
        /// </summary>
        public virtual void Export()
        {
        }

        /// <summary>
        /// Tells whether a request's cycle-retry counter is positive and does not exceed
        /// the site's <c>CycleRetryTimes</c>.
        /// </summary>
        /// <param name="request">The request to inspect.</param>
        /// <returns>
        /// True when <c>request.CycleTriedTimes</c> is greater than 0 and less than or
        /// equal to <c>Spider.Site.CycleRetryTimes</c>; otherwise false.
        /// </returns>
        protected virtual bool ShouldReserved(Request request)
        {
            var triedTimes = request.CycleTriedTimes;
            return triedTimes > 0 && triedTimes <= Spider.Site.CycleRetryTimes;
        }

        /// <summary>
        /// Performs the actual enqueue of a request; invoked by <see cref="Push"/>.
        /// </summary>
        /// <param name="request">The request object to enqueue.</param>
        protected abstract void DoPush(Request request);
    }
}
Oops, something went wrong.