Skip to content

Commit 26aa57a

Browse files
committed
restruct
1 parent 3cded4f commit 26aa57a

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

50 files changed

+505
-286
lines changed

docker-compose/agent1.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"RabbitMQ": {
3-
"Exchange": "DOTNET_SPIDER",
3+
"Exchange": "DotnetSpider",
44
"Host": "192.168.124.200",
55
"UserName": "user",
66
"Password": "password"

docker-compose/agent2.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"RabbitMQ": {
3-
"Exchange": "DOTNET_SPIDER",
3+
"Exchange": "DotnetSpider",
44
"Host": "192.168.124.200",
55
"UserName": "user",
66
"Password": "password"

src/DotnetSpider.Agent/appsettings.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"RabbitMQ": {
3-
"Exchange": "DOTNET_SPIDER",
3+
"Exchange": "DotnetSpider",
44
"HostName": "localhost",
55
"UserName": "user",
66
"Password": "password"

src/DotnetSpider.AgentCenter/appsettings.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"RabbitMQ": {
3-
"Exchange": "DOTNET_SPIDER",
3+
"Exchange": "DotnetSpider",
44
"HostName": "localhost",
55
"UserName": "user",
66
"Password": "password"

src/DotnetSpider.HBase/HBaseStorage.cs

+1-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
using DotnetSpider.DataFlow;
77
using DotnetSpider.DataFlow.Storage;
88
using DotnetSpider.Extensions;
9-
using DotnetSpider.Infrastructure;
109
using Microsoft.Extensions.Configuration;
1110
using Microsoft.Extensions.DependencyInjection;
1211
using Microsoft.Extensions.Logging;
@@ -69,7 +68,7 @@ protected override async Task StoreAsync(DataFlowContext context)
6968

7069
var hash = context.Request.Hash;
7170

72-
var bytes = context.GetData(Consts.ResponseBytes);
71+
var bytes = context.MessageBytes;
7372
var data = Convert.ToBase64String(bytes);
7473

7574
var httpClient = context.ServiceProvider.GetRequiredService<IHttpClientFactory>().CreateClient(_rest);

src/DotnetSpider.Portal/Controllers/API/AgentController.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ public async Task<IApiResult> DeleteAsync(string id)
7878
return new FailedResult("Agent is not exists");
7979
}
8080

81-
await _mq.PublishAsBytesAsync(string.Format(TopicNames.Spider, id.ToUpper()), new Exit {AgentId = id});
81+
await _mq.PublishAsBytesAsync(string.Format(Const.Topic.Spider, id.ToUpper()), new Exit {AgentId = id});
8282

8383
using (var conn = _dbContext.Database.GetDbConnection())
8484
{
@@ -98,7 +98,7 @@ public async Task<IApiResult> ExitAsync(string id)
9898
return new FailedResult("Agent is not exists");
9999
}
100100

101-
await _mq.PublishAsBytesAsync(string.Format(TopicNames.Spider, id.ToUpper()), new Exit {AgentId = id});
101+
await _mq.PublishAsBytesAsync(string.Format(Const.Topic.Spider, id.ToUpper()), new Exit {AgentId = id});
102102
return new ApiResult("OK");
103103
}
104104
}

src/DotnetSpider.Portal/Controllers/API/SpiderController.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ public async Task<bool> ExitAsync(int id)
265265
}
266266

267267
var spiderId = spiderHistory.Batch.ToUpper();
268-
var topic = string.Format(TopicNames.Spider, spiderHistory.Batch.ToUpper());
268+
var topic = string.Format(Const.Topic.Spider, spiderHistory.Batch.ToUpper());
269269
_logger.LogInformation($"Try stop spider {topic}");
270270
await _mq.PublishAsBytesAsync(topic,
271271
new Exit(spiderId));

src/DotnetSpider.Portal/appsettings.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"ConnectionString": "Database='dotnetspider';Data Source=localhost;password=1qazZAQ!;User ID=root;Port=3306;",
33
"DatabaseType": "MySql",
44
"RabbitMQ": {
5-
"Exchange": "DOTNET_SPIDER",
5+
"Exchange": "DotnetSpider",
66
"HostName": "localhost",
77
"UserName": "user",
88
"Password": "password"

src/DotnetSpider.RabbitMQ/RabbitMQOptions.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ namespace DotnetSpider.RabbitMQ
22
{
33
public class RabbitMQOptions
44
{
5-
public string Exchange { get; set; } = "DOTNET_SPIDER";
5+
public string Exchange { get; set; } = "DotnetSpider";
66

77
public string HostName { get; set; }
88

src/DotnetSpider.Sample/DotnetSpider.Sample.csproj

+4
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,8 @@
2525
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
2626
</None>
2727
</ItemGroup>
28+
29+
<ItemGroup>
30+
<Folder Include="logs" />
31+
</ItemGroup>
2832
</Project>

src/DotnetSpider.Sample/appsettings.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"RabbitMQ": {
3-
"Exchange": "DOTNET_SPIDER",
3+
"Exchange": "DotnetSpider",
44
"HostName": "localhost",
55
"UserName": "user",
66
"Password": "password"

src/DotnetSpider.Sample/samples/EntitySpider.cs

+5-9
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ public static async Task RunAsync()
2929
});
3030
builder.UseDownloader<HttpClientDownloader>();
3131
builder.UseSerilog();
32-
// builder.UseProxy<FakeProxySupplier, FakeProxyValidator>();
3332
builder.IgnoreServerCertificateError();
3433
builder.UseQueueDistinctBfsScheduler<HashSetDuplicateRemover>();
3534
await builder.Build().RunAsync();
@@ -41,16 +40,13 @@ public EntitySpider(IOptions<SpiderOptions> options, DependenceServices services
4140
{
4241
}
4342

44-
protected override async Task InitializeAsync(CancellationToken stoppingToken)
43+
protected override async Task InitializeAsync(CancellationToken stoppingToken = default)
4544
{
4645
AddDataFlow(new DataParser<CnblogsEntry>());
4746
AddDataFlow(GetDefaultStorage());
48-
for (var i = 1; i < 99; ++i)
49-
{
50-
await AddRequestsAsync(
51-
new Request(
52-
"https://news.cnblogs.com/n/page/" + i, new Dictionary<string, object> {{"网站", "博客园"}}));
53-
}
47+
await AddRequestsAsync(
48+
new Request(
49+
"https://news.cnblogs.com/n/page/1", new Dictionary<string, object> {{"网站", "博客园"}}));
5450
}
5551

5652
protected override (string Id, string Name) GetIdAndName()
@@ -63,7 +59,7 @@ protected override (string Id, string Name) GetIdAndName()
6359
[GlobalValueSelector(Expression = ".//a[@class='current']", Name = "类别", Type = SelectorType.XPath)]
6460
[GlobalValueSelector(Expression = "//title", Name = "Title", Type = SelectorType.XPath)]
6561
[FollowRequestSelector(Expressions = new[] {"//div[@class='pager']"})]
66-
public class CnblogsEntry : EntityBase<CnblogsEntry>
62+
class CnblogsEntry : EntityBase<CnblogsEntry>
6763
{
6864
protected override void Configure()
6965
{

src/DotnetSpider.Sample/samples/JsonEntitySpider.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using DotnetSpider.DataFlow.Storage;
66
using DotnetSpider.Downloader;
77
using DotnetSpider.Http;
8+
using DotnetSpider.Infrastructure;
89
using DotnetSpider.Scheduler;
910
using DotnetSpider.Scheduler.Component;
1011
using DotnetSpider.Selector;
@@ -36,7 +37,7 @@ protected override async Task InitializeAsync(CancellationToken stoppingToken)
3637
AddDataFlow(new DataParser<MyEntity>());
3738
AddDataFlow(GetDefaultStorage());
3839
await AddRequestsAsync(
39-
new Request("file://samples/test.json") {Downloader = DownloaderNames.File});
40+
new Request("file://samples/test.json") {Downloader = Const.Downloader.File});
4041
}
4142

4243
[Schema("json", "data")]
src/DotnetSpider.Sample/samples/ProxySpider.cs

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.ComponentModel.DataAnnotations;
4+
using System.Threading;
5+
using System.Threading.Tasks;
6+
using DotnetSpider.DataFlow.Parser;
7+
using DotnetSpider.DataFlow.Parser.Formatters;
8+
using DotnetSpider.DataFlow.Storage;
9+
using DotnetSpider.Downloader;
10+
using DotnetSpider.Http;
11+
using DotnetSpider.Infrastructure;
12+
using DotnetSpider.Proxy;
13+
using DotnetSpider.Scheduler;
14+
using DotnetSpider.Scheduler.Component;
15+
using DotnetSpider.Selector;
16+
using Microsoft.Extensions.Hosting;
17+
using Microsoft.Extensions.Logging;
18+
using Microsoft.Extensions.Options;
19+
using Serilog;
20+
21+
namespace DotnetSpider.Sample.samples
22+
{
23+
public class ProxySpider : Spider
24+
{
25+
public static async Task RunAsync()
26+
{
27+
var builder = Builder.CreateDefaultBuilder<ProxySpider>(options =>
28+
{
29+
options.Speed = 1;
30+
});
31+
builder.UseDownloader<HttpClientDownloader>();
32+
builder.UseSerilog();
33+
builder.UseProxy<FakeProxySupplier, FakeProxyValidator>();
34+
builder.IgnoreServerCertificateError();
35+
builder.UseQueueDistinctBfsScheduler<HashSetDuplicateRemover>();
36+
await builder.Build().RunAsync();
37+
}
38+
39+
public ProxySpider(IOptions<SpiderOptions> options, DependenceServices services,
40+
ILogger<Spider> logger) : base(
41+
options, services, logger)
42+
{
43+
}
44+
45+
protected override async Task InitializeAsync(CancellationToken stoppingToken)
46+
{
47+
AddDataFlow(new DataParser<CnblogsEntry>());
48+
AddDataFlow(GetDefaultStorage());
49+
for (var i = 1; i < 99; ++i)
50+
{
51+
await AddRequestsAsync(
52+
new Request(
53+
"https://news.cnblogs.com/n/page/" + i, new Dictionary<string, object> {{"网站", "博客园"}}));
54+
}
55+
}
56+
57+
protected override (string Id, string Name) GetIdAndName()
58+
{
59+
return (ObjectId.NewId().ToString(), "博客园");
60+
}
61+
62+
[Schema("cnblogs", "news")]
63+
[EntitySelector(Expression = ".//div[@class='news_block']", Type = SelectorType.XPath)]
64+
[GlobalValueSelector(Expression = ".//a[@class='current']", Name = "类别", Type = SelectorType.XPath)]
65+
[GlobalValueSelector(Expression = "//title", Name = "Title", Type = SelectorType.XPath)]
66+
[FollowRequestSelector(Expressions = new[] {"//div[@class='pager']"})]
67+
class CnblogsEntry : EntityBase<CnblogsEntry>
68+
{
69+
protected override void Configure()
70+
{
71+
HasIndex(x => x.Title);
72+
HasIndex(x => new {x.WebSite, x.Guid}, true);
73+
}
74+
75+
public int Id { get; set; }
76+
77+
[Required]
78+
[StringLength(200)]
79+
[ValueSelector(Expression = "类别", Type = SelectorType.Environment)]
80+
public string Category { get; set; }
81+
82+
[Required]
83+
[StringLength(200)]
84+
[ValueSelector(Expression = "网站", Type = SelectorType.Environment)]
85+
public string WebSite { get; set; }
86+
87+
[StringLength(200)]
88+
[ValueSelector(Expression = "Title", Type = SelectorType.Environment)]
89+
[ReplaceFormatter(NewValue = "", OldValue = " - 博客园")]
90+
public string Title { get; set; }
91+
92+
[StringLength(40)]
93+
[ValueSelector(Expression = "GUID", Type = SelectorType.Environment)]
94+
public string Guid { get; set; }
95+
96+
[ValueSelector(Expression = ".//h2[@class='news_entry']/a")]
97+
public string News { get; set; }
98+
99+
[ValueSelector(Expression = ".//h2[@class='news_entry']/a/@href")]
100+
public string Url { get; set; }
101+
102+
[ValueSelector(Expression = ".//div[@class='entry_summary']")]
103+
[TrimFormatter]
104+
public string PlainText { get; set; }
105+
106+
[ValueSelector(Expression = "DATETIME", Type = SelectorType.Environment)]
107+
public DateTime CreationTime { get; set; }
108+
}
109+
}
110+
}

src/DotnetSpider.Sample/samples/SpeedSpider.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ protected override async Task InitializeAsync(CancellationToken stoppingToken =
4040
{
4141
await AddRequestsAsync(new Request("https://news.cnblogs.com/n/page/" + i)
4242
{
43-
Downloader = DownloaderNames.Empty
43+
Downloader = Const.Downloader.Empty
4444
});
4545
}
4646

src/DotnetSpider.Spiders/appsettings.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"RabbitMQ": {
3-
"Exchange": "DOTNET_SPIDER",
3+
"Exchange": "DotnetSpider",
44
"Host": "192.168.124.200",
55
"UserName": "user",
66
"Password": "password"

src/DotnetSpider.Tests/RequestTests.cs

+8-9
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
using System.Collections.Generic;
22
using System.Text;
33
using System.Threading.Tasks;
4-
using DotnetSpider.Downloader;
54
using DotnetSpider.Extensions;
65
using DotnetSpider.Http;
76
using DotnetSpider.Infrastructure;
@@ -30,7 +29,7 @@ public async Task SerializeAndDeserialize1()
3029
{
3130
Method = "PUT",
3231
Agent = "Agent",
33-
Downloader = DownloaderNames.HttpClient,
32+
Downloader = Const.Downloader.HttpClient,
3433
Timestamp = 1000,
3534
PPPoERegex = "PPPoERegex"
3635
};
@@ -49,7 +48,7 @@ public async Task SerializeAndDeserialize1()
4948
Assert.Equal("PUT", r1.Method);
5049
Assert.Equal("Accept", r1.Headers.Accept);
5150
// Assert.Equal("Agent", r1.Agent);
52-
Assert.Equal(DownloaderNames.HttpClient, r1.Downloader);
51+
Assert.Equal(Const.Downloader.HttpClient, r1.Downloader);
5352
Assert.Equal("UserAgent", r1.Headers.UserAgent);
5453
Assert.Equal(1000, r1.Timestamp);
5554
Assert.Equal("PPPoERegex", r1.PPPoERegex);
@@ -63,7 +62,7 @@ public async Task SerializeAndDeserialize2()
6362
{
6463
Method = "PUT",
6564
Agent = "Agent",
66-
Downloader = DownloaderNames.HttpClient,
65+
Downloader = Const.Downloader.HttpClient,
6766
Timestamp = 1000,
6867
PPPoERegex = "PPPoERegex"
6968
};
@@ -78,7 +77,7 @@ public async Task SerializeAndDeserialize2()
7877
Assert.Equal("UserAgent", r1.Headers.UserAgent);
7978
Assert.Equal("Accept", r1.Headers.Accept);
8079
// Assert.Equal("Agent", r1.Agent);
81-
Assert.Equal(DownloaderNames.HttpClient, r1.Downloader);
80+
Assert.Equal(Const.Downloader.HttpClient, r1.Downloader);
8281

8382
Assert.Equal(1000, r1.Timestamp);
8483
Assert.Equal("PPPoERegex", r1.PPPoERegex);
@@ -109,7 +108,7 @@ public void DeepClone1()
109108
{
110109
Method = "PUT",
111110
Agent = "Agent",
112-
Downloader = DownloaderNames.HttpClient,
111+
Downloader = Const.Downloader.HttpClient,
113112
Timestamp = 1000,
114113
PPPoERegex = "PPPoERegex"
115114
};
@@ -128,7 +127,7 @@ public void DeepClone1()
128127
Assert.Equal("PUT", r1.Method);
129128

130129
// Assert.Equal("Agent", r1.Agent);
131-
Assert.Equal(DownloaderNames.HttpClient, r1.Downloader);
130+
Assert.Equal(Const.Downloader.HttpClient, r1.Downloader);
132131
Assert.Equal("UserAgent", r1.Headers.UserAgent);
133132
Assert.Equal("Accept", r1.Headers.Accept);
134133
Assert.Equal(1000, r1.Timestamp);
@@ -143,7 +142,7 @@ public void DeepClone2()
143142
{
144143
Method = "PUT",
145144
Agent = "Agent",
146-
Downloader = DownloaderNames.HttpClient,
145+
Downloader = Const.Downloader.HttpClient,
147146
Timestamp = 1000,
148147
PPPoERegex = "PPPoERegex"
149148
};
@@ -157,7 +156,7 @@ public void DeepClone2()
157156
Assert.Equal("UserAgent", r1.Headers.UserAgent);
158157
Assert.Equal("Accept", r1.Headers.Accept);
159158
// Assert.Equal("Agent", r1.Agent);
160-
Assert.Equal(DownloaderNames.HttpClient, r1.Downloader);
159+
Assert.Equal(Const.Downloader.HttpClient, r1.Downloader);
161160

162161
Assert.Equal(1000, r1.Timestamp);
163162
Assert.Equal("PPPoERegex", r1.PPPoERegex);

0 commit comments

Comments
 (0)