Last active
January 24, 2024 09:40
-
-
Save relyky/dcf2f373629f404a0f587b12acbc8b6d to your computer and use it in GitHub Desktop.
WebView2 範例, Regex, 網頁爬文
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Form load handler: selects the first entry of the status combo box,
/// initializes the WebView2 control (<c>webAgent</c>) and navigates it to the
/// ITF seafarer abandonment list page.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private async void Form1_Load(object sender, EventArgs e)
{
    const string abandonmentListUrl = "https://www.itfseafarers.org/en/abandonment-list/seafarer-abandonment";

    // Start with the first status filter entry selected.
    cboStatus.SelectedIndex = 0;

    // Initialize the WebView2 control before touching its CoreWebView2.
    await webAgent.EnsureCoreWebView2Async(null);

    var browser = webAgent.CoreWebView2;
    browser.Navigate(abandonmentListUrl);
}
// Matches one <tr>…</tr> element of the result table (non-greedy, spans line breaks).
private static readonly Regex TableRowRegex = new(@"<tr>([\s\S\r\n\t]*?)<\/tr>");

// Matches the ten <td> cells of one result row, in page order, and captures
// the inner text of each cell in a named group.
private static readonly Regex RowCellsRegex = new(
    @"(<td .*-vessel-name"">(?<vesselName>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-imo"">(?<imo>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-flag"">(?<flag>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-port"">(?<port>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-country"">(?<country>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-owed-wages"">(?<owedWages>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-seafarers"">(?<seafarers>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-nationalities"">(?<nationalities>.*)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-reported-to-itf is-active"">(?<reportedToItf>[\s\S]*?)<\/td>)[\s\r\n\t]*" +
    @"(<td .*-status-list"">(?<statusList>.*)<\/td>)");

/// <summary>
/// Click handler: applies the selected status filter to the page loaded in
/// <c>webAgent</c>, submits the filter form, waits for the page to refresh,
/// scrapes the result table and shows the rows as indented JSON in
/// <c>textBox1</c>. <c>progressBar</c> is advanced in steps of 10 along the way.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private async void button2_Click(object sender, EventArgs e)
{
    labelResult.Text = string.Empty;
    progressBar.Value = 0;

    // Make sure the WebView2 core exists even if the button is clicked before
    // the form-load initialization has finished; repeated calls are harmless.
    await webAgent.EnsureCoreWebView2Async(null);

    // Status filter values listed by the original author: All | open | disputed | inactive | resolved.
    // ComboBox.Text is never null, so test for blank instead of using ??.
    // NOTE(review): "All" is taken from that list - confirm it matches the page's option value.
    string status = string.IsNullOrWhiteSpace(cboStatus.Text) ? "All" : cboStatus.Text;
    await webAgent.CoreWebView2.ExecuteScriptAsync(BuildStatusQueryScript(status));
    progressBar.Value = 10;

    // Give the page time to reload: five one-second waits, bumping the
    // progress bar from 20 to 60.
    // NOTE(review): this is a fixed wait; awaiting NavigationCompleted may be more reliable.
    for (int percent = 20; percent <= 60; percent += 10)
    {
        await Task.Delay(1000);
        progressBar.Value = percent;
    }

    // Read the filter controls back so the user can verify what was queried.
    string regionResult = await EvaluateStringAsync(@"document.querySelector('#edit-field-region-target-id').value");
    string statusResult = await EvaluateStringAsync(@"document.querySelector('#edit-field-status-list-value').value");
    labelResult.Text = $"{regionResult} | {statusResult}";
    progressBar.Value = 70;

    // Fetch the markup of the result table body.
    string html = await EvaluateStringAsync(@"document.querySelector('#block-views-block-itf-current-cases-block-1 > div > div > table > tbody').outerHTML");
    progressBar.Value = 80;

    // Parse the rows into records.
    List<SeafarerAbandonment> infoList = ParseAbandonmentRows(html);
    progressBar.Value = 90;

    // Show the result on screen.
    string dataJson = JsonSerializer.Serialize(infoList, new JsonSerializerOptions
    {
        WriteIndented = true,
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping, // keep non-ASCII (e.g. Chinese) text unescaped
    });
    textBox1.Text = dataJson;
    progressBar.Value = 100;
}

/// <summary>
/// Builds the script that sets the status filter drop-down and submits the filter form.
/// </summary>
/// <param name="status">Value to assign to the status drop-down.</param>
/// <returns>JavaScript source to run in the page.</returns>
private static string BuildStatusQueryScript(string status)
{
    // Emit the value as a JSON string literal (valid JavaScript) so quotes or
    // backslashes in the text cannot break out of the literal.
    string statusLiteral = JsonSerializer.Serialize(status);
    return $"document.querySelector('#edit-field-status-list-value').value = {statusLiteral}; "
        + @"document.querySelector('#views-exposed-form-itf-current-cases-block-1').submit();";
}

/// <summary>
/// Runs a script in the page and returns its result as a plain string.
/// </summary>
/// <param name="script">JavaScript expression expected to evaluate to a string.</param>
/// <returns>The decoded string, or an empty string when the result is not a string.</returns>
private async Task<string> EvaluateStringAsync(string script)
{
    string json = await webAgent.CoreWebView2.ExecuteScriptAsync(script);
    return DecodeScriptResult(json);
}

/// <summary>
/// Decodes the JSON-encoded result of <c>ExecuteScriptAsync</c>.
/// </summary>
/// <param name="json">Raw JSON text returned by the script call.</param>
/// <returns>
/// The string value, or an empty string when the input is empty or the JSON
/// value is not a string (for example <c>null</c>).
/// </returns>
private static string DecodeScriptResult(string json)
{
    if (string.IsNullOrEmpty(json))
    {
        return string.Empty;
    }

    using JsonDocument document = JsonDocument.Parse(json);
    JsonElement root = document.RootElement;
    return root.ValueKind == JsonValueKind.String
        ? root.GetString() ?? string.Empty
        : string.Empty;
}

/// <summary>
/// Extracts one <c>SeafarerAbandonment</c> per table row whose cells match
/// the expected column layout; rows that do not match are skipped.
/// </summary>
/// <param name="html">Markup of the result table body.</param>
/// <returns>The parsed rows, in document order; empty when nothing matches.</returns>
private static List<SeafarerAbandonment> ParseAbandonmentRows(string html)
{
    List<SeafarerAbandonment> rows = new();

    foreach (Match tr in TableRowRegex.Matches(html ?? string.Empty))
    {
        Match cells = RowCellsRegex.Match(tr.Value);
        if (!cells.Success)
        {
            continue;
        }

        rows.Add(new SeafarerAbandonment
        {
            VesselName = CellText(cells, "vesselName"),
            Imo = CellText(cells, "imo"),
            Flag = CellText(cells, "flag"),
            Port = CellText(cells, "port"),
            Country = CellText(cells, "country"),
            OwedWages = CellText(cells, "owedWages"),
            Seafarers = CellText(cells, "seafarers"),
            Nationalities = CellText(cells, "nationalities"),
            ReportedToItf = CellText(cells, "reportedToItf"),
            StatusList = CellText(cells, "statusList"),
        });
    }

    return rows;
}

/// <summary>
/// Returns the named capture of a row match with HTML entities decoded and
/// surrounding whitespace removed.
/// </summary>
/// <param name="cells">Successful match of the row-cells pattern.</param>
/// <param name="groupName">Name of the capture group to read.</param>
/// <returns>The decoded, trimmed cell text.</returns>
private static string CellText(Match cells, string groupName)
{
    // Entities are decoded per cell, after the markup has been matched, so a
    // decoded '<' or '"' can never be mistaken for markup.
    return WebUtility.HtmlDecode(cells.Groups[groupName].Value).Trim();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment