Last active
September 2, 2023 16:10
-
-
Save stu43005/31bd0b95d41c4f495cb0839f34878d6b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { writeAll } from "https://deno.land/[email protected]/streams/write_all.ts"; | |
import { load } from "npm:cheerio"; | |
import OpenAI from "npm:openai"; | |
// @deno-types="npm:@types/js-beautify" | |
import beautify from "npm:js-beautify"; | |
// URL of the page to summarize: first CLI argument if given, otherwise the
// original hard-coded default.
const pageUrl = Deno.args[0] ?? "https://www.google.com/";
const res = await fetch(pageUrl);
if (!res.ok) {
  // Without this check an HTTP error page would be parsed and summarized as
  // if it were the article itself.
  throw new Error(`Failed to fetch ${pageUrl}: ${res.status} ${res.statusText}`);
}
const html = await res.text();
// Parse the markup into a cheerio document used by all later cleanup steps.
const $ = load(html);
// Drop elements that are not part of the readable content: document head,
// leading header / trailing footer, navigation, and non-text resources.
const boilerplateSelector = [
  'head',
  'header:nth-child(1)',
  'menu',
  'nav',
  'footer:nth-last-child(1)',
  'meta',
  'script',
  'link',
  'style',
  'noscript',
  'svg',
].join(', ');
$(boilerplateSelector).remove();
// Walk every remaining element: delete the ones styled as hidden, and strip
// presentational / data attributes from the rest.
$('*').each((_index, node) => {
  const $node = $(node);

  // Elements whose `display` is none or `visibility` is hidden (as reported
  // by `.css()`) are removed outright.
  const isHidden =
    $node.css('display') === 'none' || $node.css('visibility') === 'hidden';
  if (isHidden) {
    $node.remove();
    return;
  }

  // Remove every `data-*` attribute. The key list is snapshotted before any
  // attribute is removed.
  Object.keys($node.attr() ?? {})
    .filter((name) => name.startsWith('data-'))
    .forEach((name) => {
      $node.removeAttr(name);
    });

  // `class` is kept only on <i> elements (presumably icon-font markers —
  // TODO confirm); everything else loses it.
  if (!$node.is('i')) {
    $node.removeAttr('class');
  }

  // Presentational attributes are dropped from every element.
  for (const name of ['style', 'width', 'height']) {
    $node.removeAttr(name);
  }
});
// Remove responsive-image and loading-hint attributes from every <img>.
// `removeAttr` applies to each element in the selection, so no explicit
// per-element loop is needed.
$('img')
  .removeAttr('srcset')
  .removeAttr('sizes')
  .removeAttr('loading')
  .removeAttr('decoding');
// Delete <p> elements whose inner HTML is missing or whitespace-only.
$('p').each((_index, paragraph) => {
  const $paragraph = $(paragraph);
  const inner = $paragraph.html();
  const isBlank = inner == null || inner.trim() === '';
  if (isBlank) {
    $paragraph.remove();
  }
});
// Content selectors, most specific first: article/post container, then the
// main region, then the whole body.
// The first candidate with non-blank inner HTML wins. A container that exists
// but is empty (or whitespace-only) falls through to the next candidate
// instead of producing an empty prompt.
const contentHtml =
  ['article, .post', 'main, #main', 'body']
    .map((selector) => $(selector).first().html())
    .find((markup) => markup?.trim()) ?? '';

// Re-serialize the chosen markup without indentation or preserved blank lines.
const content = beautify.html(contentHtml, {
  indent_size: 0,
  preserve_newlines: false,
});
// Read the API key from the environment instead of hard-coding it in source
// (requires running Deno with --allow-env). Fail fast with a clear message
// rather than sending a request with an empty key.
const apiKey = Deno.env.get("OPENAI_API_KEY");
if (!apiKey) {
  throw new Error("OPENAI_API_KEY environment variable is not set.");
}
const openai = new OpenAI({ apiKey });
// System prompt currently in use: asks for at least 12 topic tags in
// Traditional Chinese, comma-separated.
//
// Earlier prompt variants, kept for reference:
// { role: "system", content: "使用者提供一個HTML內容,請總結該文章的主題在講什麼。" },
//   (English: "The user provides HTML content; please summarize what the article's topic is about.")
// { role: "system", content: "You are a professional text summarizer, you can only summarize the text, don't interpret it." },
// { role: "system", content: "Please summarize this text in the most concise language and must use Traditional Chinese language!" },
// { role: "system", content: "user is providing a web page, please sumerize it and only provide 20 'topic tags' in Traditional Chinese and seperate the tags by half-spaced comma. Every tag should not be longer than 4 to 10 Chinese Characters." },
const systemPrompt = "You are a professional content summarizer, you can only summerize the content in Traditional Chinese by only providing at least 12 tags of its topic, don't interpret it. Seperate the labels by half-spaced comma.";

// Request a streamed chat completion over the cleaned page content.
const stream = await openai.chat.completions.create({
  // Model alternatives and their context windows:
  //   "gpt-3.5-turbo"      4,096 tokens
  //   "gpt-3.5-turbo-16k"  16,384 tokens (in use)
  //   "gpt-4"              8,192 tokens
  //   "gpt-4-32k"          32,768 tokens
  model: "gpt-3.5-turbo-16k",
  stream: true,
  messages: [
    { role: "system", content: systemPrompt },
    { role: "user", content },
  ],
  max_tokens: 1000,
  temperature: 0,
  top_p: 1,
  frequency_penalty: 1,
  presence_penalty: 1,
});
// Stream the completion to stdout as it arrives.
// One encoder is reused for all chunks instead of allocating one per chunk.
const encoder = new TextEncoder();
for await (const part of stream) {
  const text = part.choices[0]?.delta?.content;
  // Chunks without text (e.g. a delta with no content) produce no output.
  if (!text) {
    continue;
  }
  await writeAll(Deno.stdout, encoder.encode(text));
}
// Error: This model's maximum context length is 4097 tokens. However, your messages resulted in 92137 tokens. Please reduce the length of the messages. | |
// Error: Rate limit reached for 10KTPM-200RPM in organization org-3U7piMvbYBFn9HiRMKECXCIg on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues. | |
// Error: Invalid value for 'content': expected a string, got null. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment