Node Basic Notes
Node App Structure
- Main ./index.js, ./server.js, or ./yourEntryFile.js in the root
- Supporting files in ./lib/
- Static HTTP files in ./public/
- Views or templates in ./views/
- Command-line executables in ./bin/
- Tests in ./test/ (or ./spec/ if you’re a Jasmine Kool-Aid drinker)
- npm scripts in ./scripts/
- Config in ./config/
- Documentation in ./doc/
- Examples in ./examples/
- Performance analysis in ./benchmarks/
- Native C/C++ source in ./source/
NPM CLI
NPM Mirrors
# Point the binary downloads of commonly used packages at the npmmirror.com mirrors.
# Every mirror URL uses HTTPS so all downloads go over an encrypted connection.
npm config set disturl https://npmmirror.com/mirrors/node/
npm config set chromedriver_cdnurl https://npmmirror.com/mirrors/chromedriver/
npm config set electron_mirror https://npmmirror.com/mirrors/electron/
npm config set electron_builder_binaries_mirror https://npmmirror.com/mirrors/electron-builder-binaries/
npm config set operadriver_cdnurl https://npmmirror.com/mirrors/operadriver/
npm config set phantomjs_cdnurl https://npmmirror.com/mirrors/phantomjs/
npm config set profiler_binary_host_mirror https://npmmirror.com/mirrors/node-inspector/
npm config set puppeteer_download_host https://npmmirror.com/mirrors/
npm config set python_mirror https://npmmirror.com/mirrors/python/
npm config set robotjs_binary_host https://npmmirror.com/mirrors/robotjs/
npm config set sass_binary_site https://npmmirror.com/mirrors/node-sass/
npm config set saucectl_install_binary_mirror https://npmmirror.com/mirrors/saucectl/
npm config set sentrycli_cdnurl https://npmmirror.com/mirrors/sentry-cli/
npm config set sharp_binary_host https://npmmirror.com/mirrors/sharp/
npm config set sharp_libvips_binary_host https://npmmirror.com/mirrors/sharp-libvips/
npm config set sqlite3_binary_site https://npmmirror.com/mirrors/sqlite3/
npm config set swc_binary_site https://npmmirror.com/mirrors/node-swc/
Node Version Manager
- Volta: Install and Run JS Tool Quickly and Seamlessly
- FNM: Rust Node Manager
- NVM: Node Version Manager
curl https://get.volta.sh | bash
volta install node
node -v
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash
wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash
# Install and use the latest version
nvm install node
nvm use node
nvm alias default node
# Install and use the latest LTS version
nvm install --lts
nvm use --lts
# Install and use specific version
nvm install 16
nvm use 16
nvm ls
# Update to latest version
nvm install 16
nvm install node
# Remove version
nvm uninstall 14
nvm uninstall 12
Basic Steps
npm adduser
mkdir proj/
# 修改 package.json 可再次运行此命令
# scope for everyone
npm init --scope=<username>
# 修改 package.json 可再次运行此命令(不接模块名为自动更新)
npm install -S <package>
npm install -D <package>
npm prune # 清除无用包
npm rm --save # --save 删除文件的同时更新 package.json 文件
npm ls
npm outdated # 列出过期包 (仅检查, 不会删除或更新)
Test Steps
{
"scripts": {
"test": "node test.js"
}
}
npm test
Publish Steps
Publish with the `latest` tag (default) or a custom tag such as `alpha`:
npm publish
npm publish --tag [<tag>]
npm dist-tag add <pkg>@<version> [<tag>]
npm dist-tag rm <pkg> <tag>
npm dist-tag ls [<pkg>]
NPM registry token configuration:
npm config set @orgName:registry https://registry.example.com
npm config set //registry.example.com/:_authToken XXXXXTokenXXXXX
Release script from VitePress:
const fs = require('node:fs')
const path = require('node:path')
const chalk = require('chalk')
const { prompt } = require('enquirer')
const execa = require('execa')
const semver = require('semver')
const currentVersion = require('../package.json').version
const versionIncrements = ['patch', 'minor', 'major']
const inc = i => semver.inc(currentVersion, i)
const bin = name => path.resolve(__dirname, `../node_modules/.bin/${name}`)
/**
 * Run an external command through `execa`.
 *
 * The child inherits this process's stdio unless `opts` overrides `stdio`.
 *
 * @param {string} bin - Command to execute.
 * @param {string[]} args - Arguments passed to the command.
 * @param {object} [opts] - Extra `execa` options; they win over the defaults.
 * @returns {*} Whatever `execa` returns for the spawned command.
 */
function run(bin, args, opts = {}) {
  const execaOptions = { stdio: 'inherit', ...opts }
  return execa(bin, args, execaOptions)
}
const step = msg => console.log(chalk.cyan(msg))
/**
 * Interactive release flow.
 *
 * Steps, in order: pick the target version, confirm, write it to package.json,
 * build, generate and format the changelog, confirm the changelog, commit and
 * tag, publish, then push the tag and the branch.
 *
 * Returns early (without throwing) when the user declines either confirmation.
 * Throws when the chosen version is not valid semver; any failing command run
 * through `run` is awaited, so its rejection propagates to the caller.
 */
async function main() {
let targetVersion
// Offer "patch/minor/major (x.y.z)" choices computed from the current version, plus "custom".
const { release } = await prompt({
type: 'select',
name: 'release',
message: 'Select release type',
choices: versionIncrements.map(i => `${i} (${inc(i)})`).concat(['custom']),
})
if (release === 'custom') {
targetVersion = (
await prompt({
type: 'input',
name: 'version',
message: 'Input custom version',
initial: currentVersion,
})
).version
} else {
// The choice label looks like "minor (1.3.0)"; pull the version out of the parentheses.
targetVersion = release.match(/\((.*)\)/)[1]
}
if (!semver.valid(targetVersion))
throw new Error(`Invalid target version: ${targetVersion}`)
const { yes: tagOk } = await prompt({
type: 'confirm',
name: 'yes',
message: `Releasing v${targetVersion}. Confirm?`,
})
if (!tagOk)
return
// Update the package version.
step('\nUpdating the package version...')
updatePackage(targetVersion)
// Build the package.
step('\nBuilding the package...')
await run('yarn', ['build'])
// Generate the changelog.
step('\nGenerating the changelog...')
await run('yarn', ['changelog'])
await run('yarn', ['prettier', '--write', 'CHANGELOG.md'])
const { yes: changelogOk } = await prompt({
type: 'confirm',
name: 'yes',
message: `Changelog generated. Does it look good?`,
})
// NOTE: declining here returns after package.json and CHANGELOG.md were already modified on disk.
if (!changelogOk)
return
// Commit changes to the Git and create a tag.
step('\nCommitting changes...')
await run('git', ['add', 'CHANGELOG.md', 'package.json'])
await run('git', ['commit', '-m', `release: v${targetVersion}`])
await run('git', ['tag', `v${targetVersion}`])
// Publish the package.
// NOTE(review): the package is published twice, first with `yarn publish` and then
// with `npm publish` against registry.npmjs.org; confirm both calls are intended.
step('\nPublishing the package...')
await run('yarn', [
'publish',
'--new-version',
targetVersion,
'--no-commit-hooks',
'--no-git-tag-version',
])
await run('npm', [
'publish',
'--registry',
'https://registry.npmjs.org',
'--access',
'public',
])
// Push to GitHub.
step('\nPushing to GitHub...')
await run('git', ['push', 'origin', `refs/tags/v${targetVersion}`])
await run('git', ['push'])
}
/**
 * Write `version` into the `version` field of the package.json that lives
 * one directory above this script.
 *
 * The file is rewritten with 2-space indentation and a trailing newline.
 *
 * @param {string} version - Version string to store.
 */
function updatePackage(version) {
  const pkgPath = path.resolve(__dirname, '..', 'package.json')
  const manifest = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'))
  manifest.version = version
  const serialized = JSON.stringify(manifest, null, 2)
  fs.writeFileSync(pkgPath, `${serialized}\n`)
}
// Log the failure and mark the process as failed, so shells and CI see a
// non-zero exit status instead of a release that silently "succeeded".
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
Semantic Version
Semver:
- Patch release: bugfix and other minor changes.
- Minor release: new features not breaking API (backward compatible).
- Major release: new features breaking API (not backward compatible).
- Alpha (α): 预览版 (内部测试版), 会有很多 Bug, 一般只有测试人员使用.
- Beta (β): 测试版 (或者叫公开测试版), 会一直加入新的功能.
- RC (Release Candidate): 最终测试版本, 可能成为最终产品的候选版本.
- 多数开源软件会推出两个 RC 版本, 最后的 RC2 则成为正式版本.
npm version patch
npm publish
npm version minor
npm publish
npm version major
npm publish
Tab Completion
npm completion >> ~/.bashrc (or ~/.zshrc)
source ~/.zshrc
Basic Command
best practice: npm ci
for cache install (speed up installation)
// With package-lock.json exists:
npm ci
remove useless package
npm prune // uninstall node_modules not in package.json
npm outdated
Link Command
cd path/to/my-project
npm link path/to/my-utils
# in local B package, build local B binary (npm install -g B)
npm link
# in local A package, set `B` link in package.json to local B binary
npm link B
Security Command
npm audit fix
npm audit fix --force
NPX Command
Run local node_modules:
npm install eslint -D
npx eslint .
Run global package (not installed):
npx create-react-app react-app
Run specific version:
npx -p package1@next -p package2@next -c "command"
Run scripts with different node version:
npx -p node@version -- node index.js
Run remote repo/gist code:
npx user/repo#branch
npx gistUrl
npx caches packages in `~/.npm/_npx`.
To get latest version package:
# https://github.com/return-0x0/node-clear-npx-cache.
# https://github.com/npm/cli/issues/2329.
# https://github.com/npm/cli/issues/2395.
# https://github.com/npm/cli/pull/2592.
# https://github.com/facebook/create-react-app/issues/10601.
# https://github.com/facebook/create-react-app/issues/12022.
npx clear-npx-cache
npx create-react-app app
NPM Dependencies
- Dependency Nesting/Hell (NPM v1).
- Dependency Flatten/Hoist (NPM v3).
- Dependency Consistent Lockfile (NPM v5 and Yarn).
- Dependency Hard/Symbolic Links (PNPM):
  - Hard links for global `.pnpm` store to save disk storage.
  - Symbolic links for local require short path with non-flat `node_modules`
    to rectify doppelgangers and ghost/phantom dependencies problem.
- `peerDependencies`: 提示宿主环境去安装满足插件 `peerDependencies` 所指定依赖的包,
  然后在插件 `import` 或者 `require` 所依赖的包的时候,
  永远都是引用宿主环境统一安装的 NPM 包, 最终解决插件与所依赖包不一致的问题.
- 构建依赖树的过程中, 版本确认需要结合 `package.json` 和 `package-lock.json`:
  - 先确认 `package-lock.json` 安装版本, 符合规则就以此为准,
    否则由 `package.json` 声明的版本范围重新确认.
  - 若是在开发中手动更改包信息, 会导致 lockfile 版本信息异常,
    也由 `package.json` 重新确认.
  - 确认好的依赖树会存到 `package-lock.json` 文件中.
- 同一个依赖, 更高版本的包会安装到顶层 `node_modules` 目录,
  低版本的包会分散在某些依赖的 `node_modules` 目录.
- Lockfile 保证项目依赖结构的确定性, 保障项目在多环境运行的稳定性.
NPM Doppelgangers
- Singleton conflict: multiple versions of the same package in `node_modules`.
- Types conflict: global `types` naming conflict.
NPM Ghost Dependency
NPM ghost (phantom) dependency:
- Imported packages from `dependencies of dependencies`:
  when updating `dependencies` to a minor version,
  `dependencies of dependencies` may get a major BREAKING version
  (it's legal for `semver`, as long as the `dependencies` API doesn't change).
- Imported packages from `devDependencies`:
  when others install your library, such imported packages will be missing,
  because they aren't listed in the library's `package.json`.
- Imported packages from root `node_modules` in a monorepo:
  when others install your library, such imported packages will be missing,
  because they aren't listed in the library's `package.json`.
NPM Invalid Dependency
$ npm ls
package@version invalid
Modify package-lock.json
to remove locked invalid package version.
Package JSON
Bin
当设置了 bin
字段后,
在 package.json
script
字段中,
可以使用简写编写命令
(但是局部安装无法在 shell 下使用, 需 npx <bin-name>
).
Version
npm version major
npm version minor
npm version patch
NPM Workspaces
In root package.json
:
{
"workspaces": [
"./packages/*",
"./css/*",
"./angular/*",
"./react/*",
"./vue/*"
]
}
In root cwd
:
npm i
npm run lint -ws
npm run test -w package-a
npm i lodash -w package-b
npm i -D eslint -w package-c
Exports Field
exports
can define public API:
{
"exports": {
".": {
"types": "./dist/index.d.ts",
"module": "./dist/index.mjs",
"import": "./dist/index.mjs",
"require": "./dist/index.cjs",
"default": "./dist/index.mjs"
},
"./package.json": "./package.json"
},
"types": "./dist/index.d.ts",
"browser": "./dist/index.mjs",
"module": "./dist/index.mjs",
"main": "./dist/index.cjs"
}
exports
configures JavaScript level,
file packages/rest/build/gen/util/regexp-tools.js
can be imported via @github/rest/gen/util/regexp-tools
:
- Don't need to mention directory
build
/dist
in module specifiers. - Don't need to mention
.js
/.ts
in module specifiers.
{
"name": "@github/rest",
"private": true,
"exports": {
"./": "./dist/index.js",
"./gen/*": "./dist/gen/*.js",
"./client/*": "./dist/client/*.js",
"./contract": "./dist/contract.js",
"./state": "./dist/state.js",
"./package.json": "./package.json"
}
}
import octokit from '@github/rest'
import { Contract } from '@github/rest/contract'
import utils from '@github/rest/gen/utils'
import state from '@github/rest/state'
Resolutions
Besides git bisect
for debugging broken version,
revert to last working version with resolutions
field
will help to fix broken version too:
{
"resolutions": {
"rc-field-form": "1.44.0"
}
}
Package Lockfile
When kept in sync with its associated package.json
,
a lockfile will further lock down the exact dependencies and sub-dependencies,
so that everyone running npm i
or yarn
will install the exact same dependencies.
If the package.json
contains a range,
and a new in-range version is released that would break the build,
then essentially package.json
is in a state of broken,
even if the lockfile is still holding things together.
- Apps (web or Node.js) that aren't `require()`d by other packages
  should pin all types of dependencies for greatest reliability/predictability.
- Libraries that are consumed/`require()`d by others
  should keep using SemVer ranges for dependencies
  (to purge multiple versions in `node_modules`) but can use pinned devDependencies.
CLI Environment
配置文件以 .env
/JS(Object)
/JSON
/JSONP
/XML
/YML
格式单独存放,
方便读取.
# .env file (added to .gitignore)
NODE_ENV=development
PORT=8626
# Set your database/API connection information here
API_KEY=**************************
API_URL=**************************
// config.js
const dotenv = require('dotenv')
dotenv.config()
module.exports = {
endpoint: process.env.API_URL,
masterKey: process.env.API_KEY,
port: process.env.PORT,
}
// server.js
const { port } = require('./config')
console.log(`Your port is ${port}`) // 8626
Corepack
Corepack is a tool to help with managing versions of your package managers (package manager manager).
It exposes binary proxies for each supported package manager. It will identify whatever package manager is configured for current project, transparently install it if needed, and finally run it without requiring explicit user interactions.
# In npm project
corepack yarn
# In npm project
corepack pnpm
corepack enable yarn
corepack disable pnpm
Yarn
# Modify `/etc/hosts`
npm i -g yarn
cd project/
yarn set version berry
Setup basic configuration .yarnrc.yml
:
yarnPath: .yarn/releases/yarn-berry.cjs
nodeLinker: node-modules
npmPublishAccess: public
npmPublishRegistry: 'https://registry.npmjs.org'
npmRegistryServer: 'https://registry.npmjs.org'
Update .gitignore
file:
.yarn/*
!.yarn/patches
!.yarn/releases
!.yarn/plugins
!.yarn/sdks
!.yarn/versions
.pnp/
.pnp.js
Yarn Configuration
yarn config set nodeLinker node-modules --home
yarn config set npmPublishAccess public --home
yarn config set npmRegistryServer "https://registry.npmjs.org" --home
yarn config set yarnPath .yarn/releases/yarn-berry.cjs --home
yarn config set unsafeHttpWhitelist --json '["localhost", "*.example.com", "example.com"]'
Yarn Updates
One line to update all deps in monorepo:
yarn up @types/node
yarn up @types/react
yarn dedupe --strategy highest
Yarn Workspace
yarn workspace packageName build
Yarn Plugin
yarn plugin list
Yarn Patch
Modify package in node_modules
conveniently:
- Running `yarn patch <package>` creates a copy of `package` in `/tmp/xfs-xxxxxxxx/user/`.
- After modifying the source code of `package`, run `yarn patch-commit /tmp/xfs-xxxxxxxx/user --save`.
Yarn Berry Real World Case
- Gatsby:
yarn 1 with
.yarn/
directory. - Redux ToolKit: yarn 2.
- Babel: yarn 3.
- StoryBook: yarn 3.
PNPM
PNPM Installation
Using Corepack or npm (recommended installation):
# Using Corepack.
corepack enable pnpm
# Using npm.
npm install -g pnpm
alias np=pnpm
source ~/.zshrc
which pnpm
pnpm --version
pnpm store path
Using a standalone script (without Node.js installed):
# By script.
wget -qO- https://get.pnpm.io/install.sh | sh -
# By manual download.
mv ./pnpm-linux-x64 ./pnpm
chmod +x ./pnpm
./pnpm setup --force
PNPM Configuration
pnpm config set registry https://registry.npmmirror.com/
Self-Defined Module
Basic Modular Pattern
编写具有回调函数参数的模块
- 定义模块
/**
 * Template for a module function that reports its outcome through a
 * Node-style `(error, result)` callback.
 *
 * `paramNotValid` and `param` are resolved from the enclosing scope.
 * The callback is invoked exactly once and outside the `try` block, so an
 * exception thrown by the callback itself propagates to the caller instead
 * of triggering a second, misleading "error" invocation of the callback.
 *
 * @param {*} x - First input (unused by the template).
 * @param {*} y - Second input (unused by the template).
 * @param {(error: Error | null, param: *) => void} callback - Receives the
 *   validation error (or `null`) and `param`.
 */
function foo(x, y, callback) {
  let error = null
  try {
    if (paramNotValid())
      throw new Error('Invalid parameters!')
  } catch (err) {
    // Only validation failures are captured here.
    error = err
  }
  callback(error, param)
}
- 使用模块
foo(a, b, (err, param) => {
if (err)
processError()
else
process()
})
Export Module
module.exports = function (args) {
/* ... */
}
CallBack Function
- 先定义最内层回调, 可避免回调嵌套
// Flattened callback chain: each step is a named function declared before the
// step that uses it, so nothing is nested more than one level deep.
// Order of execution: getMemCached -> getDbInfo -> getWsInfo -> render.
server.on('request', (req, res) => {
  // Intermediate results are kept here because `render` needs all of them,
  // while each callback only receives the result of its own step.
  let session
  let userData
  const render = function (wsData) {
    page = pageRender(req, session, userData, wsData)
  }
  const getWsInfo = function (dbData) {
    userData = dbData
    ws.get(req, render)
  }
  const getDbInfo = function (cachedSession) {
    session = cachedSession
    db.get(session.user, getWsInfo)
  }
  const getMemCached = function (req, res) {
    memcached.getSession(req, getDbInfo)
  }
  // Start the chain; without this call none of the steps above ever runs.
  getMemCached(req, res)
})
Module Resolution
const x = require('./module')
:
/root/src/module.js
/root/src/module/package.json
+{ "main": "lib/mainModule.js" }
=/root/src/module/lib/mainModule.js
/root/src/module/index.js
const x = require('module')
:
/root/src/node_modules/module.js
/root/src/node_modules/module/package.json
(if it specifies amain
property)/root/src/node_modules/module/index.js
/root/node_modules/module.js
/root/node_modules/module/package.json
(if it specifies amain
property)/root/node_modules/module/index.js
/node_modules/module.js
/node_modules/module/package.json
(if it specifies amain
property)/node_modules/module/index.js
Node Module
- CommonJS 模块在执行阶段同步加载子模块文件, ES6 模块在预处理阶段加载子模块文件, ES6 模块在执行阶段也会加载子模块文件, 不过会使用预处理阶段的缓存.
- CommonJS 模块同步加载并执行模块文件, ES6 模块提前加载并执行模块文件. 异步通常被理解为延后一个时间节点执行, 所以说成异步加载是错误的.
- 从形式上看, CommonJS 模块整体导出一个包含若干个变量的对象, ES6 模块分开导出单个变量.
CommonJS Module
- `CommonJS` 模块一般由包管理器提供的运行时实现.
- 由于 `require` 语句直接分割了执行的代码块,
  `CommonJS` 模块的导入导出语句的位置会影响模块代码语句的执行结果.
const fs = require('node:fs')
const path = require('node:path')
const vm = require('node:vm')
function Module(id) {
this.id = id
this.exports = {}
}
Module.wrapper = [
'(function(exports, module, Require, __dirname, __filename) {',
'})',
]
Module._extensions = {
'.js': function (module) {
const content = fs.readFileSync(module.id, 'utf8')
const fnStr = Module.wrapper[0] + content + Module.wrapper[1]
const fn = vm.runInThisContext(fnStr)
fn.call(
module.exports, // Bind `this` to `module.exports`
module.exports,
module,
Require,
_dirname,
_filename
)
},
'.json': function (module) {
const json = fs.readFileSync(module.id, 'utf8')
module.exports = JSON.parse(json) // 把文件的结果放在exports属性上
},
}
function Require(modulePath) {
const absPathname = path.resolve(__dirname, modulePath)
const module = new Module(absPathname)
tryModuleLoad(module)
return module.exports
}
function tryModuleLoad(module) {
const extension = path.extname(module.id)
Module._extensions[extension](module)
}
EcmaScript Module
- `ES6` 模块借助 `JS` 引擎实现. `JS` 引擎实现了 `ES6` 模块的底层核心逻辑.
- `ES6` 模块有 5 种状态, 分别为 `unlinked`, `linking`, `linked`, `evaluating` and `evaluated`
  (Module Environment Records).
- 由于连接阶段会给导入模块变量创建绑定并初始化为子模块的对应变量,
  子模块的对应变量在评估阶段会先被赋值,
  所以导入模块变量获得了和函数声明变量一样的提升效果.
  `ES6` 模块的 `import/export` 位置不影响模块代码语句的执行结果.
- Experimental `.mjs` file.
Process Module
Process Properties
- `process.pid`: 当前进程的进程号.
- `process.version`: Node 的版本, 比如 v0.10.18.
- `process.platform`: 当前系统平台, 比如 Linux.
- `process.title`: 默认值为 "node", 可以自定义该值.
- `process.argv`: 当前进程的命令行参数数组.
- `process.env`: 指向当前 shell 的环境变量, 比如 `process.env.HOME`.
- `process.execPath`: 运行当前进程的可执行文件的绝对路径.
- `process.stdout`: 指向标准输出.
- `process.stdin`: 指向标准输入.
- `process.stderr`: 指向标准错误.
process.stdin.resume()
process.stdin.pipe(process.stdout)
Process Events
- Error events:
uncaughtException
.unhandledRejection
.
- Signal events:
SIGHUP
.SIGINT
.SIGQUIT
.SIGTERM
.
- Exit events:
beforeExit
.exit
.
- Node HTTP applications graceful shutdown library.
process.on('uncaughtException', (err) => {
console.log(`Uncaught exception: ${err.message}.`)
process.exit(1)
})
// Promise rejections that nobody handled are reported on 'unhandledRejection'
// with (reason, promise); 'uncaughtException' only receives the thrown error.
process.on('unhandledRejection', (reason, promise) => {
  console.log(`Unhandled rejection at ${promise}, reason: ${reason}.`)
  process.exit(1)
})
process.on('SIGHUP', (signal) => {
console.log(`Process ${process.pid} received a SIGHUP signal.`)
process.exit(0)
})
process.on('SIGINT', (signal) => {
console.log(`Process ${process.pid} has been interrupted.`)
process.exit(0)
})
process.on('SIGQUIT', (signal) => {
console.log(`Process ${process.pid} received a SIGQUIT signal.`)
process.exit(0)
})
process.on('SIGTERM', (signal) => {
console.log(`Process ${process.pid} received a SIGTERM signal.`)
process.exit(0)
})
process.on('beforeExit', (code) => {
setTimeout(() => {
console.log(`Process will exit with code: ${code}.`)
process.exit(code)
})
})
process.on('exit', (code) => {
console.log(`Process exited with code: ${code}.`)
})
Process Information
- process.on()
- process.uptime(): 进程运行时长
- process.getgid/setgid/getuid/setuid();
- process.cwd()
- process.memoryUsage()
Process Event Loop and Counter
- process.nextTick()
Child Process
- `cp.spawn()`: 创建子进程, 拥有独立的 stdin/stdout/stderr 文件描述符.
- `cp.exec()`: 创建子进程, 并会在进程结束时调用传入的回调函数.
- Exec Library.
- Each spawned Node.js child process is independent and has its own memory, event-loop, and V8 instance.
- Use `process.on` to communicate between parent and child process.
const cp = require('node:child_process')
// Run `ls -l` in a shell and print what it wrote to stdout/stderr.
cp.exec(
  'ls -l',
  {
    // Must be a valid encoding name: an unrecognized one (the previous
    // 'uft-8' typo) makes exec hand Buffers to the callback instead of strings.
    encoding: 'utf8',
    timeout: 0, // 0 disables the timeout
    maxBuffer: 200 * 1024,
    killSignal: 'SIGTERM',
    cwd: null,
    env: null,
  },
  (err, stdout, stderr) => {
    // Report a failed command instead of dropping the error silently.
    if (err) {
      console.error(err)
      return
    }
    console.log(stdout)
    console.log(stderr)
  }
)
Worker Threads Module
Worker threads use threads to execute the work within the same process of the main application:
- Worker threads are lightweight compared to child processes.
- Worker threads can share memory (can transfer
ArrayBuffer
). - Each Node.js worker thread has its own independent Node.js runtime (including its own V8 instance, event loop, etc.) with its own isolated context, therefore no thread synchronization is usually needed.
// fibonacci-worker.js
const {
Worker,
isMainThread,
parentPort,
workerData,
} = require('node:worker_threads')
/**
 * Naive recursive Fibonacci, kept deliberately CPU-heavy so it is a
 * meaningful workload to move onto a worker thread.
 *
 * @param {number} num - Index in the sequence; values <= 1 are returned as-is.
 * @returns {number} The Fibonacci number at `num`.
 */
function fibonacci(num) {
  return num <= 1
    ? num
    : fibonacci(num - 1) + fibonacci(num - 2)
}
if (isMainThread) {
module.exports = n =>
new Promise((resolve, reject) => {
const worker = new Worker(__filename, {
workerData: n,
})
worker.on('message', resolve)
worker.on('error', reject)
worker.on('exit', (code) => {
if (code !== 0)
reject(new Error(`Worker stopped with exit code ${code}`))
})
})
} else {
const result = fibonacci(workerData)
parentPort.postMessage(result)
process.exit(0)
}
const http = require('node:http')
const fibonacciWorker = require('./fibonacci-worker')
const port = 3000
// HTTP front end: `/fibonacci?n=<int>` is computed on a worker thread,
// every other path answers with a greeting.
http
  .createServer(async (req, res) => {
    const url = new URL(req.url, `http://${req.headers.host}`)
    console.log('Incoming request to:', url.pathname)

    if (url.pathname !== '/fibonacci') {
      res.writeHead(200)
      return res.end('Hello World!')
    }

    // A missing `n` converts to 0 (as before); anything that is not a
    // non-negative integer is rejected before a worker is started for it.
    const n = Number(url.searchParams.get('n'))
    if (!Number.isInteger(n) || n < 0) {
      res.writeHead(400)
      return res.end('Query parameter "n" must be a non-negative integer\n')
    }

    console.log('Calculating fibonacci for', n)
    try {
      const result = await fibonacciWorker(n)
      res.writeHead(200)
      return res.end(`Result: ${result}\n`)
    } catch (err) {
      // Without this the rejection is unhandled and the request never gets a response.
      console.error(err)
      res.writeHead(500)
      return res.end('Failed to calculate fibonacci\n')
    }
  })
  .listen(port, () => console.log(`Listening on port ${port}...`))
Worker pool is needed:
- Creating a new worker/process is expensive. For best performance, they should be reused.
- No control over the number of workers/processes created without worker pool. This leaves vulnerable to DoS attacks.
- Worker pool library
File Module
FS API
- fs.createReadStream.
- fs.opendir.
- fs.readdir.
- fs.readFile.
- fs.readFileSync.
- fs.exists.
const fs = require('node:fs')
/**
 * Promise wrapper around the callback-based `fs.readFile`.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {Promise<string>} Resolves with the file's UTF-8 text; rejects
 *   with the error reported by `fs.readFile`.
 */
function readFile(filename) {
  const options = { encoding: 'utf8' }
  return new Promise((resolve, reject) => {
    const settle = (err, contents) => (err ? reject(err) : resolve(contents))
    fs.readFile(filename, options, settle)
  })
}
readFile('example.txt')
.then((contents) => {
console.log(contents)
})
.catch((err) => {
console.error(err.message)
})
import { promises as fs } from 'node:fs'
import { basename, dirname, join } from 'node:path'
/**
 * Recursively yield the path of every regular file found beneath `dir`.
 *
 * @param dir - Directory to start from.
 * @returns Async generator of file paths, each built by joining the parent
 *   directory with the entry name.
 */
async function* walk(dir: string): AsyncGenerator<string> {
for await (const d of await fs.opendir(dir)) {
const entry = join(dir, d.name)
// Recurse into directories; entries that are neither a directory nor a regular file are skipped.
if (d.isDirectory())
yield * walk(entry)
else if (d.isFile())
yield entry
}
}
/**
 * Run a single test file, or every test file found under a directory.
 *
 * A file counts as a test when it is named `test.js` or ends in `.test.js`;
 * anything whose directory path contains `node_modules` is ignored.
 *
 * @param {string} [arg] - File or directory to run; defaults to the current directory.
 * @returns {Promise<*>} Result of `runTestFile` when `arg` is a file, otherwise `undefined`.
 */
async function run(arg = '.') {
  const stats = await fs.lstat(arg)
  if (stats.isFile())
    return runTestFile(arg)

  const isTestFile = file => basename(file) === 'test.js' || file.endsWith('.test.js')
  for await (const file of walk(arg)) {
    if (dirname(file).includes('node_modules') || !isTestFile(file))
      continue
    console.log(file)
    await runTestFile(file)
  }
}
import fs from 'node:fs/promises'
import path from 'node:path'
/**
 * Recursively print the UTF-8 content of every file beneath `directory`.
 *
 * Entries are processed one after another with `for...of`, so the returned
 * promise settles only after the whole tree has been handled, and any
 * filesystem error rejects it. (An `async` callback passed to `forEach` is
 * not awaited: the function would resolve early and errors would surface as
 * unhandled rejections.)
 *
 * @param {string} directory - Directory to start from.
 * @returns {Promise<void>}
 */
async function traverse(directory) {
  const files = await fs.readdir(directory)
  for (const file of files) {
    const filePath = path.join(directory, file)
    const fileStat = await fs.stat(filePath)
    if (fileStat.isFile()) {
      const content = await fs.readFile(filePath, 'utf-8')
      console.log(content)
    } else if (fileStat.isDirectory()) {
      await traverse(filePath)
    }
  }
}
module.exports = function ls(dirName, fileType, callback) {
const fs = require('node:fs')
const path = require('node:path')
fs.readdir(dirName, (err, list) => {
if (err)
return callback(err)
list = list.filter((file) => {
return path.extname(file) === `.${fileType}`
})
callback(null, list)
})
}
Buffer Object
const str = buf.toString()
Path API
- path.resolve: 自动按系统处理路径
- path.extname: 返回文件类型
const path = require('node:path')
console.log(path.extname('index.html')) // .html
path.normalize(p)
path.join([path1], [path2], [pathN])
path.resolve(from, to)
path.relative(from, to)
path.dirname(p)
path.basename(p, [ext])
path.extname(p)
const separator = path.sep
const delimiter = path.delimiter
Http Module
Request Object
属性
const request = {
method: 'POST',
}
Response Object
类型
typedef Stream response
事件
- 监听事件
response.on('data', (data) => {
process(data)
})
response.on('error', (err) => {
console.error(err)
})
response.on('end', () => {
stream.end()
})
- 发出事件
response.end() // 传输结束
方法
response.setEncoding('utf8') // 自动将 data 事件中 Buffer 对象转换成 String
// content-type: text/plain
// application/json
response.writeHead(200, { 'Content-Type': '' })
Http Get
http.get(url, (response) => {})
http.get(url, (response) => {
let pipeData = ''
response.setEncoding('utf8')
response.on('data', (data) => {
pipeData += data
})
response.on('end', () => {
console.log(pipeData.length)
console.log(pipeData)
})
})
Http Server
const server = http.createServer((request, response) => {
// 处理请求的逻辑...
})
server.listen(8000)
Sample
const net = require('node:net')
const chatServer = net.createServer()
// 用于检测僵尸客户端,用于及时清楚僵尸客户端
const clientList = []
// Register every new connection, greet it, and relay whatever it sends to the other clients.
chatServer.on('connection', (client) => {
  client.name = `${client.remoteAddress}:${client.remotePort}`
  client.write(`Hi ${client.name}!\n`)
  clientList.push(client)
  client.on('data', (data) => {
    broadcast(data, client)
  })
  client.on('end', () => {
    // `broadcast` may already have dropped this client as a zombie; in that
    // case indexOf returns -1 and `splice(-1, 1)` would remove the last
    // (unrelated) client from the list.
    const index = clientList.indexOf(client)
    if (index !== -1)
      clientList.splice(index, 1)
  })
  client.on('error', (e) => {
    console.log(e)
  })
})
/**
 * Send `message` from `client` to every other connected client.
 *
 * Peers that are no longer writable are destroyed and removed from
 * `clientList` once the pass over the list has finished.
 *
 * @param {*} message - Payload appended to "<sender name> says ".
 * @param {object} client - Sender; it never receives its own message.
 */
function broadcast(message, client) {
  const zombies = []
  for (const peer of clientList) {
    // Skip the sender itself.
    if (peer === client)
      continue
    if (peer.writable) {
      peer.write(`${client.name} says ${message}`)
      continue
    }
    zombies.push(peer)
    peer.destroy()
  }
  // Remove zombie clients only after iterating, so the list is not mutated mid-loop.
  for (const zombie of zombies)
    clientList.splice(clientList.indexOf(zombie), 1)
}
chatServer.listen(9000)
Net Module
Socket Object
socket.write(data)
socket.end(data)
socket.end()
Socket IO
const fs = require('node:fs')
const http = require('node:http')
const io = require('socket.io')
const sockFile = fs.readFileSync('socket.html')
// Plain HTTP server that answers every request with the socket.io demo page.
// Declared with `const`: a bare assignment creates an implicit global and
// throws a ReferenceError in strict mode / ES modules.
const server = http.createServer()
server.on('request', (req, res) => {
  res.writeHead(200, { 'content-type': 'text/html' })
  res.end(sockFile)
})
server.listen(8080)
const socket = io.listen(server)
// 命名空间
socket.of('/upAndRunning').on('connection', (client) => {
console.log('Client connected to Up and Running namespace.')
client.send('Welcome to \'Up and Running\'')
})
socket.of('/weather').on('connection', (client) => {
console.log('Client connected to Weather namespace.')
client.send('Welcome to \'Weather Updates\'')
})
Basic Methods
const serverInstance = net.createServer((socket) => {})
serverInstance.listen(portNumber) // 开始监听特定端口
URL Module
Basic Method
parse
解析出 URL 各个组成部分:
- href
- protocol
- host
- auth
- hostname
- port
- pathname
- search
- query
- hash
// true 表示调用 queryString 模块查询字符串
url.parse(request.url, true)
dns
- dns.resolve
- dns.reverse
- dns.lookup
const dns = require('node:dns')
dns.lookup('google.com', 4, (e, a) => {
console.log(a)
})
// Resolve the A records of a host name and pretty-print them.
dns.resolve('tazimi.tk', 'A', (e, r) => {
  if (e) {
    // On failure there are no records, so stop here instead of also printing `undefined`.
    console.log(e)
    return
  }
  console.log(JSON.stringify(r, null, 2))
})
const dns = require('node:dns')
dns.resolve('tazimi.dev', 'A', (err, res) => {
if (err)
console.log(err)
else
console.log(`A: ${JSON.stringify(res, null, 2)}`)
})
dns.resolve('github.com', 'MX', (err, res) => {
if (err)
console.log(err)
else
console.log(`MX: ${JSON.stringify(res, null, 2)}`)
})
Security Module
Crypto
- hash algorithm
- hmac algorithm
- cipher/decipher algorithms
- signature/validate
Hash API
const crypto = require('node:crypto')
const md5 = crypto.createHash('md5')
md5.update('foo')
md5.digest('hex') // 'acbd18db4cc2f85cedef654fccc4a4d8'
HMAC API
openssl genrsa -out key.pem 1024
const crypto = require('node:crypto')
const fs = require('node:fs')
const pem = fs.readFileSync('key.pem')
const key = pem.toString('ascii')
const hmac = crypto.createHmac('sha1', key)
hmac.update('bar')
hmac.digest('hex') // '7x123'
UUID Generation
Enhance usability of unique identifiers by prefixing and encoding in base58:
// src/utils/id.ts
import { customAlphabet } from 'nanoid'
export const nanoid = customAlphabet('123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz')
const prefixes = {
key: 'key',
api: 'api',
policy: 'pol',
request: 'req',
workspace: 'ws',
vercelBinding: 'vb',
keyAuth: 'key_auth', // <-- this is internal and does not need to be short or pretty
test: 'test', // <-- for tests only
} as const
export function newId(
prefix: keyof typeof prefixes,
length: number = 16
): string {
return [prefixes[prefix], nanoid(length)].join('_')
}
// app.ts
import { newId } from '@utils/id'
const id = newId('workspace')
// ws_dYuyGV3qMKvEbjML
const id = newId('keyY')
// invalid because `keyY` is not a valid prefix name
Async Module
对回调进行计数是处理 Node 中异步的基础 - 自定义 Semaphore 变量: 每完成一个异步处理, Semaphore++
Cluster Module
const cluster = require('node:cluster')
const http = require('node:http')
const numCPUs = require('node:os').cpus().length
const rssWarn = 50 * 1024 * 1024
const heapWarn = 50 * 1024 * 1024
const workers = {}
// NOTE: `cluster.isMaster` is the deprecated alias of `cluster.isPrimary` (Node.js 16+).
if (cluster.isMaster) {
  // Primary process: start one worker per CPU core.
  for (let i = 0; i < numCPUs; i++)
    createWorker()

  // Watchdog: once a second, replace every worker that has not reported
  // within the last 5 seconds.
  setInterval(() => {
    const time = new Date().getTime()
    // Iterate over a snapshot of the keys with a block-scoped variable
    // (the previous `for (pid in workers)` leaked `pid` as an implicit global).
    for (const pid of Object.keys(workers)) {
      if (workers[pid].lastCb + 5000 < time) {
        console.log(`Long running worker ${pid} killed`)
        workers[pid].worker.kill()
        delete workers[pid]
        createWorker()
      }
    }
  }, 1000)
} else {
  // Worker process: serve HTTP and deliberately hang about 1 request in 200
  // so the watchdog above has something to catch.
  http
    .Server((req, res) => {
      // mess up 1 in 200 request
      if (Math.floor(Math.random() * 200) === 4) {
        console.log(`Stopped ${process.pid} from ever finishing`)
        while (true)
          continue
      }
      res.writeHead(200)
      res.end(`hello world from ${process.pid}\n`)
    })
    .listen(8000)
  // Report stats once a second
  setInterval(() => {
    process.send({
      cmd: 'reportMem',
      memory: process.memoryUsage(),
      process: process.pid,
    })
  }, 1000)
}
/**
 * Fork a worker and register it in `workers`, keyed by its process id.
 *
 * The pid is read from `worker.process.pid`; a cluster `Worker` has no `pid`
 * property of its own, so keying by `worker.pid` would store every worker
 * under "undefined" and the `reportMem` lookup by real pid would fail.
 */
function createWorker() {
  const worker = cluster.fork()
  const pid = worker.process.pid
  console.log(`Created worker: ${pid}`)
  // allow boot time
  workers[pid] = { worker, lastCb: new Date().getTime() - 1000 }
  worker.on('message', (m) => {
    if (m.cmd !== 'reportMem')
      return
    const entry = workers[m.process]
    // A worker that was just removed from `workers` may still deliver a last report.
    if (!entry)
      return
    entry.lastCb = new Date().getTime()
    if (m.memory.rss > rssWarn)
      console.log(`Worker ${m.process} using too much memory.`)
  })
}
Test Module
Assert Module
- `assert.equal(expect, real, assertPrompt)`.
- `assert.notEqual(expect, real, assertPrompt)`.
- `assert.strictEqual(expect, real, assertPrompt)`.
- `assert.notStrictEqual(expect, real, assertPrompt)`.
- `assert.deepEqual(expect, real, assertPrompt)`.
- `assert.notDeepEqual(expect, real, assertPrompt)`.
- `assert.ok(var, assertPrompt)`: 测试对象真值 (truthy/falsy).
- `assert.throws(fn)`: 测试方法是否抛出异常.
- `assert.doesNotThrow(fn)`: 测试方法是否不抛出异常.
const assert = require('node:assert')
assert.equal(1, true, 'Truthy')
assert.notEqual(1, true, 'Truthy')
assert.ok(0, 'Zero is not truthy')
Debugging
Inspector CLI
node --inspect server.js # Start debugging.
node --inspect-brk server.js # Start debugging and break.
Enable core modules debug information:
NODE_DEBUG=fs,net,stream yarn test
Node Web Crawler
const axios = require('axios')
const cheerio = require('cheerio')
const playwright = require('playwright')
const url = 'https://scrapeme.live/shop/page/1/'
const useHeadless = false // "true" to use playwright
const maxVisits = 30 // Arbitrary number for the maximum of links visited
const visited = new Set()
const allProducts = []
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms))
/**
 * Fetch the rendered HTML of a page with a headless Firefox instance.
 *
 * The browser is closed in `finally`, so it is released even when
 * navigation or content extraction throws.
 *
 * @param {string} url - Page to load.
 * @returns {Promise<string>} Full HTML content of the loaded page.
 */
async function getHtmlPlaywright(url) {
  const browser = await playwright.firefox.launch()
  try {
    const context = await browser.newContext()
    const page = await context.newPage()
    await page.goto(url)
    return await page.content()
  } finally {
    await browser.close()
  }
}
async function getHtmlAxios(url) {
const { data } = await axios.get(url)
return data
}
async function getHtml(url) {
return useHeadless ? await getHtmlPlaywright(url) : await getHtmlAxios(url)
}
function extractContent($) {
return $('.product')
.map((_, product) => {
const $product = $(product)
return {
id: $product.find('a[data-product_id]').attr('data-product_id'),
title: $product.find('h2').text(),
price: $product.find('.price').text(),
}
})
.toArray()
}
function extractLinks($) {
return [
...new Set(
$('.page-numbers a')
.map((_, a) => $(a).attr('href'))
.toArray()
),
]
}
async function crawl(url) {
visited.add(url)
console.log('Crawl: ', url)
const html = await getHtml(url)
const $ = cheerio.load(html)
const content = extractContent($)
const links = extractLinks($)
links
.filter(link => !visited.has(link))
.forEach((link) => {
q.enqueue(crawlTask, link)
})
allProducts.push(...content)
// We can see how the list grows. Gotta catch 'em all!
console.log(allProducts.length)
}
// Change the default concurrency or pass it as param
/**
 * Create a task queue that runs at most `concurrency` runners at a time.
 *
 * Each `enqueue` call stores the task. If a runner slot is free, the call
 * becomes a runner and drains the queue; otherwise it returns immediately and
 * leaves the task to the active runners.
 *
 * The slot is released in `finally`: when a task throws, the error still
 * rejects that runner's `enqueue` promise, but the slot is handed back so
 * later `enqueue` calls keep processing the queue. (Previously a failed task
 * left `running` incremented forever, which could stall the queue.)
 *
 * @param {number} [concurrency] - Maximum number of simultaneous runners.
 * @returns {{ enqueue: (task: Function, ...params: any[]) => Promise<void> }}
 */
function queue(concurrency = 4) {
  let running = 0
  const tasks = []
  return {
    enqueue: async (task, ...params) => {
      tasks.push({ task, params })
      if (running >= concurrency)
        return
      ++running
      try {
        while (tasks.length) {
          const next = tasks.shift()
          await next.task(...next.params)
        }
      } finally {
        --running
      }
    },
  }
}
/**
 * Queue task wrapper around `crawl`.
 *
 * Does nothing once `maxVisits` pages have been visited, or when `url` was
 * already visited; otherwise crawls it.
 *
 * @param {string} url - Page to crawl.
 * @returns {Promise<void>}
 */
async function crawlTask(url) {
  const limitReached = visited.size >= maxVisits
  if (limitReached) {
    console.log('Over Max Visits, exiting')
    return
  }
  if (!visited.has(url))
    await crawl(url)
}
const q = queue()
q.enqueue(crawlTask, url)
Web scraping with impersonation:
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
puppeteer.use(StealthPlugin());
(async () => {
const browser = await puppeteer.launch()
const page = await browser.newPage()
await page.goto('https://example.com')
// Now your Puppeteer script is enhanced with advanced evasion techniques
// Proceed with your web scraping tasks
await browser.close()
})()
const { chromium, devices } = require('playwright')
const iPhone11 = devices['iPhone 11'];
(async () => {
const browser = await chromium.launch()
const context = await browser.newContext({
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
+ '(KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
geolocation: { latitude: 48.8584, longitude: 2.2945 }, // Paris, France
permissions: ['geolocation'],
locale: 'fr-FR',
...iPhone11,
})
const page = await context.newPage()
await page.goto('https://example.com')
// Your scraping logic here
await browser.close()
})()
Deno
- Node.js library to Deno guide.