Acathur 4 jaren geleden
commit
f0a4ff7afd
12 gewijzigde bestanden met toevoegingen van 436 en 0 verwijderingen
  1. 14 0
      .gitignore
  2. 8 0
      .prettierrc
  3. 64 0
      dist/lib/ids.js
  4. 8 0
      dist/lib/index.js
  5. 60 0
      dist/lib/request.js
  6. 19 0
      package.json
  7. 101 0
      src/lib/ids.ts
  8. 2 0
      src/lib/index.ts
  9. 66 0
      src/lib/request.ts
  10. 8 0
      src/test/request.ts
  11. 22 0
      tsconfig.json
  12. 64 0
      yarn.lock

+ 14 - 0
.gitignore

@@ -0,0 +1,14 @@
+.DS_Store
+*.log
+node_modules
+dist/*
+!dist/lib
+
+# Editor directories and files
+.idea
+.vscode
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?

+ 8 - 0
.prettierrc

@@ -0,0 +1,8 @@
+{
+  "singleQuote": true,
+  "trailingComma": "none",
+  "semi": false,
+  "tabWidth": 2,
+  "printWidth": 160,
+  "arrowParens": "avoid"
+}

+ 64 - 0
dist/lib/ids.js

@@ -0,0 +1,64 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.Ids = exports.IdsChannel = void 0;
+const request_1 = require("./request");
+const crypto_1 = require("crypto");
+// Compiled form of the `IdsChannel` enum. Each statement below stores both
+// directions of the mapping: name -> numeric code and numeric code -> name.
+var IdsChannel;
+(function (IdsChannel) {
+    IdsChannel[IdsChannel["Csdn"] = 1] = "Csdn";
+    IdsChannel[IdsChannel["Wechat"] = 2] = "Wechat";
+    IdsChannel[IdsChannel["Zhihu"] = 3] = "Zhihu";
+    IdsChannel[IdsChannel["Juejin"] = 4] = "Juejin";
+})(IdsChannel = exports.IdsChannel || (exports.IdsChannel = {}));
+/**
+ * Client for the IDS crawl API. Every call goes through `request`, which adds
+ * an HMAC-SHA256 `Authorization` header and returns the response body.
+ */
+class Ids {
+    /**
+     * @param config Object providing `endpoint`, `accessKeyId` and `accessKeySecret`.
+     */
+    constructor(config) {
+        this.config = config;
+    }
+    /**
+     * Signs and sends one request.
+     *
+     * Mutates `config`: sets `baseURL`, defaults `url` to '/', and adds the
+     * `Authorization` header.
+     *
+     * @param config Request options passed on to the shared `request` helper.
+     * @returns The response body (`res.data`).
+     * @throws Error `[status] info` when the body's `status` is not loosely equal to 1.
+     */
+    async request(config) {
+        const { endpoint, accessKeyId, accessKeySecret } = this.config;
+        config.baseURL = endpoint;
+        config.url = config.url || '/';
+        // Unix time in whole seconds; used in both the signed text and the header.
+        const timestamp = Math.floor(Date.now() / 1000);
+        // Signed text is the lower-cased URL path followed by the timestamp.
+        const data = config.url.toLowerCase() + timestamp;
+        const signature = crypto_1.createHmac('sha256', accessKeySecret).update(data).digest('base64');
+        config.headers = config.headers || {};
+        config.headers['Authorization'] = `IDS-HMAC-SHA256 Credential=${accessKeyId}/${timestamp},Signature=${signature}`;
+        const res = await request_1.request(config);
+        // Loose comparison: a string "1" is accepted as well as the number 1.
+        if (res.data.status != 1) {
+            console.error(res.data);
+            throw new Error(`[${res.data.status}] ${res.data.info}`);
+        }
+        return res.data;
+    }
+    /**
+     * POSTs `{ code: channel }` to `/api/ids/getCrawlAuthors`.
+     * @param channel An `IdsChannel` code.
+     */
+    getCrawlAuthors(channel) {
+        return this.request({
+            method: 'POST',
+            url: '/api/ids/getCrawlAuthors',
+            data: {
+                code: channel
+            }
+        });
+    }
+    /**
+     * POSTs `{ code, sn_codes }` to `/api/ids/getCrawlArticleRules`.
+     * @param params `{ ids, channel }`; `ids` is sent as `sn_codes`, `channel` as `code`.
+     */
+    getCrawlArticleRules(params) {
+        const { ids, channel } = params;
+        return this.request({
+            method: 'POST',
+            url: '/api/ids/getCrawlArticleRules',
+            data: {
+                code: channel,
+                sn_codes: ids
+            }
+        });
+    }
+    /**
+     * POSTs `data` to `/api/ids/putArticleData` with `rule` as the `crawl` query parameter.
+     * @param data Request body.
+     * @param rule Value for the `crawl` query parameter.
+     */
+    putArticle(data, rule) {
+        return this.request({
+            method: 'POST',
+            url: '/api/ids/putArticleData',
+            params: {
+                crawl: rule
+            },
+            data
+        });
+    }
+}
+exports.Ids = Ids;

+ 8 - 0
dist/lib/index.js

@@ -0,0 +1,8 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.IdsChannel = exports.Ids = exports.request = void 0;
+var request_1 = require("./request");
+Object.defineProperty(exports, "request", { enumerable: true, get: function () { return request_1.request; } });
+var ids_1 = require("./ids");
+Object.defineProperty(exports, "Ids", { enumerable: true, get: function () { return ids_1.Ids; } });
+Object.defineProperty(exports, "IdsChannel", { enumerable: true, get: function () { return ids_1.IdsChannel; } });

+ 60 - 0
dist/lib/request.js

@@ -0,0 +1,60 @@
+"use strict";
+// Interop helper: returns `mod` unchanged when it is flagged `__esModule`,
+// otherwise wraps it as `{ default: mod }` so `.default` access works below.
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.request = exports.axios = void 0;
+const chalk_1 = __importDefault(require("chalk"));
+const axios_1 = __importDefault(require("axios"));
+// Fallback delay (passed to setTimeout, i.e. milliseconds) before an attempt is cancelled.
+const DEF_TIMEOUT = 10000;
+// Fallback number of retries after a timed-out attempt.
+const DEF_RETRIES = 2;
+// User-Agent header sent by the shared axios instance.
+const DEF_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.80 Safari/537.36';
+// Shared axios instance used by `request` for every call.
+exports.axios = axios_1.default.create({
+    headers: {
+        'user-agent': DEF_UA
+    },
+    withCredentials: true
+});
+/**
+ * Sends `config` through the shared axios instance. If no response has
+ * settled within `config.timeout`, the attempt is cancelled and retried.
+ *
+ * Mutates `config`: fills in `timeout` and `maxRetries` and attaches a cancel token.
+ *
+ * @param config  Axios request options plus `maxRetries`.
+ * @param retries Number of retries already performed (defaults to 0).
+ * @returns Promise of the axios response.
+ * @throws Error `max retries exceeded (n)` when the timer fires with no retries left;
+ *         otherwise the error axios rejected with.
+ */
+const request = (config, retries = 0) => {
+    // `||` replaces any falsy value, so `maxRetries: 0` also becomes DEF_RETRIES.
+    config.timeout = config.timeout || DEF_TIMEOUT;
+    config.maxRetries = config.maxRetries || DEF_RETRIES;
+    // Snapshot taken before the cancel token is attached. A retry is rebuilt from
+    // it with JSON.parse, so only JSON-serializable options carry over.
+    const _config = JSON.stringify(config);
+    const cancelTokenSource = axios_1.default.CancelToken.source();
+    config.cancelToken = cancelTokenSource.token;
+    console.debug(chalk_1.default.white(`[request] ${config.method || 'GET'} ${config.url}`, config.params ? JSON.stringify(config.params) : ''));
+    return new Promise((resolve, reject) => {
+        // Set once the axios promise settles, so a late timer callback does nothing.
+        let done = false;
+        const timer = setTimeout(() => {
+            if (!done) {
+                cancelTokenSource.cancel();
+                // @ts-ignore
+                if (retries >= config.maxRetries) {
+                    reject(new Error(`max retries exceeded (${retries})`));
+                }
+                else {
+                    retries++;
+                    console.info(chalk_1.default.magenta(`[request] ${config.url} timeouted, canceled, retry ${retries}`));
+                    // Resolving with the retry's promise makes this promise follow its outcome.
+                    resolve(exports.request(JSON.parse(_config), retries));
+                }
+            }
+        }, config.timeout);
+        const startAt = Date.now();
+        exports.axios(config)
+            .then((res) => {
+            console.debug(chalk_1.default.white(`└─ ${res.status} ${Date.now() - startAt}ms`));
+            resolve(res);
+        })
+            .catch((e) => {
+            // Only errors that carry a response body are logged here.
+            if (e.response && e.response.data) {
+                console.error('[error.message]', e.message);
+                console.error('[error.response]', e.response.data);
+            }
+            reject(e);
+        })
+            .finally(() => {
+            done = true;
+            clearTimeout(timer);
+        });
+    });
+};
+exports.request = request;

+ 19 - 0
package.json

@@ -0,0 +1,19 @@
+{
+  "name": "crawler-lib",
+  "version": "1.0.0",
+  "main": "dist/lib/index.js",
+  "module": "dist/lib/index.js",
+  "author": "Acathur",
+  "private": true,
+  "scripts": {
+    "build": "rm -rf dist && tsc"
+  },
+  "devDependencies": {
+    "typescript": "^4.1.3"
+  },
+  "dependencies": {
+    "@types/node": "^14.14.16",
+    "axios": "^0.21.1",
+    "chalk": "^4.1.0"
+  }
+}

+ 101 - 0
src/lib/ids.ts

@@ -0,0 +1,101 @@
+import { request, RequestConfig } from './request'
+import { createHmac } from 'crypto'
+
+/** Connection settings for the `Ids` client. */
+export interface IdsConfig {
+  /** Used as the axios `baseURL` for every request. */
+  endpoint: string
+  /** Sent in the `Credential=` part of the `Authorization` header. */
+  accessKeyId: string
+  /** Key for the HMAC-SHA256 request signature. */
+  accessKeySecret: string
+}
+
+/** Numeric channel codes; sent as `code` in the bodies of `getCrawlAuthors` and `getCrawlArticleRules`. */
+export enum IdsChannel {
+  Csdn = 1,
+  Wechat = 2,
+  Zhihu = 3,
+  Juejin = 4
+}
+
+/** Allowed values for the `crawl` query parameter that `putArticle` sends. */
+export type IdsCrawlRule = 'fulldata' | 'extdata'
+
+/**
+ * Shape of the request body that `putArticle` posts to `/api/ids/putArticleData`.
+ * NOTE(review): field meanings are defined by the IDS API and are not visible
+ * in this repository; confirm against the API documentation.
+ */
+export interface IdsArticle {
+  author_id: string | number
+  cover_url: string
+  code: IdsChannel
+  title: string
+  author_name?: string
+  sn_code: string
+  original_url?: string
+  source_address?: string
+  is_headline?: 0 | 1
+  label?: string
+  published_at: Date
+  body: string
+  is_original?: 0 | 1
+  external_read_num?: number
+  external_see_num?: number
+  external_like_num?: number
+  external_comment_num?: number
+}
+
+/**
+ * Client for the IDS crawl API. Each call is signed with an HMAC-SHA256
+ * `Authorization` header and resolves to the response body.
+ */
+export class Ids {
+  config: IdsConfig
+
+  constructor(config: IdsConfig) {
+    this.config = config
+  }
+
+  /**
+   * Signs `config` and sends it through the shared `request` helper.
+   *
+   * Mutates `config`: sets `baseURL`, defaults `url` to '/', and adds the
+   * `Authorization` header.
+   *
+   * @returns The response body (`res.data`).
+   * @throws Error `[status] info` when the body's `status` is not loosely equal to 1.
+   */
+  private async request(config: RequestConfig) {
+    const credentials = this.config
+    const path = config.url || '/'
+    config.baseURL = credentials.endpoint
+    config.url = path
+
+    // Unix time in whole seconds; used in both the signed text and the header.
+    const timestamp = Math.floor(Date.now() / 1000)
+    // Signed text is the lower-cased URL path followed by the timestamp.
+    const hmac = createHmac('sha256', credentials.accessKeySecret)
+    hmac.update(path.toLowerCase() + timestamp)
+    const signature = hmac.digest('base64')
+
+    const headers = config.headers || {}
+    headers['Authorization'] = `IDS-HMAC-SHA256 Credential=${credentials.accessKeyId}/${timestamp},Signature=${signature}`
+    config.headers = headers
+
+    const { data: body } = await request(config)
+
+    // Loose comparison on purpose: a string "1" passes as well as the number 1.
+    if (body.status == 1) {
+      return body
+    }
+
+    console.error(body)
+    throw new Error(`[${body.status}] ${body.info}`)
+  }
+
+  /** POSTs `{ code: channel }` to `/api/ids/getCrawlAuthors`. */
+  getCrawlAuthors(channel: IdsChannel) {
+    const payload = { code: channel }
+
+    return this.request({ method: 'POST', url: '/api/ids/getCrawlAuthors', data: payload })
+  }
+
+  /** POSTs `{ code, sn_codes }` to `/api/ids/getCrawlArticleRules`; `ids` is sent as `sn_codes`. */
+  getCrawlArticleRules(params: { ids: string[]; channel: IdsChannel }) {
+    const payload = { code: params.channel, sn_codes: params.ids }
+
+    return this.request({ method: 'POST', url: '/api/ids/getCrawlArticleRules', data: payload })
+  }
+
+  /** POSTs `data` to `/api/ids/putArticleData` with `rule` as the `crawl` query parameter. */
+  putArticle(data: IdsArticle, rule: IdsCrawlRule) {
+    const query = { crawl: rule }
+
+    return this.request({ method: 'POST', url: '/api/ids/putArticleData', params: query, data })
+  }
+}

+ 2 - 0
src/lib/index.ts

@@ -0,0 +1,2 @@
+export { request } from './request'
+export { Ids, IdsConfig, IdsChannel, IdsCrawlRule, IdsArticle } from './ids'

+ 66 - 0
src/lib/request.ts

@@ -0,0 +1,66 @@
+import chalk from 'chalk'
+import Axios, { AxiosRequestConfig, AxiosResponse } from 'axios'
+
+// Fallback delay (passed to setTimeout, i.e. milliseconds) before an attempt is cancelled.
+const DEF_TIMEOUT = 10000
+// Fallback number of retries after a timed-out attempt.
+const DEF_RETRIES = 2
+// User-Agent header sent by the shared axios instance.
+const DEF_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.80 Safari/537.36'
+
+/** Shared axios instance used by `request` for every call. */
+export const axios = Axios.create({
+  headers: {
+    'user-agent': DEF_UA
+  },
+  // NOTE(review): axios documents `withCredentials` for cross-site browser requests; confirm it is needed here.
+  withCredentials: true
+})
+
+/** Axios request options extended with the retry setting understood by `request`. */
+export interface RequestConfig extends AxiosRequestConfig {
+  /** Delay handed to setTimeout before an attempt is cancelled; DEF_TIMEOUT when falsy. */
+  timeout?: number
+  /** How many times a timed-out attempt may be retried; DEF_RETRIES when not set. */
+  maxRetries?: number
+}
+
+/**
+ * Sends an HTTP request through the shared `axios` instance. If no response
+ * has settled within the timeout, the attempt is cancelled and retried.
+ *
+ * @param config  Axios request options plus `maxRetries`. `timeout` falls back to
+ *                DEF_TIMEOUT when missing or 0. `maxRetries` falls back to DEF_RETRIES
+ *                only when it is null/undefined, so `maxRetries: 0` disables retries.
+ *                The object itself is not modified.
+ * @param retries Number of retries already performed; callers normally omit it.
+ * @returns Promise of the axios response from the first attempt that completes.
+ * @throws Error `max retries exceeded (n)` when the timer fires with no retries left;
+ *         otherwise the error axios rejected with.
+ */
+export const request = (config: RequestConfig, retries = 0): Promise<AxiosResponse<any>> => {
+  const timeout = config.timeout || DEF_TIMEOUT
+  // `??` rather than `||`: an explicit 0 must mean "never retry".
+  const maxRetries = config.maxRetries ?? DEF_RETRIES
+  const cancelTokenSource = Axios.CancelToken.source()
+  // Per-attempt shallow copy. The caller's object stays untouched, and a retry
+  // restarts from `config` itself, so function-valued and other non-JSON
+  // options are not lost the way a JSON round trip would lose them.
+  const attempt: RequestConfig = { ...config, timeout, maxRetries, cancelToken: cancelTokenSource.token }
+
+  console.debug(chalk.white(`[request] ${attempt.method || 'GET'} ${attempt.url}`, attempt.params ? JSON.stringify(attempt.params) : ''))
+
+  return new Promise((resolve, reject) => {
+    // Set once the axios promise settles, so a late timer callback does nothing.
+    let done = false
+
+    const timer = setTimeout(() => {
+      if (done) {
+        return
+      }
+
+      cancelTokenSource.cancel()
+
+      if (retries >= maxRetries) {
+        reject(new Error(`max retries exceeded (${retries})`))
+        return
+      }
+
+      const nextRetry = retries + 1
+      console.info(chalk.magenta(`[request] ${attempt.url} timeouted, canceled, retry ${nextRetry}`))
+      // Resolving with the retry's promise makes this promise follow its outcome.
+      resolve(request(config, nextRetry))
+    }, timeout)
+
+    const startAt = Date.now()
+
+    axios(attempt)
+      .then((res) => {
+        console.debug(chalk.white(`└─ ${res.status} ${Date.now() - startAt}ms`))
+        resolve(res)
+      })
+      .catch((e) => {
+        // Only errors that carry a response body are logged here. After a
+        // timer-driven cancel the promise is already settled, so this reject is a no-op.
+        if (e.response && e.response.data) {
+          console.error('[error.message]', e.message)
+          console.error('[error.response]', e.response.data)
+        }
+        reject(e)
+      })
+      .finally(() => {
+        done = true
+        clearTimeout(timer)
+      })
+  })
+}

+ 8 - 0
src/test/request.ts

@@ -0,0 +1,8 @@
+import { request } from '../lib'
+
+// Manual smoke test: fetches one page through `request` and prints the response.
+const main = async () => {
+  const res = await request({
+    url: 'https://www.google.com'
+  })
+  console.info('@res', res)
+}
+
+// Handle rejection explicitly so a failed request is reported and the process
+// exits non-zero instead of leaving an unhandled promise rejection.
+main().catch(e => {
+  console.error('@err', e)
+  process.exitCode = 1
+})

+ 22 - 0
tsconfig.json

@@ -0,0 +1,22 @@
+{
+  "compilerOptions": {
+    "target": "es2018",
+    "module": "commonjs",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "moduleResolution": "Node",
+    "outDir": "dist",
+    "emitDecoratorMetadata": true,
+    "experimentalDecorators": true,
+    "strictPropertyInitialization": false,
+    "noImplicitAny": false,
+    "types": [
+      "@types/node"
+    ]
+  },
+  "include": [
+    "src/**/*"
+  ]
+}

+ 64 - 0
yarn.lock

@@ -0,0 +1,64 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+"@types/node@^14.14.16":
+  version "14.14.16"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.16.tgz#3cc351f8d48101deadfed4c9e4f116048d437b4b"
+  integrity sha512-naXYePhweTi+BMv11TgioE2/FXU4fSl29HAH1ffxVciNsH3rYXjNP2yM8wqmSm7jS20gM8TIklKiTen+1iVncw==
+
+ansi-styles@^4.1.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
+  integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
+  dependencies:
+    color-convert "^2.0.1"
+
+axios@^0.21.1:
+  version "0.21.1"
+  resolved "https://registry.yarnpkg.com/axios/-/axios-0.21.1.tgz#22563481962f4d6bde9a76d516ef0e5d3c09b2b8"
+  integrity sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==
+  dependencies:
+    follow-redirects "^1.10.0"
+
+chalk@^4.1.0:
+  version "4.1.0"
+  resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.0.tgz#4e14870a618d9e2edd97dd8345fd9d9dc315646a"
+  integrity sha512-qwx12AxXe2Q5xQ43Ac//I6v5aXTipYrSESdOgzrN+9XjgEpyjpKuvSGaN4qE93f7TQTlerQQ8S+EQ0EyDoVL1A==
+  dependencies:
+    ansi-styles "^4.1.0"
+    supports-color "^7.1.0"
+
+color-convert@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
+  integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
+  dependencies:
+    color-name "~1.1.4"
+
+color-name@~1.1.4:
+  version "1.1.4"
+  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
+  integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
+
+follow-redirects@^1.10.0:
+  version "1.13.1"
+  resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.13.1.tgz#5f69b813376cee4fd0474a3aba835df04ab763b7"
+  integrity sha512-SSG5xmZh1mkPGyKzjZP8zLjltIfpW32Y5QpdNJyjcfGxK3qo3NDDkZOZSFiGn1A6SclQxY9GzEwAHQ3dmYRWpg==
+
+has-flag@^4.0.0:
+  version "4.0.0"
+  resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
+  integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
+
+supports-color@^7.1.0:
+  version "7.2.0"
+  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
+  integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
+  dependencies:
+    has-flag "^4.0.0"
+
+typescript@^4.1.3:
+  version "4.1.3"
+  resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.1.3.tgz#519d582bd94cba0cf8934c7d8e8467e473f53bb7"
+  integrity sha512-B3ZIOf1IKeH2ixgHhj6la6xdwR9QrLC5d1VKeCSY4tvkqhF2eqd9O7txNlS0PO3GrBAFIdr3L1ndNwteUbZLYg==