Replace sleep with actual URL fetching. The worker scrapes the HTML title from each URL, respects robots.txt, and sends a proper User-Agent header. Scraped titles are stored in SQLite and sent via webhook callback.
82 lines
1.7 KiB
Go
82 lines
1.7 KiB
Go
package db
|
|
|
|
import (
|
|
"database/sql"
|
|
"time"
|
|
|
|
_ "github.com/mattn/go-sqlite3"
|
|
)
|
|
|
|
// Job is the in-memory form of one row of the jobs table.
type Job struct {
	// ID holds the row's id column (the table's integer primary key).
	ID int

	// Status holds the status column; this file writes the values
	// 'pending' and 'done'.
	Status string

	// CreatedAt holds the created_at column, which the schema defaults to
	// CURRENT_TIMESTAMP.
	CreatedAt time.Time

	// WebhookURL holds the webhook_url column; it is the empty string when
	// the column is NULL.
	// NOTE(review): presumably the callback endpoint notified about the
	// job's result — confirm against the worker.
	WebhookURL string

	// URL holds the url column; it is the empty string when the column is
	// NULL.
	URL string

	// Title holds the title column; it is the empty string when the column
	// is NULL.
	Title string
}
|
|
|
|
// Init creates a database handle for the "sqlite3" driver, using dbPath as
// the data source name, and makes sure the jobs table exists.
//
// It returns the ready-to-use handle, or a nil handle together with the error
// from sql.Open or from executing the schema statement. When the schema
// statement fails the handle is closed before returning, so no connection is
// left open behind the nil return value.
func Init(dbPath string) (*sql.DB, error) {
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return nil, err
	}

	// IF NOT EXISTS keeps Init safe to call on a database that already has
	// the table; an already existing jobs table is left untouched.
	schema := `
	CREATE TABLE IF NOT EXISTS jobs (
		id INTEGER PRIMARY KEY AUTOINCREMENT,
		status TEXT NOT NULL DEFAULT 'pending',
		created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
		webhook_url TEXT,
		url TEXT,
		title TEXT
	);`

	if _, err := db.Exec(schema); err != nil {
		// The caller only receives nil and cannot close the handle itself.
		// The Close error is dropped on purpose: the Exec failure is the
		// error worth reporting.
		_ = db.Close()
		return nil, err
	}

	return db, nil
}
|
|
|
|
// CreateJob inserts a new row into jobs with status 'pending', storing
// webhookURL and url in the webhook_url and url columns.
//
// It returns the last-insert ID reported for the statement, or 0 and the
// error when the insert itself fails.
func CreateJob(db *sql.DB, webhookURL, url string) (int64, error) {
	const insertJob = "INSERT INTO jobs (status, webhook_url, url) VALUES ('pending', ?, ?)"

	res, execErr := db.Exec(insertJob, webhookURL, url)
	if execErr != nil {
		return 0, execErr
	}

	return res.LastInsertId()
}
|
|
|
|
func GetPendingJobs(db *sql.DB) ([]Job, error) {
|
|
rows, err := db.Query("SELECT id, status, created_at, webhook_url, url, title FROM jobs WHERE status = 'pending'")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var jobs []Job
|
|
for rows.Next() {
|
|
var job Job
|
|
var webhookURL, url, title sql.NullString
|
|
if err := rows.Scan(&job.ID, &job.Status, &job.CreatedAt, &webhookURL, &url, &title); err != nil {
|
|
return nil, err
|
|
}
|
|
job.WebhookURL = webhookURL.String
|
|
job.URL = url.String
|
|
job.Title = title.String
|
|
jobs = append(jobs, job)
|
|
}
|
|
return jobs, nil
|
|
}
|
|
|
|
// UpdateJobTitle sets the title column of the jobs row whose id equals id.
//
// It returns the error from executing the UPDATE statement, or nil on success.
func UpdateJobTitle(db *sql.DB, id int, title string) error {
	const setTitle = "UPDATE jobs SET title = ? WHERE id = ?"

	if _, execErr := db.Exec(setTitle, title, id); execErr != nil {
		return execErr
	}
	return nil
}
|
|
|
|
// MarkJobDone sets the status column to 'done' on the jobs row whose id
// equals id.
//
// It returns the error from executing the UPDATE statement, or nil on success.
func MarkJobDone(db *sql.DB, id int) error {
	const markDone = "UPDATE jobs SET status = 'done' WHERE id = ?"

	if _, execErr := db.Exec(markDone, id); execErr != nil {
		return execErr
	}
	return nil
}
|