what is going on
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
node_modules/
|
||||
node_modules/puppeteer/
|
||||
84
db.js
Normal file
84
db.js
Normal file
@@ -0,0 +1,84 @@
|
||||
const dbData = {
|
||||
"2021": {
|
||||
"passCompletions": 18.1,
|
||||
"passAttempts": 28.6
|
||||
},
|
||||
"2022": {
|
||||
"passCompletions": 20.2,
|
||||
"passAttempts": 29
|
||||
},
|
||||
"games": [
|
||||
{
|
||||
"opponent": "florida",
|
||||
"date": "Sep 3, 2022",
|
||||
"utah_score": 26,
|
||||
"opponent_points": 29
|
||||
},
|
||||
{
|
||||
"opponent": "southernutah",
|
||||
"date": "Sep 10, 2022",
|
||||
"utah_score": 73,
|
||||
"opponent_points": 7
|
||||
},
|
||||
{
|
||||
"opponent": "sandiegostate",
|
||||
"date": "Sep 17, 2022",
|
||||
"utah_score": 35,
|
||||
"opponent_points": 7
|
||||
},
|
||||
{
|
||||
"opponent": "arizonastate",
|
||||
"date": "Sep 24, 2022",
|
||||
"utah_score": 34,
|
||||
"opponent_points": 13
|
||||
},
|
||||
{
|
||||
"opponent": "oregonstate",
|
||||
"date": "Oct 1, 2022",
|
||||
"utah_score": 42,
|
||||
"opponent_points": 16
|
||||
},
|
||||
{
|
||||
"opponent": "ucla",
|
||||
"date": "Oct 8, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
},
|
||||
{
|
||||
"opponent": "usc",
|
||||
"date": "Oct 15, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
},
|
||||
{
|
||||
"opponent": "washingtonstate",
|
||||
"date": "Oct 27, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
},
|
||||
{
|
||||
"opponent": "arizona",
|
||||
"date": "Nov 5, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
},
|
||||
{
|
||||
"opponent": "stanford",
|
||||
"date": "Nov 12, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
},
|
||||
{
|
||||
"opponent": "oregon",
|
||||
"date": "Nov 19, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
},
|
||||
{
|
||||
"opponent": "colorado",
|
||||
"date": "Nov 26, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
88
db.json
Normal file
88
db.json
Normal file
@@ -0,0 +1,88 @@
|
||||
{
|
||||
"games": [
|
||||
{
|
||||
"florida": {
|
||||
"date": "Sep 3, 2022",
|
||||
"utah_score": 26,
|
||||
"opponent_points": 29
|
||||
}
|
||||
},
|
||||
{
|
||||
"southernutah": {
|
||||
"date": "Sep 10, 2022",
|
||||
"utah_score": 73,
|
||||
"opponent_points": 7
|
||||
}
|
||||
},
|
||||
{
|
||||
"sandiegostate": {
|
||||
"date": "Sep 17, 2022",
|
||||
"utah_score": 35,
|
||||
"opponent_points": 7
|
||||
}
|
||||
},
|
||||
{
|
||||
"arizonastate": {
|
||||
"date": "Sep 24, 2022",
|
||||
"utah_score": 34,
|
||||
"opponent_points": 13
|
||||
}
|
||||
},
|
||||
{
|
||||
"oregonstate": {
|
||||
"date": "Oct 1, 2022",
|
||||
"utah_score": 42,
|
||||
"opponent_points": 16
|
||||
}
|
||||
},
|
||||
{
|
||||
"ucla": {
|
||||
"date": "Oct 8, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"usc": {
|
||||
"date": "Oct 15, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"washingtonstate": {
|
||||
"date": "Oct 27, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"arizona": {
|
||||
"date": "Nov 5, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"stanford": {
|
||||
"date": "Nov 12, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"oregon": {
|
||||
"date": "Nov 19, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"colorado": {
|
||||
"date": "Nov 26, 2022",
|
||||
"utah_score": 0,
|
||||
"opponent_points": 0
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
hero-background.jpeg
Normal file
BIN
hero-background.jpeg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.0 MiB |
71
index.html
Normal file
71
index.html
Normal file
@@ -0,0 +1,71 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>

    <!-- <link> is a void element: it must not have a closing tag. -->
    <link rel="stylesheet" href="./styles.css">

    <!-- Third-party libraries loaded from CDNs: Tailwind (utility classes) and Chart.js (used by scripts.js). -->
    <script src="https://cdn.tailwindcss.com"></script>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
|
||||
<body>
|
||||
|
||||
<main>
|
||||
|
||||
<section class="bg-center bg-scroll bg-cover flex hero h-96 mx-auto items-center justify-end overflow-hidden relative" style="background-image: url(./hero-background.jpeg)">
|
||||
<div class="relative z-50 mr-40">
|
||||
<h1 class="text-4xl font-black text-white">Utes Stats</h1>
|
||||
<p class="text-white font-medium">I have no idea what I'm doing!</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<div class="container mx-auto px-4">
|
||||
|
||||
<section class="py-12" aria-label="Team Statistics">
|
||||
<h2 class="text-2xl font-black text-center mb-12 section-header">2021-2022 Stat Comparisons</h2>
|
||||
<div class="flex justify-around">
|
||||
<div class="stat-container">
|
||||
<h2 class="font-black">Pass Completions</h2>
|
||||
<canvas id="passCompletions"></canvas>
|
||||
</div>
|
||||
|
||||
<div class="stat-container">
|
||||
<h2 class="font-black">Pass Attempts</h2>
|
||||
<canvas id="passAttempts"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="py-12" aria-label="Team Game Schedule">
    <h2 class="text-2xl font-black text-center mb-12 section-header">Game Schedule</h2>
    <!--
        Static scoreboard for a single game. The values mirror the first entry of
        dbData.games in db.js (Florida, Sep 3, 2022: Utah 26, opponent 29).
        Every id in this section is unique so each element can be looked up individually.
    -->
    <div class="items-center flex justify-evenly">
        <div class="text-center">
            <h2 id="utahScore" class="text-4xl font-black text-red">26</h2>
            <h3 id="utahTeam" class="text-xl uppercase font-black">Utah</h3>
        </div>
        <div class="text-center">
            <h2 class="text-4xl uppercase font-black game-versus">VS</h2>
            <h3 class="text-2xl uppercase font-black">Sep 3, 2022</h3>
        </div>
        <div class="text-center">
            <h2 id="opponentScore" class="text-4xl font-black text-red">29</h2>
            <h3 id="opponent" class="text-xl uppercase font-black">Florida</h3>
        </div>
    </div>
</section>
|
||||
|
||||
</div>
|
||||
|
||||
</main>
|
||||
|
||||
<footer class="text-center py-12 text-white">
|
||||
<p>* All this data is Web Scraped from <a class="text-red font-medium" href="https://www.sports-reference.com">Sports Reference</a></p>
|
||||
</footer>
|
||||
|
||||
<script src="./db.js"></script>
|
||||
<script src="./scripts.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
6
jest-puppeteer.config.js
Normal file
6
jest-puppeteer.config.js
Normal file
@@ -0,0 +1,6 @@
|
||||
// Options handed to the browser launch by jest-puppeteer:
// run without a visible window and slow every operation down by 30 ms.
const launch = {
  slowMo: 30,
  headless: true,
};

module.exports = { launch };
|
||||
7
jest.config.js
Normal file
7
jest.config.js
Normal file
@@ -0,0 +1,7 @@
|
||||
const config = {
|
||||
verbose: true,
|
||||
preset: 'jest-puppeteer',
|
||||
setupFilesAfterEnv: ["expect-puppeteer"]
|
||||
};
|
||||
|
||||
module.exports = config;
|
||||
7868
package-lock.json
generated
Normal file
7868
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
package.json
Normal file
21
package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "web-scraper",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "scripts.js",
|
||||
"scripts": {
|
||||
"test": "jest"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"expect-puppeteer": "^6.1.1",
|
||||
"jest": "^29.1.2",
|
||||
"jest-puppeteer": "^6.1.1",
|
||||
"puppeteer": "^18.0.5",
|
||||
"puppeteer-extra": "^3.3.4",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.13.5",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.1"
|
||||
}
|
||||
}
|
||||
136
scraper.js
Normal file
136
scraper.js
Normal file
@@ -0,0 +1,136 @@
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const { writeFile } = require('fs').promises;
|
||||
|
||||
/*************************************
|
||||
* THIS IS WHAT MAKES IT WORK
|
||||
* SHOUT OUT JORDAN HANSEN - https://cobaltintelligence.com/blog/avoid-being-blocked-with-puppeteer/
|
||||
* ALSO ANOTHER WAY TO BLOCK ALL ADS IS TO INTERCEPT ALL REQUESTS, AND FILTER ALL REQUESTS FOR THESE DOMAINS:
|
||||
* https://winhelp2002.mvps.org/hosts.txt
|
||||
* Example Code:
|
||||
* //now we read the host file
|
||||
var hostFile = fs.readFileSync('hosts.txt', 'utf8').split('\n');
|
||||
var hosts = {};
|
||||
for (var i = 0; i < hostFile.length; i++) {
|
||||
var frags = hostFile[i].split(' ');
|
||||
if (frags.length > 1 && frags[0] === '0.0.0.0') {
|
||||
hosts[frags[1].trim()] = true;
|
||||
}
|
||||
}
|
||||
* page.on('request', request => {
|
||||
var domain = null;
|
||||
if (task.input.blockads) {
|
||||
var frags = request.url().split('/');
|
||||
if (frags.length > 2) {
|
||||
domain = frags[2];
|
||||
}
|
||||
}
|
||||
if ((task.input.blockads && hosts[domain] === true) || (!task.input.includephotos && request.resourceType() === 'image')) {
|
||||
request.abort();
|
||||
}
|
||||
else {
|
||||
request.continue();
|
||||
}
|
||||
});
|
||||
*************************************/
|
||||
// const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
// puppeteer.use(StealthPlugin());
|
||||
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');
|
||||
/************************************/
|
||||
|
||||
/**
 * Reads pass completions and pass attempts from the first data row
 * (`data-row="0"`) of the `.stats_table` on an already-loaded season page.
 *
 * @param {object} page - Puppeteer page that has already navigated to a season page.
 * @returns {Promise<{passCompletions: number, passAttempts: number}>} The two
 *   cell values converted with `Number()`.
 * @throws Rejects if either cell is missing from the page.
 */
function scrapePassingStats(page) {
  return page.evaluate(() => {
    // This callback runs inside the browser page, not in Node.
    const readStat = (stat) => {
      const cell = document.querySelector(`.stats_table tr[data-row="0"] td[data-stat="${stat}"]`);
      return Number(cell.textContent);
    };

    return {
      passCompletions: readStat('pass_cmp'),
      passAttempts: readStat('pass_att'),
    };
  });
}

/**
 * Builds the list of games from the `#schedule` table of an already-loaded
 * schedule page.
 *
 * Result shape:
 *   {
 *     games: [
 *       { opponent: 'florida', date: 'Sep 3, 2022', utah_score: 26, opponent_points: 29 },
 *       ...
 *     ]
 *   }
 *
 * @param {object} page - Puppeteer page that has already navigated to the schedule page.
 * @returns {Promise<{games: Array<object>}>} One entry per date cell, in table order.
 */
function scrapeSchedule(page) {
  return page.evaluate(() => {
    // This callback runs inside the browser page, not in Node.
    const cells = (stat) => document.querySelectorAll(`#schedule td[data-stat="${stat}"]`);
    // Empty score cells are stored as 0.
    const toScore = (cell) => (cell.textContent !== '' ? Number(cell.textContent) : 0);

    const dates = cells('date_game');
    const opponents = cells('opp_name');
    const utahScores = cells('points');
    const opponentScores = cells('opp_points');

    const games = [];
    // The four node lists are matched up by index, one entry per date cell.
    dates.forEach((date, index) => {
      // Remove parentheses, "[", digits, "^" and whitespace from the opponent
      // name, then lowercase it (presumably to drop ranking prefixes - confirm).
      const opponent = opponents[index].textContent.replace(/([([)0-9^\s])/g, '').toLowerCase().trim();

      games.push({
        opponent,
        date: date.textContent,
        utah_score: toScore(utahScores[index]),
        opponent_points: toScore(opponentScores[index]),
      });
    });

    return { games };
  });
}

/**
 * Launches the browser, scrapes the 2022 schedule plus the 2021 and 2022
 * season statistics, and writes the combined result to `db.js`.
 *
 * The browser is always closed, even when a step fails, and the file write is
 * awaited so that a write failure is reported to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await puppeteer
    .use(AdblockerPlugin({ blockTrackers: true }))
    .launch({ headless: false });

  try {
    const schedulePage = await browser.newPage();
    await schedulePage.goto('https://www.sports-reference.com/cfb/schools/utah/2022-schedule.html');

    const previousYearStatisticsPage = await browser.newPage();
    await previousYearStatisticsPage.goto('https://www.sports-reference.com/cfb/schools/utah/2021.html');

    const currentYearStatisticsPage = await browser.newPage();
    await currentYearStatisticsPage.goto('https://www.sports-reference.com/cfb/schools/utah/2022.html');

    // Build the data for each page.
    const previousYearStatistics = await scrapePassingStats(previousYearStatisticsPage);
    const currentYearStatistics = await scrapePassingStats(currentYearStatisticsPage);
    const scheduleResults = await scrapeSchedule(schedulePage);

    scheduleResults[2021] = previousYearStatistics;
    scheduleResults[2022] = currentYearStatistics;

    const data = JSON.stringify(scheduleResults, null, 2);
    console.log(data);

    // NOTE: the output is a .js file (not .json) so the page can load it
    // locally with a <script> tag for the charts. An alternative would be an
    // Express server exposing this data on an API endpoint.
    await writeFile('db.js', `const dbData = ${data}`, 'utf8');
  } finally {
    await browser.close();
  }
}

main().catch((err) => {
  console.error(err);
  // Signal failure to the shell without cutting off pending output.
  process.exitCode = 1;
});
|
||||
12
scraper.test.js
Normal file
12
scraper.test.js
Normal file
@@ -0,0 +1,12 @@
|
||||
require('expect-puppeteer');
|
||||
|
||||
// Smoke test for the page the scraper depends on: loads the Utah 2022
// schedule page and checks its title. `page` is the global provided by the
// jest-puppeteer preset configured in jest.config.js.
describe('Sports Reference: Utah 2022 schedule page', () => {
  beforeAll(async () => {
    await page.goto('https://www.sports-reference.com/cfb/schools/utah/2022-schedule.html', { waitUntil: 'domcontentloaded' });
  });

  it('should open a new page, and match title.', async () => {
    const title = await page.title();
    expect(title).toMatch("2022 Utah Utes Schedule and Results | College Football at Sports-Reference.com");
  });
});
|
||||
67
scripts.js
Normal file
67
scripts.js
Normal file
@@ -0,0 +1,67 @@
|
||||
// Debug output of the dataset defined by db.js (loaded before this script).
console.log(dbData);

// Fill and border colour shared by every bar.
const BAR_COLOR = '#c00';

// Seasons compared in each chart; these are the top-level keys read from dbData.
const COMPARED_SEASONS = ['2021', '2022'];

/**
 * Creates a bar chart with one bar per compared season for a single statistic.
 *
 * @param {string} canvasId - id of the <canvas> element to draw into.
 * @param {string} statKey - property read from each season object in dbData.
 * @param {string} label - text appended to the season in each bar label.
 * @returns {Chart} The created Chart.js instance.
 */
function createSeasonComparisonChart(canvasId, statKey, label) {
  return new Chart(document.getElementById(canvasId), {
    type: 'bar',
    data: {
      labels: COMPARED_SEASONS.map((season) => `${season} ${label}`),
      datasets: [{
        data: COMPARED_SEASONS.map((season) => dbData[season][statKey]),
        backgroundColor: COMPARED_SEASONS.map(() => BAR_COLOR),
        borderColor: COMPARED_SEASONS.map(() => BAR_COLOR),
        borderWidth: 1,
      }],
    },
    options: {
      plugins: {
        // There is only one dataset, so the legend is hidden.
        legend: {
          display: false,
        },
      },
      scales: {
        y: {
          beginAtZero: true,
        },
      },
    },
  });
}

const passCompletions = createSeasonComparisonChart('passCompletions', 'passCompletions', 'Pass Completions');

const passAttempts = createSeasonComparisonChart('passAttempts', 'passAttempts', 'Pass Attempts');

// Debug output of every scraped game.
dbData.games.forEach((game) => {
  console.log(game);
});
|
||||
37
styles.css
Normal file
37
styles.css
Normal file
@@ -0,0 +1,37 @@
|
||||
/* Red, skewed overlay multiplied over the right-hand side of the hero image. */
.hero::before {
    content: '';
    display: block;
    position: absolute;
    width: 100%;
    height: 100%;
    background-color: #c00;
    mix-blend-mode: multiply;
    transform: translateX(50%) skew(-40deg);
}

/* Red text utility. */
.text-red {
    color: #c00;
}

/* Bordered card around each chart. */
.stat-container {
    padding: 1rem;
    border: 1px solid #efefef;
    border-radius: 0.25rem;
}

/* Short red rule centred under each section heading. */
.section-header::after {
    content: '';
    display: block;
    width: 4rem;
    height: 1px;
    margin: 1rem auto;
    background-color: #c00;
}

/* The "VS" label between the two scores. */
.game-versus {
    color: #c00;
}

/* Dark footer background. */
footer {
    background-color: #18191B;
}
|
||||
Reference in New Issue
Block a user