GavinRay97 edited a comment on issue #12570:
URL: https://github.com/apache/arrow/issues/12570#issuecomment-1059852761
Here is a Node.js script that downloads the assets of a nightly release and
arranges them in a Maven repository directory structure:
```json
{
"name": "arrow-download-nightly-as-maven-repo",
"scripts": {
"start": "node index.mjs"
},
"dependencies": {
"cross-fetch": "^3.1.5",
"jsdom": "^19.0.0"
}
}
```
```js
// index.mjs
// Run with: $ node index.mjs
import fetch from "cross-fetch"
import asyncFS from "fs/promises"
import { JSDOM } from "jsdom"
import path from "path"
import { fileURLToPath } from "url"
// Polyfill "__dirname" for Node.js ECMAScript Module filetype
// (derived from this module's own file URL via import.meta.url)
const __dirname = path.dirname(fileURLToPath(import.meta.url))
// GitHub release page that main() hands to
// extractArrowNightlyJarsToLocalM2Repo(). The tag name in the URL is fixed to
// one specific nightly build; edit it to fetch a different release.
const ARROW_NIGHTLY_TAG_URL =
"https://github.com/ursacomputing/crossbow/releases/tag/nightly-2022-03-03-0-github-java-jars"
/**
 * Script entry point: downloads every asset of the release page at
 * ARROW_NIGHTLY_TAG_URL into the local Maven-style directory tree.
 *
 * @returns {Promise<void>} Settles once all downloads have finished; rejects
 *   with the first error raised while scraping or downloading.
 */
async function main() {
  // Await the work so that a failure rejects main() and reaches the caller's
  // .catch() handler instead of becoming an unhandled rejection.
  await extractArrowNightlyJarsToLocalM2Repo(ARROW_NIGHTLY_TAG_URL)
}
// Log whatever error main() rejected with, then terminate with exit code 1.
function reportFailureAndExit(failure) {
  console.error(failure)
  process.exit(1)
}
main().catch(reportFailureAndExit)
/**
 * Scrapes a GitHub release page for its downloadable assets and saves each
 * one under `org/apache/arrow/<library>/<version>/<assetFilename>`, resolved
 * relative to the parent directory of this script.
 *
 * Assets are downloaded one at a time, in the order they appear on the page
 * (grouped by library).
 *
 * @param {string} arrowNightlyTagUrl URL of the release page to scrape.
 * @returns {Promise<void>} Settles once every recognised asset is written.
 */
async function extractArrowNightlyJarsToLocalM2Repo(arrowNightlyTagUrl) {
  // Parse HTML to DOM
  const dom = await JSDOM.fromURL(arrowNightlyTagUrl)
  const { document } = dom.window

  // library name -> [{ version, link, assetFilename }, ...]
  // A Map (rather than an array abused as a dictionary) keeps arbitrary
  // library names from colliding with built-in properties.
  const assetsByLibrary = new Map()

  // Each <li class="Box-row"> holds the asset name and its download URL
  for (const row of document.querySelectorAll("li.Box-row")) {
    const anchorTag = row.querySelector("a")
    if (!anchorTag) continue // row without a link: nothing to download

    const assetFilename = anchorTag.textContent.trim()
    if (assetFilename.includes("Source code")) continue

    const parsed = getLibraryAndVersionFromAssetFilename(assetFilename)
    if (!parsed) {
      // The helper returns undefined for names that do not follow the
      // <library>-<version> pattern; skip them instead of crashing.
      console.warn(`Skipping asset with unrecognised name: ${assetFilename}`)
      continue
    }

    const { library, version } = parsed
    const asset = { version, link: anchorTag.href, assetFilename }
    const known = assetsByLibrary.get(library)
    if (known) {
      known.push(asset)
    } else {
      assetsByLibrary.set(library, [asset])
    }
  }

  const basePath = "org/apache/arrow"
  for (const [library, versions] of assetsByLibrary) {
    for (const { version, link, assetFilename } of versions) {
      const libPath = `${library}/${version}`
      const fullPath = path.join(__dirname, "../", basePath, libPath)
      // The directory must exist before the file is written, so wait for it.
      await asyncFS.mkdir(fullPath, { recursive: true })
      console.log("Downloading " + assetFilename + " to " + fullPath)
      await downloadUrlAssetToPath(link, path.join(fullPath, assetFilename))
    }
  }
}
/**
 * Downloads the resource at `url` and writes its bytes to `filepath`.
 *
 * The body is read as raw bytes rather than text: decoding a binary asset
 * (such as a .jar) as a string and writing it back would corrupt it.
 *
 * @param {string} url Location of the asset to download.
 * @param {string} filepath Destination file; its directory must already exist.
 * @returns {Promise<void>} Settles once the file has been written.
 * @throws {Error} If the server answers with a non-2xx status.
 */
async function downloadUrlAssetToPath(url, filepath) {
  const response = await fetch(url)
  if (!response.ok) {
    // Do not save an HTTP error page under the asset's filename.
    throw new Error(
      `Failed to download ${url}: ${response.status} ${response.statusText}`
    )
  }
  const content = Buffer.from(await response.arrayBuffer())
  return asyncFS.writeFile(filepath, content)
}
// M2 repo folder format:
// org/apache/arrow/<lib-name>/<version>/<lib-name>-<version>.(ext)
/**
 * Splits an asset filename into its library name and nightly version.
 *
 * Example: "arrow-avro-8.0.0.dev165-sources.jar" yields
 * { library: "arrow-avro", version: "8.0.0.dev165" }.
 *
 * @param {string} filename Asset filename, e.g. "<lib-name>-<version>.jar".
 * @returns {{ library: string, version: string } | undefined} The named
 *   capture groups, or undefined when the filename does not contain a
 *   "-<major>.<minor>.<patch>.dev<N>" version.
 */
function getLibraryAndVersionFromAssetFilename(filename) {
  // \d+ so multi-digit components (e.g. 10.0.0) are accepted, and the dot
  // before "dev" is escaped so it only matches a literal ".".
  const libraryAndVersionRegex =
    /(?<library>.+)-(?<version>\d+\.\d+\.\d+\.dev\d+)/
  return filename.match(libraryAndVersionRegex)?.groups
}
```
```sh
user@MSI:~/projects/arrow-download-nightly-as-maven-repo$ tree org/
org/
└── apache
└── arrow
├── arrow-algorithm
│ └── 8.0.0.dev165
│ ├── arrow-algorithm-8.0.0.dev165-javadoc.jar
│ ├── arrow-algorithm-8.0.0.dev165-sources.jar
│ ├── arrow-algorithm-8.0.0.dev165-tests.jar
│ ├── arrow-algorithm-8.0.0.dev165.jar
│ └── arrow-algorithm-8.0.0.dev165.pom
├── arrow-avro
│ └── 8.0.0.dev165
│ ├── arrow-avro-8.0.0.dev165-javadoc.jar
│ ├── arrow-avro-8.0.0.dev165-sources.jar
│ ├── arrow-avro-8.0.0.dev165-tests.jar
│ ├── arrow-avro-8.0.0.dev165.jar
│ └── arrow-avro-8.0.0.dev165.pom
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]