Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
340 changes: 166 additions & 174 deletions package-lock.json

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"typings"
],
"scripts": {
"build": "npm run clean && rollup -c",
"build": "npm run clean && rollup -c --inline",
Comment thread
jdalton marked this conversation as resolved.
"clean": "rm -rf ./lib/ && mkdir lib ",
"lint": "eslint ./src ./test",
"prettify": "prettier --write ./**/*.js",
Expand All @@ -56,12 +56,16 @@
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-replace": "^5.0.5",
"@rollup/plugin-terser": "^0.4.4",
"acorn": "^8.11.3",
"acorn-walk": "^8.3.2",
"eslint": "^9.1.1",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-jest": "^28.3.0",
"globals": "^15.1.0",
"jest": "^29.7.0",
"magic-string": "^0.30.10",
"prettier": "3.2.5",
"rollup": "^4.17.1"
},
Expand Down
46 changes: 29 additions & 17 deletions rollup.config.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,34 @@
'use strict'

const terser = require('@rollup/plugin-terser')
const commonjs = require('@rollup/plugin-commonjs')
const replace = require('@rollup/plugin-replace')
const terser = require('@rollup/plugin-terser')

module.exports = {
input: 'src/xmlToJson.js',
output: [
{
file: 'lib/simpleXmlToJson.min.js',
format: 'cjs',
exports: 'auto',
plugins: [terser()]
},
{
file: 'lib/simpleXmlToJson.js',
format: 'cjs',
exports: 'auto'
}
],
plugins: [commonjs()]
module.exports = (commandLineArgs) => {
const { inline } = commandLineArgs
delete commandLineArgs.inline
return {
input: 'src/xmlToJson.js',
output: [
{
file: 'lib/simpleXmlToJson.min.js',
format: 'cjs',
exports: 'auto',
plugins: [terser()]
},
{
file: 'lib/simpleXmlToJson.js',
format: 'cjs',
exports: 'auto'
}
],
plugins: [
commonjs(),
replace({
preventAssignment: true,
values: { 'BUILD.COMPTIME': 'false' }
}),
...(inline ? require('./scripts/inline-plugins') : [])
]
}
}

@jdalton jdalton May 3, 2024

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the "inline" stuff to its own module. It's designed in a way that you can not inline and all the code still works. Inlining just allows code to be annotated like peek(/*inline*/) and it will inline the code at compile time but keep it abstracted during development.

You can see this here:

if (code === CHAR_CODE.OPEN_BRACKET) {
pos += 1
if (
hasNext(/*inline*/) &&
peek(/*inline*/) === CHAR_CODE.FORWARD_SLASH
) {

when inlined it will become:

if (code === 60) {
  pos += 1
  if (
      (currentToken !== EOF_TOKEN && pos < length) &&
      (xmlAsString.charCodeAt(pos)) === 47
  ) {

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JavaScript engines perform inlining during their JIT phase. However because parsing/tokenizing/creating ast can be intensive having it inlined upfront can aid JS engines in their JIT because more things are known up-front (less calling out to other functions).

@jdalton jdalton May 3, 2024

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With inlining enabled in PR #47 the performance is:

After for 4,000 iterations:

avg exec time of 4000 iterations (in ms): 0.553
...
Time: 2.459 s, estimated 3 s

without inlining (so removing --inline from the run command) it is:

After for 4,000 iterations:

avg exec time of 4000 iterations (in ms): 0.743
...
Time: 3.206 s

The idea is that inlining is bonus and so that's why it's an easy on/off and code works without inlining. The inlining bit is an extra oomph that does produce consistently faster results.

240 changes: 240 additions & 0 deletions scripts/inline-plugins.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
'use strict'

const acorn = require('acorn')
const MagicString = require('magic-string')
const replacePlugin = require('@rollup/plugin-replace')
const walk = require('acorn-walk')

const candidates = {
...require('../src/lexer').createLexer(''),
...require('../src/model'),
...require('../src/transpiler')
}
const constants = require('../src/constants')
const inlineCommentRegExp = /\/\*\s*inline\s*\*\//
const inlineInfo = new Map()
const inlineMisses = new Set()

const createAssertNonParamId = (info, paramLookup) => {
const { name: funcName, body } = info
return (id) => {
const { name, start } = id
if (paramLookup.has(name)) {
const { lineStart, lineEnd } = getLineInfo(body, start)
throw new Error(
`Problem inlining '${name}' in ${funcName}:\n${body.slice(
lineStart,
lineEnd
)}\n${' '.repeat(start - lineStart)}^`
)
}
}
}

const createFuncInfo = (funcName) => {
const func = candidates[funcName]
if (typeof func !== 'function') return undefined
const source = `${func}`
const ast = acorn.parse(source, { ecmaVersion: 'latest' })
const {
body: { 0: stmtNode }
} = ast
const funcNode = stmtNode.expression ?? stmtNode
const { body: bodyNode } = funcNode
const { start: bodyStart } = bodyNode
const padding = ' '.repeat(bodyStart)
const magicString = new MagicString(
padding + source.slice(bodyStart, bodyNode.end)
)
replaceInlinableCallExpressions(ast, magicString)
const info = {
name: funcName,
params: funcNode.params.map(getIdent).flat(),
body: magicString.toString(),
ast: funcNode
}
inlineInfo.set(funcName, info)
return info
}

const getConstantsReplacements = () => {
const replacements = {}
for (const { 0: name, 1: constant } of Object.entries(constants)) {
if (typeof constant === 'object' && constant !== null) {
for (const { 0: key, 1: value } of Object.entries(constant)) {
replacements[`${name}.${key}`] = JSON.stringify(value)
}
} else {
replacements[name] = JSON.stringify(constant)
}
}
return replacements
}

const getIdent = (node) => {
switch (node.type) {
case 'ArrayPattern':
return node.elements.map(getParam)
case 'AssignmentPattern':
return node.left.name
case 'ObjectPattern':
return node.properties.map(getParam)
case 'RestElement':
return node.argument.name
case 'Identifier':
default:
return node.name
}
}

const getInlineInfo = (funcName) => {
let info = inlineInfo.get(funcName)
if (info) return info
info = createFuncInfo(funcName)
if (info === undefined) return undefined
inlineInfo.set(funcName, info)
return info
}

const getLineInfo = (input, pos) => {
let line = 1
let cur = 0
while (true) {
const nextBreak = nextLineBreak(input, cur, pos)
if (nextBreak < 0) {
return {
line,
lineStart: cur,
lineEnd: nextLineBreak(input, pos),
col: pos - cur
}
}
line += 1
cur = nextBreak + 1
}
}

const nextLineBreak = (input, from, end = input.length) => {
const { length } = input
if (length === 0) return -1
const lastIndex = length - 1
for (let i = from; i < end; i += 1) {
if (i === lastIndex) return i
const code = input.charCodeAt(i)
if (code === 10 || code === 13 || code === 0x2028 || code === 0x2029) {
return i < end - 1 && code === 13 && input.charCodeAt(i + 1) === 10
? i + 1
: i
}
}
return -1
}

const paramIdentifierVisitor = (() => {
const AssignmentExpression = (node, st, c) => {
// Skip replacing identifiers on the left side of an assignment.
st.assertNonParamId(node.left)
c(node.right, st, 'Expression')
}
const Identifier = (node, { magicString, paramToArg }) => {
const arg = paramToArg.get(node.name)
if (arg) {
magicString.overwrite(node.start, node.end, arg)
}
}
const Property = (node, st, c) => {
// Skip shorthand property identifiers.
if (node.shorthand) return st.assertNonParamId(node.key)
if (node.computed) c(node.key, st, 'Expression')
if (node.value) c(node.value, st, 'Expression')
}
const VariableDeclarator = (node, st, c) => {
// Skip replacing local variable declarators.
st.assertNonParamId(node.id)
if (node.init) c(node.init, st, 'Expression')
}
return {
AssignmentExpression,
AssignmentPattern: AssignmentExpression,
Identifier,
MethodDefinition: Property,
Property,
PropertyDefinition: Property,
VariableDeclarator
}
})()

const inlinableCallExpressionReplacer = (match, funcName, args) => {

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it make more sense to rename match to source?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case its simulating string replace where a pattern is matched and then capture groups like funcName and args follow.

const info = getInlineInfo(funcName)
if (info === undefined) {
if (!inlineMisses.has(funcName)) {
inlineMisses.add(funcName)
console.warn(`inline miss for ${funcName}()`)
}
return match
}
const magicString = new MagicString(info.body)
const paramToArg = new Map(info.params.map((p, i) => [p, `${args[i]}`]))
walk.recursive(
info.ast.body,
{
magicString,
paramToArg,
assertNonParamId: createAssertNonParamId(info, paramToArg)
},
paramIdentifierVisitor
)
const result = magicString.toString()
const trimmed = result.trim()
return info.ast.expression ? `(${trimmed})` : trimmed.slice(1, -1)
}

const inlinableCallExpressionVisitor = {
CallExpression(node, { magicString }) {
const { arguments: args, start, end } = node
if (
inlineCommentRegExp.test(
magicString.original.slice(
start,
args.length ? args[0].start : end - 1
)
)
) {
magicString.overwrite(
start,
end,
inlinableCallExpressionReplacer(
magicString.original.slice(start, end),
node.callee.name,
args.map((a) => magicString.original.slice(a.start, a.end))
)
)
}
}
}

const replaceInlinableCallExpressions = (ast, magicString) => {
walk.simple(ast, inlinableCallExpressionVisitor, null, { magicString })
}

module.exports = [
{
name: 'inline',
transform: (source) => {
const ast = acorn.parse(source, {
ecmaVersion: 'latest',
sourceType: 'module'
})
const magicString = new MagicString(source)
replaceInlinableCallExpressions(ast, magicString)
return {
code: magicString.toString(),
map: magicString.generateMap({ hires: true })
}
}
},
replacePlugin({
preventAssignment: false,

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be true?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making it true prevents replacement in case: statements in switchs. As long as we aren't assigning to the constants this is fine to be true.

values: getConstantsReplacements()
})
]
24 changes: 24 additions & 0 deletions src/constants.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
'use strict'

module.exports = {
BUILD: {
COMPTIME: true

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

explain?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Languages like Zig and others have a notion of at "compile time". So when we are building the lib output with rollup and we load the modules it is at "compile time" this way during compile time we can export things that need to be inlined else not export them. The rollup "replace" plugin takes care of it seamlessly.

},
NODE_TYPE: {
ATTRIBUTE: 'ATTRIBUTE',
CONTENT: 'CONTENT',
ELEMENT: 'ELEMENT',
ROOT: 'ROOT'
},
TOKEN_TYPE: {
OPEN_BRACKET: 'OPEN_BRACKET',
ELEMENT_TYPE: 'ELEMENT_TYPE',
CLOSE_ELEMENT: 'CLOSE_ELEMENT',
ATTRIB_NAME: 'ATTRIB_NAME',
ATTRIB_VALUE: 'ATTRIB_VALUE',
ASSIGN: 'ASSIGN',
CLOSE_BRACKET: 'CLOSE_BRACKET',
CONTENT: 'CONTENT',
EOF: 'EOF'
}
}
10 changes: 6 additions & 4 deletions src/converters/astToJson.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict'

const { NODE_TYPE } = require('../constants')

const convertToJSON = ast => {
return buildJSONFromNode(ast.value.children[0])
}
Expand All @@ -8,7 +10,7 @@ const buildJSONFromNode = node => {
if (!node) return null
const json = {}
switch (node.type) {
case 'ELEMENT': {
case NODE_TYPE.ELEMENT: {
let element = {}
const attribs = buildAttributes(node.value.attributes)
const children = buildJSONFromNode(node.value.children)
Expand All @@ -22,12 +24,12 @@ const buildJSONFromNode = node => {
json[node.value.type] = element
break
}
case 'ATTRIBUTE': {
case NODE_TYPE.ATTRIBUTE: {
const attribNameAndValue = node.value
json[attribNameAndValue.name] = attribNameAndValue.value
break
}
case 'CONTENT': {
case NODE_TYPE.CONTENT: {
return {content: node.value}
}
default: {
Expand All @@ -50,7 +52,7 @@ const buildChildren = children => {
}
}

const isContentChildren = children => children && Array.isArray(children) && children.length === 1 && children[0].type === 'CONTENT'
const isContentChildren = children => children && Array.isArray(children) && children.length === 1 && children[0].type === NODE_TYPE.CONTENT

const buildAttributes = arrayNodes => {
if (arrayNodes && Array.isArray(arrayNodes)) {
Expand Down
Loading