# Source code for jasy.script.api.Comment
#
# Jasy - Web Tooling Framework
# Copyright 2010-2012 Zynga Inc.
# Copyright 2013-2014 Sebastian Werner
#
import re
import jasy.core.Text as Text
import jasy.core.Console as Console
from jasy import UserError
from jasy.script.util import *
# Used to measure the doc indent size (with leading stars in front of content)
docIndentReg = re.compile(r"^(\s*\*\s*)(\S*)")

# Used to split type lists as supported by throw, return and params
# (raw string prefix added: "\s" is an invalid escape sequence in a plain string)
listSplit = re.compile(r"\s*\|\s*")

# Used to remove markup sequences after doc processing of comment text
stripMarkup = re.compile(r"<.*?>")

# Matches return blocks in comments
returnMatcher = re.compile(r"^\s*\{([a-zA-Z0-9_ \.\|\[\]]+)\}")

# Matches type definitions in comments
typeMatcher = re.compile(r"^\s*\{=([a-zA-Z0-9_ \.]+)\}")

# Matches tags
tagMatcher = re.compile(r"#([a-zA-Z][a-zA-Z0-9]+)(\((\S+)\))?(\s|$)")

# Matches param declarations in own dialect
paramMatcher = re.compile(r"@([a-zA-Z0-9_][a-zA-Z0-9_\.]*[a-zA-Z0-9_]|[a-zA-Z0-9_]+)(\s*\{([a-zA-Z0-9_ \.\|\[\]]+?)(\s*\.{3}\s*)?((\s*\?\s*(\S+))|(\s*\?\s*))?\})?")

# Matches links in own dialect
linkMatcher = re.compile(r"(\{((static|member|property|event)\:)?([a-zA-Z0-9_\.]+)?(\#([a-zA-Z0-9_]+))?\})")

# Matches backticks and has a built-in failsafe for backticks which do not terminate on the same line
tickMatcher = re.compile(r"(`[^\n`]*?`)")
class CommentException(Exception):
    """Thrown when errors during comment processing are detected."""

    def __init__(self, message, lineNo=0):
        # lineNo is zero-based internally; the message reports it one-based
        Exception.__init__(self, "Comment error: %s (line: %s)" % (message, lineNo + 1))
class Comment():
    """
    Comment class is attached to parsed nodes and used to store all comment related information.

    The class supports a new Markdown and TomDoc inspired dialect to make developers life easier
    and work less repetitive.
    """

    # Relation to code
    context = None

    # Dictionary of tags
    tags = None

    # Dictionary of params
    params = None

    # List of return types
    returns = None

    # Static type
    type = None

    # Collected text of the comment (without the extracted doc relevant data)
    text = None

    # Text with extracted / parsed data
    __processedText = None

    # Text of the comment converted to HTML including highlighting (only for doc comment)
    __highlightedText = None

    # Text / Code Blocks in the comment
    __blocks = None
    def __init__(self, text, context=None, lineNo=0, indent="", fileId=None):
        """
        Parses the raw comment *text* (including its delimiters) and stores the result.

        :param text: Raw comment text starting with ``//``, ``/**``, ``/*!`` or ``/*``
        :param context: Relation to code (the node the comment is attached to)
        :param lineNo: Zero-based line number used in warnings and errors
        :param indent: Leading indentation shared by the comment's lines
        :param fileId: Identifier of the originating file (used in warnings)
        :raises CommentException: When *text* does not start with a known delimiter
        """

        # Store context (relation to code)
        self.context = context

        # Store fileId
        self.fileId = fileId

        # Figure out the type of the comment based on the starting characters

        # Inline comments
        if text.startswith("//"):
            # "// hello" => " hello"
            text = " " + text[2:]
            self.variant = "single"

        # Doc comments
        elif text.startswith("/**"):
            # "/** hello */" => " hello "
            text = " " + text[3:-2]
            self.variant = "doc"

        # Protected comments which should not be removed (e.g these are used for license blocks)
        elif text.startswith("/*!"):
            # "/*! hello */" => " hello "
            text = " " + text[3:-2]
            self.variant = "protected"

        # A normal multiline comment
        elif text.startswith("/*"):
            # "/* hello */" => " hello "
            text = " " + text[2:-2]
            self.variant = "multi"

        else:
            raise CommentException("Invalid comment text: %s" % text, lineNo)

        # Multi line comments need to have their indentation removed
        if "\n" in text:
            text = self.__outdent(text, indent, lineNo)

        # For single line comments strip the surrounding whitespace
        else:
            # " hello " => "hello"
            text = text.strip()

        # The text of the comment before any processing took place
        self.text = text

        # Perform annotation parsing, markdown conversion and code highlighting on doc blocks
        if self.variant == "doc":

            # Separate text and code blocks
            self.__blocks = self.__splitBlocks(text)

            # Re-combine everything and apply processing and formatting
            plainText = '' # text without annotations but with markdown
            for b in self.__blocks:

                if b["type"] == "comment":
                    processed = self.__processDoc(b["text"], lineNo)
                    b["processed"] = processed

                    # Strip any HTML markup produced by doc processing for the plain text
                    if "<" in processed:
                        plainText += stripMarkup.sub("", processed)
                    else:
                        plainText += processed

                else:
                    plainText += "\n\n" + b["text"] + "\n\n"

            # The comment text without any annotations
            self.text = plainText.strip()
def __splitBlocks(self, text):
"""
Splits up text and code blocks in comments.
This will try to use Misaka for Markdown parsing if available and will fallback to a simpler implementation in
order to allow processing of doc parameters and links without Misaka being installed.
"""
if not Text.supportsMarkdown:
return self.__splitSimple(text)
marked = Text.markdownToHtml(text)
def unescape(html):
html = html.replace('<', '<')
html = html.replace('>', '>')
html = html.replace('&', '&')
html = html.replace('"', '"')
return html.replace(''', "'")
parts = []
lineNo = 0
lines = text.split("\n")
markedLines = marked.split("\n")
i = 0
while i < len(markedLines):
l = markedLines[i]
# the original text of the line
parsed = unescape(stripMarkup.sub("", l))
# start of a code block, grab all text before it and move it into a block
if l.startswith('<pre><code>'):
# everything since the last code block and before this one must be text
comment = []
for s in range(lineNo, len(lines)):
source = lines[s]
if source.strip() == parsed.strip():
lineNo = s
break
comment.append(source)
parts.append({
"type": "comment",
"text": "\n".join(comment)
})
# Find the end of the code block
e = i
while i < len(markedLines):
l = markedLines[i]
i += 1
if l.startswith('</code></pre>'):
break
lineCount = (i - e) - 1
# add the code block
parts.append({
"type": "code",
"text": "\n".join(lines[lineNo:lineNo + lineCount])
})
lineNo += lineCount
else:
i += 1
# append the rest of the comment as text
parts.append({
"type": "comment",
"text": "\n".join(lines[lineNo:])
})
return parts
    def __splitSimple(self, text):
        """Splits comment text and code blocks by manually parsing a subset of markdown."""

        inCode = False          # whether we are currently inside a detected code block
        oldIndent = 0           # indentation of the previous relevant line
        parts = []
        wasEmpty = False        # whether the previous line was blank
        wasList = False         # whether the previous line looked like a list item

        lineNo = 0
        lines = text.split("\n")

        for s, l in enumerate(lines):

            # ignore empty lines
            if not l.strip() == "":

                # get indentation value and change
                indent = len(l) - len(l.lstrip())
                change = indent - oldIndent

                # detect code blocks: indented by 4+ extra spaces after a blank line,
                # unless the previous content was a list item (list continuation)
                if change >= 4 and wasEmpty:
                    if not wasList:
                        oldIndent = indent
                        inCode = True

                        parts.append({
                            "type": "comment",
                            "text": "\n".join(lines[lineNo:s])
                        })

                        lineNo = s

                # detect outdents (end of a code block)
                elif change < 0:
                    inCode = False

                    parts.append({
                        "type": "code",
                        # NOTE(review): slice ends at s - 1, so the line directly before
                        # the outdent (presumably the trailing blank line) is excluded
                        # from the code block — confirm this is intended
                        "text": "\n".join(lines[lineNo:s - 1])
                    })

                    lineNo = s

                # only keep track of old previous indentation outside of comments
                if not inCode:
                    oldIndent = indent

                # remember whether this marked a list or not
                wasList = l.strip().startswith('-') or l.strip().startswith('*')
                wasEmpty = False

            else:
                wasEmpty = True

        # append the trailing lines as a final block (code if we ended inside one)
        parts.append({
            "type": "code" if inCode else "comment",
            "text": "\n".join(lines[lineNo:])
        })

        return parts
[docs] def getHtml(self, highlight=True):
"""
Returns the comment text converted to HTML.
:param highlight: Whether to highlight the code
:type highlight: bool
"""
if not Text.supportsMarkdown:
raise UserError("Markdown is not supported by the system. Documentation comments could converted to HTML.")
if highlight:
if self.__highlightedText is None:
highlightedText = ""
for block in self.__blocks:
if block["type"] == "comment":
highlightedText += Text.highlightCodeBlocks(Text.markdownToHtml(block["processed"]))
else:
highlightedText += "\n%s" % Text.highlightCodeBlocks(Text.markdownToHtml(block["text"]))
self.__highlightedText = highlightedText
return self.__highlightedText
else:
if self.__processedText is None:
processedText = ""
for block in self.__blocks:
if block["type"] == "comment":
processedText += Text.markdownToHtml(block["processed"])
else:
processedText += "\n%s\n\n" % block["text"]
self.__processedText = processedText.strip()
return self.__processedText
    def __outdent(self, text, indent, startLineNo):
        """
        Outdent multi line comment text and filtering empty lines.

        Removes the shared leading *indent* from every line, then detects and strips a
        doc-style " * " prefix when present. Returns *text* unchanged when a line cannot
        be outdented cleanly.
        """

        lines = []

        # First, split up the comments lines and remove the leading indentation
        for lineNo, line in enumerate((indent + text).split("\n")):

            if line.startswith(indent):
                lines.append(line[len(indent):].rstrip())

            elif line.strip() == "":
                lines.append("")

            else:
                # Only warn for doc comments, otherwise it might just be code commented out
                # which is sometimes formatted pretty crazy when commented out
                if self.variant == "doc":
                    Console.warn("Could not outdent doc comment at line %s in %s", startLineNo + lineNo, self.fileId)

                return text

        # Find first line with real content to detect the doc-style prefix
        # (leading star plus whitespace) which needs to be stripped from every line
        outdentString = ""
        for lineNo, line in enumerate(lines):
            if line != "" and line.strip() != "":
                matchedDocIndent = docIndentReg.match(line)

                if not matchedDocIndent:
                    # As soon as we find a non doc indent like line we stop
                    break

                elif matchedDocIndent.group(2) != "":
                    # otherwise we look for content behind the indent to get the
                    # correct real indent (with spaces)
                    outdentString = matchedDocIndent.group(1)
                    break

            # NOTE(review): this increment has no effect — enumerate() reassigns
            # lineNo on the next iteration; left in place to preserve behavior
            lineNo += 1

        # Process outdenting to all lines (remove the outdentString from the start of the lines)
        if outdentString != "":
            lineNo = 0
            outdentStringLen = len(outdentString)

            for lineNo, line in enumerate(lines):
                if len(line) <= outdentStringLen:
                    # Line shorter than the prefix: treat as empty
                    lines[lineNo] = ""
                else:
                    if not line.startswith(outdentString):
                        # Only warn for doc comments, otherwise it might just be code commented out
                        # which is sometimes formatted pretty crazy when commented out
                        if self.variant == "doc":
                            Console.warn("Invalid indentation in doc comment at line %s in %s", startLineNo + lineNo, self.fileId)

                    else:
                        lines[lineNo] = line[outdentStringLen:]

        # Merge final lines and remove leading and trailing new lines
        return "\n".join(lines).strip("\n")
    def __processDoc(self, text, startLineNo):
        """
        Runs full doc-dialect processing on *text*: extracts static type, return and tag
        declarations, then parses ``@param`` declarations (outside of backtick spans only)
        and link declarations, returning the processed text.
        """

        text = self.__extractStaticType(text)
        text = self.__extractReturns(text)
        text = self.__extractTags(text)

        # Collapse new empty lines at start/end
        text = text.strip("\n\t ")

        parsed = ''

        # Now parse only the text outside of backticks
        last = 0

        def split(match):
            # Grab the text before the back tick and process any parameters in it;
            # the backtick span itself (group 1) is appended verbatim
            nonlocal parsed
            nonlocal last

            start, end = match.span()
            before = text[last:start]
            parsed += self.__processParams(before) + match.group(1)
            last = end

        # sub() is used only to drive the callback; its return value is ignored
        tickMatcher.sub(split, text)

        # add the rest of the text
        parsed += self.__processParams(text[last:])

        text = self.__processLinks(parsed)

        return text
def __splitTypeList(self, decl):
if decl is None:
return decl
splitted = listSplit.split(decl.strip())
result = []
for entry in splitted:
# Figure out if it is marked as array
isArray = False
if entry.endswith("[]"):
isArray = True
entry = entry[:-2]
store = {
"name" : entry
}
if isArray:
store["array"] = True
if entry in builtinTypes:
store["builtin"] = True
if entry in pseudoTypes:
store["pseudo"] = True
result.append(store)
return result
def __extractReturns(self, text):
"""Extracts leading return defintion (when type is function)"""
def collectReturn(match):
self.returns = self.__splitTypeList(match.group(1))
return ""
return returnMatcher.sub(collectReturn, text)
def __extractStaticType(self, text):
"""Extracts leading type defintion (when value is a static type)"""
def collectType(match):
self.type = match.group(1).strip()
return ""
return typeMatcher.sub(collectType, text)
def __extractTags(self, text):
"""
Extract all tags inside the give doc comment.
These are replaced from the text and collected inside the "tags" key as a dict.
"""
def collectTags(match):
if not self.tags:
self.tags = {}
name = match.group(1)
param = match.group(3)
if name in self.tags:
self.tags[name].add(param)
elif param:
self.tags[name] = set([param])
else:
self.tags[name] = True
return ""
return tagMatcher.sub(collectTags, text)
    def __processParams(self, text):
        """
        Parses ``@name {Type}`` parameter declarations in *text* into the nested
        ``self.params`` structure (dotted names create nested "fields" maps) and
        replaces each declaration with a ``<code class="param">`` HTML snippet.
        """

        def collectParams(match):
            # NOTE(review): paramName duplicates fullName below and is otherwise unused
            paramName = match.group(1)
            paramTypes = match.group(3)
            paramDynamic = match.group(4) is not None
            paramOptional = match.group(5) is not None
            paramDefault = match.group(7)

            if paramTypes:
                paramTypes = self.__splitTypeList(paramTypes)

            if self.params is None:
                self.params = {}

            params = self.params
            fullName = match.group(1).strip()
            names = fullName.split('.')

            # Walk/create one map level per dotted name segment
            for i, mapName in enumerate(names):

                # Ensure we have the map object in the params
                if mapName not in params:
                    params[mapName] = {}

                # Add new entries and overwrite if a type is defined in this entry
                # NOTE(review): mapName was just inserted above, so the first half of
                # this condition is always False — effectively only the type check applies
                if not mapName in params or paramTypes is not None:

                    # Make sure to not overwrite something like @options {Object} with the type of @options.x {Number}
                    if i == len(names) - 1:
                        # Final segment: (re)create the entry and attach the metadata
                        paramEntry = params[mapName] = {}

                        if paramTypes is not None:
                            paramEntry["type"] = paramTypes

                        if paramDynamic:
                            paramEntry["dynamic"] = paramDynamic

                        if paramOptional:
                            paramEntry["optional"] = paramOptional

                        if paramDefault is not None:
                            paramEntry["default"] = paramDefault

                    else:
                        paramEntry = params[mapName]

                else:
                    paramEntry = params[mapName]

                # create fields for new map level
                if i + 1 < len(names):
                    if not "fields" in paramEntry:
                        paramEntry["fields"] = {}

                    params = paramEntry["fields"]

            return '<code class="param">%s</code>' % fullName

        return paramMatcher.sub(collectParams, text)
def __processLinks(self, text):
def formatTypes(match):
parsedSection = match.group(3)
parsedFile = match.group(4)
parsedItem = match.group(6)
# Do not match {}
if parsedSection is None and parsedFile is None and parsedItem is None:
return match.group(1)
# Minor corrections
if parsedSection and not parsedItem:
parsedSection = ""
attr = ""
link = ""
label = ""
if parsedSection:
link += '%s:' % parsedSection
if parsedFile:
link += parsedFile
label += parsedFile
if parsedItem:
link += "~%s" % parsedItem
if label == "":
label = parsedItem
else:
label += "#%s" % parsedItem
# add link to attributes list
attr += ' href="#%s"' % link
# build final HTML
return '<a%s><code>%s</code></a>' % (attr, label)
return linkMatcher.sub(formatTypes, text)