Skip to content

Instantly share code, notes, and snippets.

@williamhogman
Last active February 12, 2016 08:41
Show Gist options
  • Select an option

  • Save williamhogman/92465439f94f5be84ebd to your computer and use it in GitHub Desktop.

Select an option

Save williamhogman/92465439f94f5be84ebd to your computer and use it in GitHub Desktop.
import re, collections
# Tokenizer: a complete <em>...</em> highlight is one token (non-greedy, so
# several highlights on one line stay separate); anything else is split on
# whitespace.
p = re.compile(r"(<em>.+?</em>|\S+)")
EXAMPLE = "Experienced Software Developer Specialties: Software Engineering :Design,Development,Test Programming :Java/J2EE,C#,C++,Erlang,SQL Development: Servlet, JSP, JSF, RESTful web services,JSON, Spring , Hibernate, Jackrabbit, Java SE ( AWT, Swing, Thread, JDBC ,RMI), J2EE (EJB, JSP, Webservice), ASP.NET Test :TDD,Junit,Mokito,Jmeter Software development methodology : Agile(XP, Scrum) DBMS :SQL Server, MySQL, Oracle <em>Distributed Systems</em> Big Data Processing :Hadoop MapReduce"
# Budget of visible token characters per excerpt (the spaces added when the
# tokens are joined are not counted).
MAX_LEN = 50
# len("<em>") + len("</em>"): markup characters that are never displayed.
EM_INVISIBLE_CHARS = 9


def _is_highlight(token):
    """Return True when *token* is a complete ``<em>...</em>`` highlight."""
    return token.startswith("<em>") and token.endswith("</em>")


def _visible_len(token):
    """Return the length of *token* without its ``<em>`` markup."""
    if _is_highlight(token):
        return len(token) - EM_INVISIBLE_CHARS
    return len(token)


def excerpt(x):
    """Yield short token windows of *x* around ``<em>...</em>`` highlights.

    The text is tokenized with ``p`` and scanned left to right while a
    sliding window ("band") of at most ``MAX_LEN`` visible characters is
    maintained.  Once a highlight is in the band, the band is emitted as soon
    as the text from the highlight's centre marker to the end of the band
    exceeds ``MAX_LEN / 2``; a band still holding a highlight when the input
    ends is emitted as well.

    Args:
        x: Text in which highlights are wrapped in ``<em>`` / ``</em>``.

    Yields:
        Lists of tokens (highlights keep their markup), one list per excerpt.
        Every yielded list contains at least one highlight.  Text without a
        highlight yields nothing.

    Notes:
        * Lengths are measured in visible characters, both when a token
          enters the band and when it leaves it.
        * A single token wider than ``MAX_LEN`` is kept whole rather than
          dropped.
    """
    band = collections.deque()
    queue_len = 0  # visible characters currently in ``band``
    # Offset (in visible characters from the start of the band) of the
    # highlighted region; None while the band holds no highlight.
    center = None
    for s in p.findall(x):
        width = _visible_len(s)
        band.append(s)
        queue_len += width

        dropped = []  # leading tokens evicted while making room for ``s``
        # ``len(band) > 1`` keeps the token just added, however wide it is.
        while queue_len > MAX_LEN and len(band) > 1:
            if _is_highlight(band[0]):
                # Making room for ``s`` would push a pending highlight out of
                # the band.  Emit the window as it was before ``s`` arrived
                # and start a fresh band with ``s``.
                band.pop()
                yield dropped + list(band)
                band.clear()
                band.append(s)
                queue_len = width
                center = None
                break
            removed = band.popleft()
            dropped.append(removed)
            removed_width = _visible_len(removed)
            queue_len -= removed_width
            if center is not None:
                # ``center`` is relative to the start of the band, which
                # just moved right by ``removed_width``.
                center -= removed_width

        if _is_highlight(s):
            if center is None:
                # First highlight: the marker is where the highlight starts.
                center = queue_len - width
            else:
                # Another highlight in the same band: move the marker halfway
                # towards the end of the new highlight.
                center = (center + queue_len) / 2
        elif center is not None and queue_len - center > MAX_LEN / 2:
            # Enough trailing context has been collected.
            yield list(band)
            band.clear()
            queue_len = 0
            center = None
    if center is not None:
        # Input ended while a highlight was still waiting for more context.
        yield list(band)
# Demo: build every excerpt of the sample text as a single string.
res = [" ".join(tokens) for tokens in excerpt(EXAMPLE)]
for x in res:
    # Per excerpt: its length with the <em> markup stripped, the excerpt
    # itself, then an empty separator line.
    print(len(x.replace("<em>", "").replace("</em>", "")), x, "", sep="\n")
"... :SQL Server, MySQL, Oracle Distributed Systems Big Data..."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment