-
-
Save oscarduignan/adb5d0276fccbec40d31dcc4bc8f9b33 to your computer and use it in GitHub Desktop.
| //> using scala 3.3 | |
| //> using toolkit typelevel:0.1.29 | |
| //> using dep com.softwaremill.macwire::macros:2.6.4 | |
| //> using dep org.scalameta:scalameta_2.13:4.7.7 | |
| //> using dep com.lihaoyi::pprint:0.9.0 | |
| import com.softwaremill.macwire.* | |
| import cats.effect.* | |
| import cats.syntax.all.* | |
| import scala.concurrent.duration.* | |
| import java.nio.file.Path | |
| import cats.effect.unsafe.IORuntime | |
| import scala.meta.internal.semanticdb.{Locator, MethodSignature, SymbolInformation, TypeRef, ValueSignature} | |
| import scala.io.Source | |
| import scala.meta.internal.semanticdb.SymbolInformation.Kind.METHOD | |
| import scala.collection.concurrent.TrieMap | |
| import scala.util.Using | |
/** Identifier of a team. */
case class TeamId(value: String)

/** A team and its human-readable name. */
case class Team(id: TeamId, name: String)

/** Read-only lookup of teams. */
trait Teams:
  /** Returns the team registered under `id`, or `None` when there is none. */
  def getTeamById(id: TeamId): Option[Team]

object Teams:
  /** Builds an in-memory [[Teams]] from the given teams, indexed by their id.
    * When two teams share an id, the later one wins.
    */
  def of(teams: Team*): Teams =
    val teamsById: Map[TeamId, Team] = teams.iterator.map(t => (t.id, t)).toMap
    new Teams:
      override def getTeamById(id: TeamId): Option[Team] = teamsById.get(id)
/** Exposes the `teams` of whichever [[Dependencies]] are in scope as a given [[Teams]]. */
given (using Dependencies): Teams = summon[Dependencies].teams
/** Identifier of a repository. */
case class RepoId(value: String)

/** A repository: its id, a path, and the id of the team it is associated with. */
case class Repository(id: RepoId, path: Path, teamId: TeamId)

/** Read-only lookup of repositories. */
trait Repos:
  /** Returns every repository whose `teamId` is `id` (empty when there are none). */
  def getReposByTeam(id: TeamId): Seq[Repository]

  /** Returns the repository registered under `id`, or `None` when there is none. */
  def getRepoById(id: RepoId): Option[Repository]

object Repos:
  /** Builds an in-memory [[Repos]] from the given repositories, indexed by id and by team. */
  def of(repos: Repository*): Repos =
    val reposById: Map[RepoId, Repository] = repos.iterator.map(r => (r.id, r)).toMap
    // Unknown teams simply have no repositories.
    val reposByTeam = repos.groupBy(_.teamId).withDefaultValue(Seq.empty[Repository])
    new Repos:
      override def getReposByTeam(id: TeamId): Seq[Repository] = reposByTeam(id)
      override def getRepoById(id: RepoId): Option[Repository] = reposById.get(id)
/** The [[Repos]] of whichever [[Dependencies]] are in scope. */
def repos(using deps: Dependencies): Repos =
  deps.repos

/** Exposes the in-scope dependencies' [[Repos]] as a given instance. */
given (using Dependencies): Repos = summon[Dependencies].repos
/** Collects the release actions of resources acquired through [[manage]] so
  * that they can all be run later in one go.
  */
class ResourceManager(using runtime: IORuntime):
  // The combined release action of everything acquired so far.
  private val shutdown: Ref[IO, IO[Unit]] = Ref.unsafe(IO.unit)

  /** Acquires `resource` synchronously (via `unsafeRunSync`) and returns the
    * acquired value. Its release action is put in front of the previously
    * registered ones, so the most recently acquired resource is released first.
    */
  def manage[A](resource: Resource[IO, A]): A =
    val acquireAndRegister: IO[A] =
      resource.allocated.flatMap: (value, release) =>
        shutdown.update(pending => release *> pending).as(value)
    acquireAndRegister.unsafeRunSync()

  /** Runs every registered release action and resets the registry to empty. */
  private def shutdownAll: IO[Unit] =
    shutdown.getAndSet(IO.unit).flatten
object ResourceManager:
  /** A [[ResourceManager]] running on `runtime`, wrapped in a `Resource` whose
    * finalizer releases everything the manager acquired.
    */
  def apply(runtime: IORuntime): Resource[IO, ResourceManager] =
    val create = IO(new ResourceManager(using runtime))
    Resource.make(create)(manager => manager.shutdownAll)
/** Runs `block` with a fresh [[ResourceManager]] (on the global `IORuntime`)
  * available as a given; the manager is shut down when `block` finishes.
  */
def usingResourceManager[A](block: ResourceManager ?=> IO[A]): IO[A] =
  ResourceManager(IORuntime.global).use(manager => block(using manager))
extension [A](resource: Resource[IO, A])(using withinScope: ResourceManager)
  /** Acquires this resource through the in-scope [[ResourceManager]], which
    * takes over responsibility for releasing it.
    */
  def useInScope: A =
    withinScope.manage(resource)
/** Marker for a (fake) connection pool. */
trait ConnectionPool

object ConnectionPool:
  /** A fake pool that logs and sleeps on start-up (1 second) and on shutdown (2 seconds). */
  def create: Resource[IO, ConnectionPool] =
    val start: IO[ConnectionPool] =
      for
        _ <- IO.println("✅Starting a fake connection pool")
        _ <- IO.sleep(1.second)
        pool <- IO(new ConnectionPool {})
      yield pool

    def stop(pool: ConnectionPool): IO[Unit] =
      for
        _ <- IO.println("🔻Closing connection pool")
        _ <- IO.sleep(2.seconds)
        _ <- IO.println("✅Connection pool closed")
      yield ()

    Resource.make(start)(stop)
/** Minimal HTTP client abstraction. */
trait HttpClient:
  /** Performs a GET on `url` and yields the response body. */
  def get(url: String): IO[String]

/** Fake client: takes a [[ConnectionPool]] but only logs the request and
  * returns a canned response.
  */
class HttpClientImpl(connectionPool: ConnectionPool) extends HttpClient:
  def get(url: String): IO[String] =
    IO.println(s"🛜HTTP GET $url").as(s"response from $url")
/** Types identifying things on GitHub. */
object Github:
  /** A GitHub organisation name; a `String` underneath, opaque outside this object. */
  opaque type OrgName = String

  /** A GitHub repository name; a `String` underneath, opaque outside this object. */
  opaque type RepoName = String

  object OrgName:
    def apply(value: String): OrgName = value

  object RepoName:
    def apply(value: String): RepoName = value

  /** A repository addressed by organisation and name. */
  case class Repo(org: OrgName, name: RepoName)
/** Client for the GitHub facts the metrics need. */
trait GithubClient:
  /** Yields the number of likes of `repo`. */
  def getLikes(repo: Github.Repo): IO[GithubLikes]

object GithubClient:
  /** A client that logs, issues a GET for the repository page through
    * `httpClient`, ignores the response and always reports 42 likes.
    */
  def live(httpClient: HttpClient): GithubClient = new GithubClient:
    def getLikes(repo: Github.Repo): IO[GithubLikes] =
      for
        _ <- IO.println(s"💾Fetching github likes for $repo")
        _ <- httpClient.get(s"https://github.com/${repo.org}/${repo.name}")
      yield GithubLikes(42)
/** The [[GithubClient]] that is currently in given scope. */
def githubClient(using githubClient: GithubClient): GithubClient =
  githubClient

/** Exposes the in-scope dependencies' [[GithubClient]] as a given instance. */
given (using Dependencies): GithubClient = summon[Dependencies].githubClient
/** The application's wiring. Every member is lazy, so a dependency is only
  * built when something first asks for it.
  */
trait Dependencies(using ResourceManager):
  // connectionPool is a Resource that needs to acquire/release things, useInScope is an extension method
  private lazy val connectionPool: ConnectionPool =
    ConnectionPool.create.useInScope

  lazy val httpClient: HttpClient =
    new HttpClientImpl(connectionPool) // or wire[HttpClientImpl]

  lazy val githubClient: GithubClient =
    wireWith(GithubClient.live)

  lazy val teams: Teams =
    val platUi = Team(TeamId("platui"), "Plat UI")
    val platOps = Team(TeamId("platops"), "PlatOps")
    Teams.of(platUi, platOps)

  lazy val repos: Repos =
    // All known repositories belong to the "platui" team and live under local-git-repos/<name>.
    val platUiRepoNames = Seq("contact-frontend", "accessibility-statement-frontend", "help-frontend")
    val platUiRepos = platUiRepoNames.map: name =>
      Repository(RepoId(name), Path.of(s"local-git-repos/$name"), TeamId("platui"))
    Repos.of(platUiRepos*)
/** [[Dependencies]] with the HTTP client replaced by a fake that never touches
  * the connection pool.
  */
trait TestDependencies extends Dependencies:
  override lazy val httpClient: HttpClient = new HttpClient:
    def get(url: String): IO[String] =
      IO.println("Using a fake http client, so you should not see connection pool started")
        .as("fake response")
// what are the benefits of having the cache within the metric vs having
// a shared cache that all metrics use? is it worth it for what I'm doing?
// do I need to have a way of defining how the cache key is calculated?

/** A value of type `A` derived from an entity `E`, computed at most once per
  * entity for as long as the computation succeeds.
  *
  * The cache stores one `Deferred` per entity holding the *outcome* of the
  * computation (`Right(value)` or `Left(error)`). Concurrent callers for the
  * same entity share the in-flight computation instead of starting their own.
  */
trait Metric[E, A]:
  private val cache = TrieMap.empty[E, Deferred[IO, Either[Throwable, A]]]

  /** Computes the metric for `entity` from scratch (no caching). */
  protected def from(entity: E)(using Dependencies): IO[A]

  /** Returns the cached metric for `entity`, computing it when no computation
    * has been started yet.
    *
    * If the computation fails or is cancelled, its cache entry is evicted (so
    * a later call retries) and every caller that was waiting on it is failed
    * with the same error (a `CancellationException` in the cancelled case)
    * rather than being left waiting on a `Deferred` that is never completed.
    */
  def compute(entity: E)(using Dependencies): IO[A] =
    Deferred[IO, Either[Throwable, A]].flatMap: slot =>
      // Uncancelable so that claiming the slot and installing the finalizer
      // cannot be separated by a cancellation; the actual work is polled.
      IO.uncancelable: poll =>
        IO(cache.putIfAbsent(entity, slot)).flatMap:
          case Some(inFlight) =>
            poll(inFlight.get).rethrow
          case None =>
            // Evict only our own slot, then wake up the waiters with the reason.
            def abandon(reason: Throwable): IO[Unit] =
              IO(cache.remove(entity, slot)) *> slot.complete(Left(reason)).void

            poll(from(entity)).guaranteeCase:
              case Outcome.Succeeded(result) =>
                result.flatMap(value => slot.complete(Right(value))).void
              case Outcome.Errored(error) =>
                abandon(error)
              case Outcome.Canceled() =>
                abandon(new java.util.concurrent.CancellationException("metric computation was cancelled"))
object Metric:
  /** Creates a [[Metric]] whose uncached computation is the function `f`. */
  def apply[E, A](f: E => Dependencies ?=> IO[A]): Metric[E, A] =
    new Metric[E, A]:
      override protected def from(entity: E)(using Dependencies): IO[A] =
        f(entity)
/** Starts `command` in `directory` as an OS process, with stderr merged into
  * stdout, and wraps it in a `Resource`. On release the process is destroyed
  * and then waited for, even if destroying it throws.
  */
def createProcess(command: List[String], directory: java.io.File): Resource[IO, Process] =
  val start: IO[Process] = IO.blocking:
    val builder = new ProcessBuilder(command*)
    builder.directory(directory)
    builder.redirectErrorStream(true)
    builder.start()

  def stop(process: Process): IO[Unit] =
    val awaitExit = IO.blocking(process.waitFor()).void
    IO.blocking(process.destroy()).guarantee(awaitExit)

  Resource.make(start)(stop)
/** Result of compiling a repository with SemanticDB enabled: the exit code of the sbt process. */
case class CompiledWithSemanticDB(exitCode: Int) // todo should it save path?

/** Runs `sbt "set semanticdbEnabled := true; clean; compile"` in the
  * repository's directory and reports the exit code.
  */
given Metric[Repository, CompiledWithSemanticDB] = Metric: repo =>
  createProcess(
    List("sbt", "set semanticdbEnabled := true; clean; compile"),
    repo.path.toFile
  ).use: compilationProcess =>
    val compile = IO.blocking:
      println(s"🏗️compiling with semanticdb: $repo")
      // The process writes stdout+stderr into a pipe. If nobody reads it, the
      // child blocks as soon as the pipe buffer is full and waitFor() never
      // returns, so drain (and discard) the output until the process closes it.
      compilationProcess.getInputStream.transferTo(java.io.OutputStream.nullOutputStream())
      CompiledWithSemanticDB(compilationProcess.waitFor())
    compile <* IO.println("💪compilation completed")
/** Finds where a compiled Play service uses govuk/hmrc frontend components,
  * based on the SemanticDB output under the repository's `target` directory.
  */
object PlayFrontendHmrc:
  /** A Twirl template on disk and its contents. */
  case class TwirlTemplate(path: Path, contents: String)

  /** One component made available in a template under the name `variable`,
    * together with the call expressions found for that name.
    */
  case class ComponentUsage(template: TwirlTemplate, component: String, variable: String, usages: Seq[String])

  // SemanticDB symbols of the component templates we are interested in.
  private val ComponentSymbolPattern = """^uk/gov/hmrc/(?:govuk|hmrc)frontend/views/html.*$"""

  /** Returns every call of `variable` in `template`, each as the text matched
    * by the regex below, which looks for `variable` followed by a
    * parenthesised argument list.
    *
    * `variable` is quoted before being spliced into the regex, so a name that
    * contains regex metacharacters (for example `$`) is matched literally.
    */
  private def findUsagesIn(template: String, variable: String): Seq[String] =
    val quotedVariable = java.util.regex.Pattern.quote(variable)
    s"""(?s)@?${quotedVariable}(?=\\()(?:(?=.*?\\((?!.*?\\1)(.*\\)(?!.*\\2).*))(?=.*?\\)(?!.*?\\2)(.*)).)+?.*?(?=\\1)[^(]*(?=\\2$$)""".r
      .findAllMatchIn(template)
      .map(_.group(0))
      .toList

  /** Builds the [[ComponentUsage]] for one symbol: maps the compiled
    * template's URI back to its Twirl source under `app/`, reads that file and
    * searches it for calls of `variable`. When the file cannot be read, the
    * contents are the placeholder text "unable to read twirl template".
    */
  private def componentUsage(repo: Path, compiledPath: String, variable: String, component: String): ComponentUsage =
    val twirlPath = repo.resolve(
      compiledPath
        .replaceFirst("^.*/main/", "app/")
        .replaceFirst("/html/", "/")
        .replaceFirst("template.scala", "scala.html")
    )
    val twirlContents = Using(Source.fromFile(twirlPath.toFile))(_.mkString)
      .getOrElse("unable to read twirl template")
    ComponentUsage(
      TwirlTemplate(twirlPath, twirlContents),
      component,
      variable,
      findUsagesIn(twirlContents, variable)
    )

  /** Scans the SemanticDB documents under `repo/target` and returns one
    * [[ComponentUsage]] per METHOD symbol whose type (for a value signature)
    * or return type (for a method signature) is a govuk/hmrc frontend
    * component template.
    */
  def findComponentUsages(repo: Path): Seq[ComponentUsage] =
    // Locator reports documents through a callback, so results are accumulated here.
    val usages = collection.mutable.ListBuffer[ComponentUsage]()
    Locator(repo.resolve("target"))((_, result) => {
      usages ++= result.documents.flatMap { document =>
        document.symbols
          .collect {
            case SymbolInformation(_, _, METHOD, _, displayName, signature, _, _, _, _) =>
              signature match {
                case ValueSignature(tpe: TypeRef) if tpe.symbol.matches(ComponentSymbolPattern) =>
                  Some((displayName, tpe.symbol))
                case MethodSignature(_, _, returnType: TypeRef)
                    if returnType.symbol.matches(ComponentSymbolPattern) =>
                  Some((displayName, returnType.symbol))
                case _ => None
              }
          }
          .flatten
          .map { case (variable, component) =>
            componentUsage(repo, document.uri, variable, component)
          }
      }
    })
    usages.toSeq
/** Usages of play-frontend-hmrc components in a repository. Depends on the
  * repository having been compiled with SemanticDB first.
  */
given Metric[Repository, Seq[PlayFrontendHmrc.ComponentUsage]] = Metric: repo =>
  for
    _ <- repo.metric[CompiledWithSemanticDB] // todo return empty seq or make it Either of error or seq of usages
    _ <- IO.println("🔎Searching for usages of play-frontend-hmrc components")
    // findComponentUsages reads SemanticDB files and templates from disk, so
    // run it as a blocking effect instead of on the compute pool.
    usages <- IO.blocking(PlayFrontendHmrc.findComponentUsages(repo.path))
    _ <- IO.println(s"🧠Found ${usages.length} usages of our components in $repo")
  yield usages
/** A number of lines of code. */
case class LineCount(count: Int)

/** Fake line count for a repository: logs, waits five seconds and reports 42. */
given Metric[Repository, LineCount] = Metric: repo =>
  for
    _ <- IO.println(s"👨💻Computing line count for $repo")
    _ <- IO.sleep(5.second)
  yield LineCount(42)
/** Line count of a team: the sum of the line counts of all its repositories,
  * which are computed in parallel.
  */
given Metric[Team, LineCount] = Metric: team =>
  val teamRepos = summon[Repos].getReposByTeam(team.id)
  teamRepos
    .parTraverse(repo => repo.metric[LineCount])
    .map(lineCounts => LineCount(lineCounts.map(_.count).sum))
// would be cool to have an example of a GithubClient that had some permits
// and was rate limited in maybe a few different ways, maybe with a circuit
// breaker too just to totally over-engineer it for learning.

/** A number of likes on GitHub. */
case class GithubLikes(count: Int)

/** Likes of the repository with the same name in the "hmrc" organisation,
  * followed by a five-second pause that logs a message if it is cancelled.
  */
given Metric[Repository, GithubLikes] = Metric: repo =>
  val githubRepo = Github.Repo(Github.OrgName("hmrc"), Github.RepoName(repo.id.value))
  val pause = IO
    .sleep(5.second)
    .onCancel(IO.println("Cleaning github context after early cancellation"))
  githubClient.getLikes(githubRepo) <* pause
extension [E](entity: E)
  /** Computes (or fetches from the metric's cache) the metric `A` of this
    * entity, using the given [[Metric]] instance for `E` and `A`.
    */
  def metric[A](using metric: Metric[E, A], deps: Dependencies): IO[A] =
    metric.compute(entity)(using deps)
extension (usages: Seq[PlayFrontendHmrc.ComponentUsage])
  /** The same usages with each template's contents replaced by a short placeholder. */
  def omittingTemplateContents: Seq[PlayFrontendHmrc.ComponentUsage] =
    for usage <- usages
    yield
      val redactedTemplate = usage.template.copy(contents = "<<OMITTED>>")
      usage.copy(template = redactedTemplate)
/** Demo application: computes a few metrics for one repository and its teams
  * and prints them.
  */
object Metrics extends IOApp.Simple:
  def run: IO[Unit] = usingResourceManager:
    given deps: Dependencies = new Dependencies {} // or new TestDependencies {}

    // Look entities up inside IO so that an unknown id fails the program with a
    // descriptive error instead of a bare exception from Option.get.
    def repoById(id: RepoId): IO[Repository] =
      IO.fromOption(repos.getRepoById(id))(new NoSuchElementException(s"No repository with id ${id.value}"))

    def teamById(id: TeamId): IO[Team] =
      IO.fromOption(summon[Teams].getTeamById(id))(new NoSuchElementException(s"No team with id ${id.value}"))

    for
      repo <- repoById(RepoId("contact-frontend"))
      _ <- IO.println("Hello toolkit!")
      _ <- IO.println("Starting to collect metrics")
      // in sequence:
      // lines <- repo.metric[LineCount]
      // likes <- repo.metric[GithubLikes]
      // in parallel:
      metrics <- (
        repo.metric[LineCount],
        repo.metric[GithubLikes],
        repo.metric[Seq[PlayFrontendHmrc.ComponentUsage]]
      ).parMapN((_, _, _))
      (lines, likes, componentUsages) = metrics
      teamLineCount <- teamById(repo.teamId).flatMap(team => team.metric[LineCount])
      _ <- IO.println(s"Line count: $lines")
      _ <- IO.println(s"Github likes: $likes")
      _ <- IO.println(s"Team line count: $teamLineCount")
      _ <- IO(pprint.pprintln(componentUsages.omittingTemplateContents))
      _ <- repo.metric[LineCount]
      _ <- IO.sleep(5.seconds)
      platformTotal <- (
        teamById(TeamId("platui")).flatMap(team => team.metric[LineCount]),
        teamById(TeamId("platops")).flatMap(team => team.metric[LineCount])
      ).parMapN(_.count + _.count)
      _ <- IO.println(s"🎉All-together across all teams we wrote $platformTotal lines 👏")
      // showing how you can use that little helper anywhere we've given our deps:
      _ <- deps.httpClient.get("http://example.com") <* IO.println("📩Posted results somewhere")
      _ <- IO.println("Finished collecting metrics")
    yield ()
Taking it further: I blew my own mind a bit by incorporating my "find usages of our components" code into this, and in doing so realised I could do something I had thought wasn't possible, namely use SemanticDB from Scala 3. Very satisfying to see:

This kind of thing is useful because, for example, we might want to report to someone whether a service is using the file upload component, and this makes that easy to reuse: you can build a metric that uses the find-usages metric and checks whether there is a usage of the file upload component. You could definitely create that report in other ways, but with this approach I think you could compose metrics into different reports really ergonomically. Taking the caching one step further, you could use something like Caffeine to cache results, persist each metric's cache to disk with a TTL, and load the caches back up before generating a report, so that updating reports later becomes a lot quicker. For example, the report on who is using the file upload component was great, but now we would additionally like to see "was the service updated in the last month?" and "which people contributed to it in the last month?", so that you can find examples that are actively maintained and know who to talk to about them. Or it could be "can we see when the template that uses the file upload component was last updated?". There are lots of facets you could compute and cache, and if memory use got too high you could always add some explicit way to clear caches that might get large.
Current version: https://gist.github.com/oscarduignan/9b7b86971ff87ac3829377267bbde2b2 (just hacking in my own time, out of curiosity).
Updated it so that the caching actually works, added emojis so that what's happening is easier to scan, and added some team metrics to show how it can request the same metric multiple times without repeating the possibly expensive calculation, and (I think at least 😅) without the way you define and use metrics being too unergonomic. I wasn't sure about the way dependencies get passed around, but I think I'm quite happy with it: there's not too much boilerplate, especially considering that you're likely to add or update metrics, or compose reports from them, far more often than you introduce new dependencies.