Last active
January 10, 2026 15:37
-
-
Save nemoinho/373162cd88171b00804fff7a8171f96b to your computer and use it in GitHub Desktop.
A demonstration of a test suite for an AddressParser. The parser's purpose is to correctly split German address lines of the form "street house-number" into a tuple of street and house number.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Splits German address lines of the form "<street> <house number>" into
 * their street part and their house-number part.
 */
export class AddressParser {
  /**
   * Lines that are a complete street name although they end in digits:
   * streets without a proper name yet ("Straße 17") and names made of one
   * capital letter plus one digit ("D4").
   */
  private static readonly streetOnlyPattern = /^(Straße \d+|[A-Z]\d)$/;

  /** Explicit house-number marker " Nr." in any letter case. */
  private static readonly numberMarkerPattern = /\sNr\./i;

  /**
   * House-number pattern. The first alternative is anchored at the end of the
   * line (plain numbers, letter suffixes, ranges with "-" or "/", and "1/2"
   * style fractions). The second alternative matches a number followed by
   * "//" and takes everything after it, with or without spaces around "//".
   *
   * see https://regexper.com/#%2F%28%3F%3A%5Cd%2B%28%3F%3A%5Cs%3F%5Ba-z%5D%2B%29%3F%28%3F%3A%5Cs%5B-%5C%2F%5D%5Cs%7C%5B-%5C%2F%5D%29%29%3F%5Cd%2B%28%3F%3A%5Cs%3F%5Ba-z%5D%2B%7C%5Cs%5Cd%5C%2F%5Cd%29%3F%24%7C%5Cd%2B%28%3F%3A%5Cs%3F%5Ba-z%5D%2B%29%3F%5Cs*%5C%2F%7B2%7D%5Cs*.*%2Fi
   */
  private static readonly houseNumberPattern =
    /(?:\d+(?:\s?[a-z]+)?(?:\s[-\/]\s|[-\/]))?\d+(?:\s?[a-z]+|\s\d\/\d)?$|\d+(?:\s?[a-z]+)?\s*\/{2}\s*.*/i;

  /**
   * Splits an address line into street and house number.
   *
   * @param streetWithHouseNo - Address line such as "Alte Straße 4a - 7";
   *   leading and trailing whitespace is ignored.
   * @returns A `[street, houseNumber]` tuple. `houseNumber` is the empty
   *   string when no house number could be identified.
   */
  splitStreetAndHouseNo(streetWithHouseNo: string): [string, string] {
    // Trailing whitespace would otherwise defeat the `$`-anchored patterns.
    const line = streetWithHouseNo.trim();

    // The digits belong to the street name itself, so there is no house number.
    if (AddressParser.streetOnlyPattern.test(line)) {
      return [line, ''];
    }

    // If the line contains " Nr.", use that marker to split it. Splitting at
    // the match index (instead of on the literal "Nr.") keeps the detection
    // and the split consistent for "nr."/"NR." and preserves the input casing.
    const marker = AddressParser.numberMarkerPattern.exec(line);
    if (marker) {
      return [line.slice(0, marker.index).trim(), line.slice(marker.index).trim()];
    }

    // Look for a house-number pattern and split the line right before it.
    const houseNo = AddressParser.houseNumberPattern.exec(line);
    if (houseNo) {
      return [line.slice(0, houseNo.index).trim(), houseNo[0].trim()];
    }

    // Otherwise assume that no house number is present.
    return [line, ''];
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import { describe, expect, it } from 'vitest' | |
| import { AddressParser } from './AddressParser' | |
describe('AddressParser', () => {
  const parser = new AddressParser()

  // Genuine German street names that cover the usual quirks: embedded
  // spaces, digits in various positions, and German special characters.
  const commonStreets = [
    'Schulweg',
    'Güntherstraße',
    'Alte Straße',
    'Straße des 17. Juni',
    '1. Hafenstraße',
  ]

  // According to DIN 5008 a house number may, besides the plain number,
  // take any of these forms:
  //  - several house numbers of one building joined by a hyphen or slash;
  //  - a trailing letter, separated from the number by a space;
  //  - that trailing letter in lowercase or uppercase;
  //  - an apartment number (or similar) appended after two slashes.
  //
  // Some deviations are so widespread in practice that they have to be
  // accepted too:
  //  - no space between a number and its trailing letter;
  //  - no spaces around "-", "/", or "//".
  //
  // Every entry below therefore counts as a valid, common house number.
  const commonHousenumbers = [
    '7',
    '7a',
    '7 a',
    '4-7',
    '4-7a',
    '4-7 a',
    '4a-7',
    '4a-7a',
    '4a-7 a',
    '4 a-7',
    '4 a-7a',
    '4 a-7 a',
    '4 - 7',
    '4 - 7a',
    '4 - 7 a',
    '4a - 7',
    '4a - 7a',
    '4a - 7 a',
    '4 a - 7',
    '4 a - 7a',
    '4 a - 7 a',
    '4/7',
    '4/7a',
    '4/7 a',
    '4a/7',
    '4a/7a',
    '4a/7 a',
    '4 a/7',
    '4 a/7a',
    '4 a/7 a',
    '4 / 7',
    '4 / 7a',
    '4 / 7 a',
    '4a / 7',
    '4a / 7a',
    '4a / 7 a',
    '4 a / 7',
    '4 a / 7a',
    '4 a / 7 a',
    '4 // 7',
    '4 // 7a',
    '4 // 7 a',
    '4a // 7',
    '4a // 7a',
    '4a // 7 a',
    '4 a // 7',
    '4 a // 7a',
    '4 a // 7 a',
    '4 // App. 7',
    '4 // App. 7a',
    '4 // App. 7 a',
    '4a // App. 7',
    '4a // App. 7a',
    '4a // App. 7 a',
    '4 a // App. 7',
    '4 a // App. 7a',
    '4 a // App. 7 a',
    '4 // 7 Stock.',
    '4a // 7 Stock.',
    '4 a // 7 Stock.',
  ]

  // Cartesian product: every element of `left` paired with every element of
  // `right`, ordered by `left` first.
  const cartesianProduct = <A, B>(left: A[], right: B[]): (readonly [A, B])[] => {
    const pairs: (readonly [A, B])[] = []
    for (const l of left) {
      for (const r of right) {
        pairs.push([l, r])
      }
    }
    return pairs
  }

  describe('common street-housenumber combinations', () => {
    // One test per street/house-number pair.
    for (const [street, houseNumber] of cartesianProduct(commonStreets, commonHousenumbers)) {
      const fullAddressLine = `${street} ${houseNumber}`
      it(`should parse "${fullAddressLine}"`, () => {
        const result = parser.splitStreetAndHouseNo(fullAddressLine)
        expect(result).toEqual([street, houseNumber])
      })
    }

    // A bare street name must come back unchanged with an empty house number.
    for (const street of commonStreets) {
      it(`should parse "${street}" without a house-number`, () => {
        const result = parser.splitStreetAndHouseNo(street)
        expect(result).toEqual([street, ''])
      })
    }
  })

  describe('new street without a name yet', () => {
    // Unnamed streets carry a number in their name, so the house number is
    // introduced by an explicit "Nr." marker.
    for (const plainNumber of commonHousenumbers) {
      const houseNumber = `Nr. ${plainNumber}`
      const fullAddressLine = `Straße 17 ${houseNumber}`
      it(`should parse "${fullAddressLine}"`, () => {
        const result = parser.splitStreetAndHouseNo(fullAddressLine)
        expect(result).toEqual(['Straße 17', houseNumber])
      })
    }

    it(`should parse "Straße 17" without a house-number`, () => {
      const result = parser.splitStreetAndHouseNo('Straße 17')
      expect(result).toEqual(['Straße 17', ''])
    })
  })

  describe('special streets and house-numbers', () => {
    describe('Quadratestadt in Mannheim', () => {
      it('should parse "D4 17"', () => {
        const result = parser.splitStreetAndHouseNo('D4 17')
        expect(result).toEqual(['D4', '17'])
      })

      it(`should parse "D4" without a house-number`, () => {
        const result = parser.splitStreetAndHouseNo('D4')
        expect(result).toEqual(['D4', ''])
      })
    })

    describe('Streets of cologne with "halfed" house-numbers', () => {
      it('should parse "Alte Straße 17 1/2"', () => {
        const result = parser.splitStreetAndHouseNo('Alte Straße 17 1/2')
        expect(result).toEqual(['Alte Straße', '17 1/2'])
      })
    })
  })
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment